In [1]:
import pandas as pd
import polars as pl
import numpy as np
import os 

#### Data Loading and Merging

In [2]:
file_path ='/kaggle/input/forecasting-the-future-the-helios-corn-climate-challenge'

In [3]:
riskfutures = pd.read_csv('Data/corn_climate_risk_futures_daily_master.csv')
marketshare = pd.read_csv('Data/corn_regional_market_share.csv')

In [4]:
mergedf = riskfutures.copy()
mergedf['day_of_year'] = pd.to_datetime(mergedf['date_on'],format='%Y-%m-%d').dt.dayofyear
mergedf['quarter'] = pd.to_datetime(mergedf['date_on'],format='%Y-%m-%d').dt.quarter

In [5]:
mergedf = mergedf.merge(marketshare[['region_id','percent_country_production']],how='left',on='region_id')

In [6]:
mergedf['percent_country_production'] = mergedf['percent_country_production'].fillna(0.0)

#### Introduction of Climate Risk (Coldwave) by Tim

In [7]:
# Total cnt locations for each rows
mergedf['total_location_by_region'] = mergedf['climate_risk_cnt_locations_heat_stress_risk_low'] + \
                                    mergedf['climate_risk_cnt_locations_heat_stress_risk_medium'] + \
                                    mergedf['climate_risk_cnt_locations_heat_stress_risk_high']

# Climate Risk for Coldwave, and Flood:
for i in range(1, 5):
    mergedf[f'medium_coldstress_lag_{i}'] = mergedf['climate_risk_cnt_locations_unseasonably_cold_risk_medium'].shift(i)
    mergedf[f'medium_coldstress_lag_{i}'] = mergedf[f'medium_coldstress_lag_{i}'].fillna(0)

for j in range(1, 3): 
    mergedf[f'high_coldstress_lag_{j}'] = mergedf['climate_risk_cnt_locations_unseasonably_cold_risk_high'].shift(j)
    mergedf[f'high_coldstress_lag_{j}'] = mergedf[f'high_coldstress_lag_{j}'].fillna(0)

mergedf['medium_coldstress_4days_average'] = mergedf[[f'medium_coldstress_lag_{i}' for i in range(1, 5)]].mean(axis=1)
mergedf['medium_coldstress_2days_average'] = mergedf[[f'medium_coldstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)
mergedf['high_coldstress_2days_average'] = mergedf[[f'high_coldstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)

mergedf['climate_risk_cnt_locations_coldwave_risk_high'] = (mergedf['medium_coldstress_4days_average'] + mergedf['high_coldstress_2days_average']) / 2
mergedf['climate_risk_cnt_locations_coldwave_risk_medium'] = (mergedf['medium_coldstress_2days_average'] + mergedf['high_coldstress_lag_1']) / 2


#### Supply Chain and Seasonality Weightings by Tim

In [8]:
supply_weights = {
    "United States": 2.00, "Brazil": 1.85, "Argentina": 1.75, "Ukraine": 1.60, "Russia": 1.40, "Canada": 1.40,
    "China": 1.30, "Mexico": 1.25,
    "South Africa": 1.20,
    "Paraguay": 1.10, "India": 1.05}

mergedf['supply_chain_weightings'] = mergedf['country_name'].map(supply_weights)

seasonal_weights = {
            'Off-season': 1,
            'Planting': 1.5,
            'Mid-season': 2,
            'Harvest': 2,
            'Peak Harvest': 1.5,}

mergedf['seasonality_weightings'] = mergedf['harvest_period'].map(seasonal_weights)
mergedf['adjusted_weightings'] = mergedf['supply_chain_weightings'] * mergedf['seasonality_weightings']

In [9]:
mergedf['sum_of_medium_high_drought_risk'] = mergedf['climate_risk_cnt_locations_drought_risk_medium'] + mergedf['climate_risk_cnt_locations_drought_risk_high']
mergedf['sum_of_medium_high_excess_precip_risk'] = mergedf['climate_risk_cnt_locations_excess_precip_risk_medium'] + mergedf['climate_risk_cnt_locations_excess_precip_risk_high']
mergedf['sum_of_medium_high_unseasonably_cold_risk'] = mergedf['climate_risk_cnt_locations_unseasonably_cold_risk_medium'] + mergedf['climate_risk_cnt_locations_unseasonably_cold_risk_high']
mergedf['sum_of_medium_high_heat_stress_risk'] = mergedf['climate_risk_cnt_locations_heat_stress_risk_medium'] + mergedf['climate_risk_cnt_locations_heat_stress_risk_high']

mergedf['sum_of_medium_high_drought_risk_lag_1yr'] = mergedf['sum_of_medium_high_drought_risk'].shift(365)
mergedf['sum_of_medium_high_excess_precip_risk_lag_1yr'] = mergedf['sum_of_medium_high_excess_precip_risk'].shift(365)
mergedf['sum_of_medium_high_unseasonably_cold_risk_lag_1yr'] = mergedf['sum_of_medium_high_unseasonably_cold_risk'].shift(365)
mergedf['sum_of_medium_high_heat_stress_risk_lag_1yr'] = mergedf['sum_of_medium_high_heat_stress_risk'].shift(365)

mergedf['sum_of_medium_high_drought_risk_lag_1yr'] = mergedf['sum_of_medium_high_drought_risk_lag_1yr'].fillna(0)
mergedf['sum_of_medium_high_excess_precip_risk_lag_1yr'] = mergedf['sum_of_medium_high_excess_precip_risk_lag_1yr'].fillna(0)
mergedf['sum_of_medium_high_unseasonably_cold_risk_lag_1yr'] = mergedf['sum_of_medium_high_unseasonably_cold_risk_lag_1yr'].fillna(0)
mergedf['sum_of_medium_high_heat_stress_risk_lag_1yr'] = mergedf['sum_of_medium_high_heat_stress_risk_lag_1yr'].fillna(0)

mergedf['diff_drought'] = mergedf['sum_of_medium_high_drought_risk'] - mergedf['sum_of_medium_high_drought_risk_lag_1yr']
mergedf['diff_excessprecip'] = mergedf['sum_of_medium_high_excess_precip_risk'] - mergedf['sum_of_medium_high_excess_precip_risk_lag_1yr']
mergedf['diff_cold'] = mergedf['sum_of_medium_high_unseasonably_cold_risk'] - mergedf['sum_of_medium_high_unseasonably_cold_risk_lag_1yr']
mergedf['diff_heatstress'] = mergedf['sum_of_medium_high_heat_stress_risk'] - mergedf['sum_of_medium_high_heat_stress_risk_lag_1yr']

In [10]:
category = ['drought', 'excessprecip', 'cold', 'heatstress']

mergedf['worse_off_indicator'] = 0

for disaster in category:
    x = mergedf.loc[:, f'diff_{disaster}']

    if isinstance(x, pd.DataFrame):
        x = x.iloc[:, 0]
    
    mergedf['worse_off_indicator'] += pd.Series(1, index=x.index).where(x > 1, 0) 

mergedf.head()

Unnamed: 0,ID,crop_name,country_name,country_code,region_name,region_id,harvest_period,growing_season_year,date_on,climate_risk_cnt_locations_heat_stress_risk_low,...,sum_of_medium_high_heat_stress_risk,sum_of_medium_high_drought_risk_lag_1yr,sum_of_medium_high_excess_precip_risk_lag_1yr,sum_of_medium_high_unseasonably_cold_risk_lag_1yr,sum_of_medium_high_heat_stress_risk_lag_1yr,diff_drought,diff_excessprecip,diff_cold,diff_heatstress,worse_off_indicator
0,8af42722-3f05-4ede-80fc-605e0e2b3b67,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-15,23,...,0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,1
1,54f4ddc5-e7ab-4bfb-ad6a-5649841af563,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-16,23,...,0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,1
2,63a41fce-d371-4295-a58a-dc6491664020,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-17,23,...,0,0.0,0.0,0.0,0.0,9.0,0.0,9.0,0.0,2
3,cddfa440-e0eb-4735-beb1-1aca2afefe53,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-18,23,...,0,0.0,0.0,0.0,0.0,8.0,0.0,3.0,0.0,2
4,3eaacfe1-29be-4da9-b5c9-a9457d2d2b83,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-19,23,...,0,0.0,0.0,0.0,0.0,7.0,0.0,2.0,0.0,2


In [11]:
mergedf['forecasting_weighting_for_supply_shock'] = mergedf['worse_off_indicator'] * mergedf['adjusted_weightings']

print(mergedf['supply_chain_weightings'].isna().sum())
print(mergedf['adjusted_weightings'].isna().sum())
print(mergedf['forecasting_weighting_for_supply_shock'].isna().sum())

0
0
0


### 1 Baseline Feature Engineering

#### 1.1 Production-Weighted Risk Scores

In [12]:
risk_categories = ['heat_stress', 'unseasonably_cold', 'excess_precip', 'drought', 'coldwave']
for risk in risk_categories:
    medium = f'climate_risk_cnt_locations_{risk}_risk_medium'
    high = f'climate_risk_cnt_locations_{risk}_risk_high'
    
    risk_scores = (1*mergedf[medium]+2*mergedf[high])/\
                           (mergedf['total_location_by_region'])
    ## define regional daily risk score as normalized weighted sum of number of locations
    
    production_weighted_risk_scores = (risk_scores*mergedf['percent_country_production'])/100
    ## use marketshare data to get production-weighted regional daily risk scores
    
    mergedf[f'climate_risk_{risk}_score'] = risk_scores
    mergedf[f'climate_risk_{risk}_weighted_score'] = production_weighted_risk_scores
    ## iterate for all five climate risk types; total 10 new engieered features

#### 1.2 Composite Risk Indices

In [13]:
mergedf['climate_risk_temperature_stress'] = \
mergedf[[f'climate_risk_{risk}_score' for risk in risk_categories[:2]]].max(axis=1) 
## maximum of temperature-related risk scores
mergedf['climate_risk_precipitation_stress'] = \
mergedf[[f'climate_risk_{risk}_score' for risk in risk_categories[2:4]]].max(axis=1)
## maximum of precipitation-related risk scores
mergedf['climate_risk_overall_stress'] = \
mergedf[[f'climate_risk_{risk}_score' for risk in risk_categories]].max(axis=1)
## maximum of all risk scores
mergedf['climate_risk_avg_stress'] = \
mergedf[[f'climate_risk_{risk}_score' for risk in risk_categories]].mean(axis=1)
## average of all risk scores
## total 4 new engineered features

#### 1.3 Risk Temporal Summaries

In [14]:
mergedf = mergedf.sort_values(['region_name','date_on'])
window_period = [7,14,30,60,90,120,240]
## three periods to compute risk scores moving avg and maximum 
for window in window_period:
    for risk in risk_categories:
        mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
        mergedf.groupby(['region_name'])[f'climate_risk_{risk}_score']\
               .rolling(window=window,min_periods=1).mean().reset_index(level=0,drop=True)
## compute risk score moving avg with different windows for different risk types in each region

        mergedf[f'climate_risk_{risk}_max_{window}d'] = \
        mergedf.groupby(['region_name'])[f'climate_risk_{risk}_score']\
               .rolling(window=window,min_periods=1).max().reset_index(level=0,drop=True)
## compute maximum risk scores with different windows for different risk types in each region
## total 7*5*2 = 70 new features

  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \
  mergedf[f'climate_risk_{risk}_ma_{window}d'] = \
  mergedf[f'climate_risk_{risk}_max_{window}d'] = \


#### 1.4 Risk Momentum

In [15]:
features_change1d = mergedf.groupby('region_name')[[f'climate_risk_{risk}_score' for risk in risk_categories]]\
       .diff(periods=1)\
       .rename(columns=dict(zip([f'climate_risk_{risk}_score' for risk in risk_categories],\
                                [f'climate_risk_{risk}_change_1d' for risk in risk_categories])))
## Daily Change of risk scores for each risk type in each region 

features_acceleration = features_change1d.diff(periods=1)
## Acceleration of daily Change of risk scores for each risk type in each region

features_change1w = mergedf.groupby('region_name')[[f'climate_risk_{risk}_score' for risk in risk_categories]]\
       .diff(periods=7)\
       .rename(columns=dict(zip([f'climate_risk_{risk}_score' for risk in risk_categories],\
                                [f'climate_risk_{risk}_change_1d' for risk in risk_categories])))
## Weekly Change of risk scores for each risk type in each region 

mergedf = pd.concat([mergedf,\
           features_change1d,\
           features_change1w,\
           features_acceleration],axis=1)
## 15 new features in Risk Momentum category

#### 1.5 Cross-Regional features

In [16]:
feature_country = pd.concat([\
mergedf.groupby(['country_name', 'date_on'])\
[[f'climate_risk_{risk}_score' for risk in risk_categories]]\
.agg(['mean','max','std']),
## compute country-wide daily avg, max, and std risk scores
mergedf.groupby(['country_name', 'date_on'])\
[[f'climate_risk_{risk}_weighted_score' for risk in risk_categories]]\
.agg('sum')],axis=1)
## compute country-wide daily production-weighted sum risk scores
feature_country.columns = [f'climate_risk_{risk}_score_country_{metric}'\
                          for risk in risk_categories \
                          for metric in ['mean','max','std']]+\
                          [f'climate_risk_{risk}_weighted_score_country_sum'\
                          for risk in risk_categories]
## rename new features
mergedf = mergedf.merge(feature_country.reset_index(),\
              how='left',\
              on=['country_name','date_on'])
## add 4*5=20 new features

#### Columns to Drop (Just select the drought, excess precipitation, and coldwave risks)

In [17]:
cols_to_drop = ['climate_risk_cnt_locations_heat_stress_risk_low',
 'climate_risk_cnt_locations_heat_stress_risk_medium',
 'climate_risk_cnt_locations_heat_stress_risk_high',
 'climate_risk_cnt_locations_unseasonably_cold_risk_low',
 'climate_risk_cnt_locations_unseasonably_cold_risk_medium',
 'climate_risk_cnt_locations_unseasonably_cold_risk_high',
 'climate_risk_cnt_locations_excess_precip_risk_low',
 'climate_risk_cnt_locations_excess_precip_risk_medium',
 'climate_risk_cnt_locations_excess_precip_risk_high',
 'climate_risk_cnt_locations_drought_risk_low',
 'climate_risk_cnt_locations_drought_risk_medium',
 'climate_risk_cnt_locations_drought_risk_high',
 'climate_risk_cnt_locations_coldwave_risk_high',
 'climate_risk_cnt_locations_coldwave_risk_medium']

In [18]:
climate_risk_cols = [c for c in mergedf.columns if c.startswith('climate_risk_')]
climate_risk_selected_cols = [item for item in climate_risk_cols if item not in cols_to_drop]

#### Non-Linear Transformation by William and Tim

In [19]:
climate_risk_cols = [c for c in mergedf.columns if c.startswith('climate_risk_')]
for feature_name in climate_risk_cols:
    x = mergedf.loc[:, feature_name]

    if isinstance(x, pd.DataFrame):
        x = x.iloc[:, 0]
    
    mergedf[f'{feature_name}_log1p'] = np.log1p(x.clip(lower=0))
    mergedf[f'{feature_name}_log1p'] = mergedf[f'{feature_name}_log1p'].fillna(0)
    mergedf[f'{feature_name}_ssqrt'] = np.sign(x) * np.sqrt(np.abs(x))
    mergedf[f'{feature_name}_ssqrt'] = mergedf[f'{feature_name}_ssqrt'].fillna(0)
    mergedf[f'{feature_name}_thresh_mag'] = x.where(x>1, 0)
    mergedf[f'{feature_name}_thresh_mag'] = mergedf[f'{feature_name}_thresh_mag'].fillna(0)
    mergedf[f'{feature_name}_tangent'] = np.tan(x)
    mergedf[f'{feature_name}_tangent'] = mergedf[f'{feature_name}_tangent'].fillna(0)
    mergedf[f'{feature_name}_sin'] = np.sin(x)
    mergedf[f'{feature_name}_sin'] = mergedf[f'{feature_name}_sin'].fillna(0)
    mergedf[f'{feature_name}_cos'] = np.cos(x)
    mergedf[f'{feature_name}_cos'] = mergedf[f'{feature_name}_cos'].fillna(0)

  mergedf[f'{feature_name}_cos'] = np.cos(x)
  mergedf[f'{feature_name}_log1p'] = np.log1p(x.clip(lower=0))
  mergedf[f'{feature_name}_ssqrt'] = np.sign(x) * np.sqrt(np.abs(x))
  mergedf[f'{feature_name}_thresh_mag'] = x.where(x>1, 0)
  mergedf[f'{feature_name}_tangent'] = np.tan(x)
  mergedf[f'{feature_name}_sin'] = np.sin(x)
  mergedf[f'{feature_name}_cos'] = np.cos(x)
  mergedf[f'{feature_name}_log1p'] = np.log1p(x.clip(lower=0))
  mergedf[f'{feature_name}_ssqrt'] = np.sign(x) * np.sqrt(np.abs(x))
  mergedf[f'{feature_name}_thresh_mag'] = x.where(x>1, 0)
  mergedf[f'{feature_name}_tangent'] = np.tan(x)
  mergedf[f'{feature_name}_sin'] = np.sin(x)
  mergedf[f'{feature_name}_cos'] = np.cos(x)
  mergedf[f'{feature_name}_log1p'] = np.log1p(x.clip(lower=0))
  mergedf[f'{feature_name}_ssqrt'] = np.sign(x) * np.sqrt(np.abs(x))
  mergedf[f'{feature_name}_thresh_mag'] = x.where(x>1, 0)
  mergedf[f'{feature_name}_tangent'] = np.tan(x)
  mergedf[f'{feature_name}_sin'] = np.sin(x)
  mergedf[f'{

In [20]:
std_multiplier = [1, 2]
features = mergedf[mergedf.columns[pd.Series(mergedf.columns).apply(\
                     lambda x: x.startswith('climate_risk'))]]

for multiplier in std_multiplier:
    features_threshold = features.transform(lambda x: np.where(x>multiplier*x.std(),x,0))\
                                 .rename(columns=dict(zip(features.columns,\
                                        [f+f'_above_{multiplier}_std'for f in features.columns])))
    mergedf = pd.concat([mergedf,features_threshold],axis=1)

In [21]:
climate_risk_cols = [c for c in mergedf.columns if c.startswith('climate_risk_')]
climate_risk_selected_cols_1 = [item for item in climate_risk_cols if item not in cols_to_drop]

In [22]:
for feature_name in climate_risk_selected_cols_1:
    x = mergedf.loc[:, feature_name]

    if isinstance(x, pd.DataFrame):
        x = x.iloc[:, 0]
    
    mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))

mergedf.shape

  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_shock'] + 1e-8))
  mergedf[f'{feature_name}_supply_weighting'] = (x * (mergedf['forecasting_weighting_for_supply_

(320661, 5301)

In [25]:
isinstance(mergedf.columns, pd.MultiIndex)

False

In [27]:
mergedf.columns[mergedf.columns.duplicated()].tolist()

['climate_risk_heat_stress_change_1d',
 'climate_risk_unseasonably_cold_change_1d',
 'climate_risk_excess_precip_change_1d',
 'climate_risk_drought_change_1d',
 'climate_risk_coldwave_change_1d',
 'climate_risk_heat_stress_change_1d',
 'climate_risk_unseasonably_cold_change_1d',
 'climate_risk_excess_precip_change_1d',
 'climate_risk_drought_change_1d',
 'climate_risk_coldwave_change_1d',
 'climate_risk_heat_stress_change_1d_above_1_std',
 'climate_risk_heat_stress_change_1d_above_1_std',
 'climate_risk_unseasonably_cold_change_1d_above_1_std',
 'climate_risk_unseasonably_cold_change_1d_above_1_std',
 'climate_risk_excess_precip_change_1d_above_1_std',
 'climate_risk_excess_precip_change_1d_above_1_std',
 'climate_risk_drought_change_1d_above_1_std',
 'climate_risk_drought_change_1d_above_1_std',
 'climate_risk_coldwave_change_1d_above_1_std',
 'climate_risk_coldwave_change_1d_above_1_std',
 'climate_risk_heat_stress_change_1d_above_1_std',
 'climate_risk_heat_stress_change_1d_above_1_

In [28]:
mergedf = mergedf.loc[:, ~mergedf.columns.duplicated()]

: 

In [None]:
mergedf

In [23]:
pldf = pl.from_pandas(mergedf)
del mergedf

ValueError: Pandas dataframe contains non-unique indices and/or column names. Polars dataframes require unique string names for columns.

In [None]:
mergedf = mergedf.sort_values(['region_name','date_on'])
window_period = [7, 14, 30]
features = mergedf[mergedf.columns[pd.Series(mergedf.columns).apply(\
                     lambda x: x.startswith('climate_risk'))]]
for window in window_period:
    features_lag = features.shift(periods=window)
    ## generate historial risk features with different periods
    features_lag = features_lag.rename(columns=dict(zip(features_lag.columns,
                       [c+f'_lag_{window}d'for c in features_lag.columns])\
                      ))
    mergedf = pd.concat([mergedf,features_lag],axis=1)

In [None]:
mergedf.shape

(320661, 2489)

#### Calculation Score Function

In [None]:
start = 0
end = 10000

for i in range(1, 33):
    if end > 320000:
        break
    else:
        corrtable = compute_partial_correlations(mergedf[start:end])
        sigcorr_report(corrtable).sort_values(by='avg_sig_corr', ascending=False).head(50)
        start += end
        end += end


In [None]:
features_sig = [
]


submissiondf = mergedf.dropna()
submissiondf = submissiondf[submissiondf.columns[pd.Series(submissiondf.columns).apply(\
lambda x:( not x.startswith('climate_risk')) or (x in features_sig)
)]]