# Helios Corn Futures Climate Challenge - Submission Sample

This notebook demonstrates how to:
1. Load and explore the competition dataset
2. Engineer climate risk features
3. Evaluate your approach using the CFCS metric
4. Prepare a submission

**Goal**: Create novel climate risk features that show stronger correlations with corn futures prices.

In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

print("Libraries loaded successfully!")

Libraries loaded successfully!


## 1. Load Competition Data

In [48]:
# Load main dataset
print("Loading main dataset...")
file_path_1 = '/kaggle/input/forecasting-the-future-the-helios-corn-climate-challenge/'

df = pd.read_csv(f'{file_path_1}/corn_climate_risk_futures_daily_master.csv')
df['date_on'] = pd.to_datetime(df['date_on'])

# Load regional market share data
print("Loading regional market share data...")
market_share_df = pd.read_csv(f'{file_path_1}/corn_regional_market_share.csv')

print(f"Main dataset shape: {df.shape}")
print(f"Date range: {df['date_on'].min()} to {df['date_on'].max()}")
print(f"Countries: {df['country_name'].nunique()}")
print(f"Regions: {df['region_name'].nunique()}")

df.head()

Loading main dataset...
Loading regional market share data...
Main dataset shape: (320661, 41)
Date range: 2016-01-01 00:00:00 to 2025-12-15 00:00:00
Countries: 11
Regions: 89


Unnamed: 0,ID,crop_name,country_name,country_code,region_name,region_id,harvest_period,growing_season_year,date_on,climate_risk_cnt_locations_heat_stress_risk_low,climate_risk_cnt_locations_heat_stress_risk_medium,climate_risk_cnt_locations_heat_stress_risk_high,climate_risk_cnt_locations_unseasonably_cold_risk_low,climate_risk_cnt_locations_unseasonably_cold_risk_medium,climate_risk_cnt_locations_unseasonably_cold_risk_high,climate_risk_cnt_locations_excess_precip_risk_low,climate_risk_cnt_locations_excess_precip_risk_medium,climate_risk_cnt_locations_excess_precip_risk_high,climate_risk_cnt_locations_drought_risk_low,climate_risk_cnt_locations_drought_risk_medium,climate_risk_cnt_locations_drought_risk_high,futures_close_ZC_1,futures_close_ZC_2,futures_close_ZW_1,futures_close_ZS_1,futures_zc1_ret_pct,futures_zc1_ret_log,futures_zc_term_spread,futures_zc_term_ratio,futures_zc1_ma_20,futures_zc1_ma_60,futures_zc1_ma_120,futures_zc1_vol_20,futures_zc1_vol_60,futures_zw_zc_spread,futures_zc_zw_ratio,futures_zs_zc_spread,futures_zc_zs_ratio,date_on_year,date_on_month,date_on_year_month
0,8af42722-3f05-4ede-80fc-605e0e2b3b67,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-15,23,0,0,23,0,0,23,0,0,16,7,0,429.0,434.0,477.5,1156.0,-0.017182,-0.017331,5.0,1.011655,414.4125,387.695833,375.014583,0.01352,0.015724,48.5,0.898429,727.0,0.371107,2016,6,2016_06
1,54f4ddc5-e7ab-4bfb-ad6a-5649841af563,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-16,23,0,0,23,0,0,23,0,0,14,9,0,425.25,430.5,472.5,1134.5,-0.008741,-0.00878,5.25,1.012346,415.7,388.616667,375.5125,0.013799,0.015792,47.25,0.9,709.25,0.374835,2016,6,2016_06
2,63a41fce-d371-4295-a58a-dc6491664020,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-17,23,0,0,14,1,8,23,0,0,14,8,1,437.75,442.75,481.25,1159.5,0.029394,0.028971,5.0,1.011422,418.0875,389.770833,376.122917,0.013442,0.016145,43.5,0.90961,721.75,0.377533,2016,6,2016_06
3,cddfa440-e0eb-4735-beb1-1aca2afefe53,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-18,23,0,0,20,0,3,23,0,0,15,6,2,,,,,,,,,,,,,,,,,,2016,6,2016_06
4,3eaacfe1-29be-4da9-b5c9-a9457d2d2b83,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-19,23,0,0,21,2,0,23,0,0,16,7,0,,,,,,,,,,,,,,,,,,2016,6,2016_06


## 2. Data Exploration

In [49]:
# Identify climate risk and futures columns
climate_cols = [c for c in df.columns if c.startswith('climate_risk_')]
futures_cols = [c for c in df.columns if c.startswith('futures_')]

print(f"Climate risk columns ({len(climate_cols)}):")
for col in climate_cols:
    print(f"  - {col}")

print(f"\nFutures columns ({len(futures_cols)}):")
for col in futures_cols[:10]:  # Show first 10
    print(f"  - {col}")
if len(futures_cols) > 30:
    print(f"  ... and {len(futures_cols) - 10} more")

Climate risk columns (12):
  - climate_risk_cnt_locations_heat_stress_risk_low
  - climate_risk_cnt_locations_heat_stress_risk_medium
  - climate_risk_cnt_locations_heat_stress_risk_high
  - climate_risk_cnt_locations_unseasonably_cold_risk_low
  - climate_risk_cnt_locations_unseasonably_cold_risk_medium
  - climate_risk_cnt_locations_unseasonably_cold_risk_high
  - climate_risk_cnt_locations_excess_precip_risk_low
  - climate_risk_cnt_locations_excess_precip_risk_medium
  - climate_risk_cnt_locations_excess_precip_risk_high
  - climate_risk_cnt_locations_drought_risk_low
  - climate_risk_cnt_locations_drought_risk_medium
  - climate_risk_cnt_locations_drought_risk_high

Futures columns (17):
  - futures_close_ZC_1
  - futures_close_ZC_2
  - futures_close_ZW_1
  - futures_close_ZS_1
  - futures_zc1_ret_pct
  - futures_zc1_ret_log
  - futures_zc_term_spread
  - futures_zc_term_ratio
  - futures_zc1_ma_20
  - futures_zc1_ma_60


## 3. Baseline Feature Engineering

Let's start with some basic feature engineering approaches to establish a baseline.

### ‚ö†Ô∏è **CRITICAL NAMING REQUIREMENT**

**ALL engineered climate features must start with `climate_risk_` for the evaluation metric to detect them!**

- ‚úÖ **Correct**: `climate_risk_heat_stress_weighted`, `climate_risk_drought_ma_30d`
- ‚ùå **Wrong**: `heat_stress_risk`, `my_climate_feature`, `weather_index`

The evaluation system automatically finds features by this prefix. Incorrect naming = zero score for those features!

In [50]:
# Create a working copy
merged_daily_df = df.copy()

# Add basic time features
merged_daily_df['day_of_year'] = merged_daily_df['date_on'].dt.dayofyear
merged_daily_df['quarter'] = merged_daily_df['date_on'].dt.quarter

print("Added basic time features")
print(f"Dataset shape: {merged_daily_df.shape}")

Added basic time features
Dataset shape: (320661, 43)


## Climate Types Features
Heatwaves, Coldwaves, Flood, Wildfires, Storms

In [51]:
# Climate Risks for Wildfires, and Storms: 

merged_daily_df['climate_risk_cnt_locations_wildfires_risk_medium'] = merged_daily_df[['climate_risk_cnt_locations_heat_stress_risk_medium', 'climate_risk_cnt_locations_drought_risk_medium']].min(axis=1)
merged_daily_df['climate_risk_cnt_locations_wildfires_risk_high'] = merged_daily_df[['climate_risk_cnt_locations_heat_stress_risk_high', 'climate_risk_cnt_locations_drought_risk_high']].min(axis=1)

merged_daily_df['climate_risk_cnt_locations_storms_risk_medium'] = merged_daily_df[['climate_risk_cnt_locations_heat_stress_risk_medium', 'climate_risk_cnt_locations_excess_precip_risk_medium']].min(axis=1)
merged_daily_df['climate_risk_cnt_locations_storms_risk_high'] = merged_daily_df[['climate_risk_cnt_locations_heat_stress_risk_high', 'climate_risk_cnt_locations_excess_precip_risk_high']].min(axis=1)


In [52]:
#Climate Risk for Heat wave, Cold wave, and Flood
for i in range(1, 5):
    merged_daily_df[f'medium_heatstress_lag_{i}'] = merged_daily_df['climate_risk_cnt_locations_heat_stress_risk_medium'].shift(i)
    merged_daily_df[f'medium_coldstress_lag_{i}'] = merged_daily_df['climate_risk_cnt_locations_unseasonably_cold_risk_medium'].shift(i)
    merged_daily_df[f'medium_precip_lag_{i}'] = merged_daily_df['climate_risk_cnt_locations_excess_precip_risk_medium'].shift(i)
    
    merged_daily_df[f'medium_heatstress_lag_{i}'] = merged_daily_df[f'medium_heatstress_lag_{i}'].fillna(0)
    merged_daily_df[f'medium_coldstress_lag_{i}'] = merged_daily_df[f'medium_coldstress_lag_{i}'].fillna(0)
    merged_daily_df[f'medium_precip_lag_{i}'] = merged_daily_df[f'medium_precip_lag_{i}'].fillna(0)

for j in range(1, 3): 
    merged_daily_df[f'high_heatstress_lag_{j}'] = merged_daily_df['climate_risk_cnt_locations_heat_stress_risk_high'].shift(j)
    merged_daily_df[f'high_coldstress_lag_{j}'] = merged_daily_df['climate_risk_cnt_locations_unseasonably_cold_risk_high'].shift(j)
    merged_daily_df[f'high_precip_lag_{j}'] = merged_daily_df['climate_risk_cnt_locations_excess_precip_risk_high'].shift(j)

    merged_daily_df[f'high_heatstress_lag_{j}'] = merged_daily_df[f'high_heatstress_lag_{j}'].fillna(0)
    merged_daily_df[f'high_coldstress_lag_{j}'] = merged_daily_df[f'high_coldstress_lag_{j}'].fillna(0)
    merged_daily_df[f'high_precip_lag_{j}'] = merged_daily_df[f'high_precip_lag_{j}'].fillna(0)

merged_daily_df['total_location_by_region'] = merged_daily_df['climate_risk_cnt_locations_heat_stress_risk_low'] + merged_daily_df['climate_risk_cnt_locations_heat_stress_risk_medium'] \
                                        + merged_daily_df['climate_risk_cnt_locations_heat_stress_risk_high']

In [53]:
merged_daily_df['medium_heatstress_4days_average'] = merged_daily_df[[f'medium_heatstress_lag_{i}' for i in range(1, 5)]].mean(axis=1)
merged_daily_df['medium_heatstress_2days_average'] = merged_daily_df[[f'medium_heatstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)
merged_daily_df['high_heatstress_2days_average'] = merged_daily_df[[f'high_heatstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)

merged_daily_df['medium_coldstress_4days_average'] = merged_daily_df[[f'medium_coldstress_lag_{i}' for i in range(1, 5)]].mean(axis=1)
merged_daily_df['medium_coldstress_2days_average'] = merged_daily_df[[f'medium_coldstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)
merged_daily_df['high_coldstress_2days_average'] = merged_daily_df[[f'high_coldstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)

merged_daily_df['medium_precip_4days_average'] = merged_daily_df[[f'medium_precip_lag_{i}' for i in range(1, 5)]].mean(axis=1)
merged_daily_df['medium_precip_2days_average'] = merged_daily_df[[f'medium_precip_lag_{i}' for i in range(1, 3)]].mean(axis=1)
merged_daily_df['high_precip_2days_average'] = merged_daily_df[[f'high_precip_lag_{i}' for i in range(1, 3)]].mean(axis=1)

merged_daily_df['climate_risk_cnt_locations_heatwave_risk_high'] = (merged_daily_df['medium_heatstress_4days_average'] + merged_daily_df['high_heatstress_2days_average']) / 2
merged_daily_df['climate_risk_cnt_locations_heatwave_risk_medium'] = (merged_daily_df['medium_heatstress_2days_average'] + merged_daily_df['high_heatstress_lag_1']) / 2

merged_daily_df['climate_risk_cnt_locations_coldwave_risk_high'] = (merged_daily_df['medium_coldstress_4days_average'] + merged_daily_df['high_coldstress_2days_average']) / 2
merged_daily_df['climate_risk_cnt_locations_coldwave_risk_medium'] = (merged_daily_df['medium_coldstress_2days_average'] + merged_daily_df['high_coldstress_lag_1']) / 2

merged_daily_df['climate_risk_cnt_locations_flood_risk_high'] = (merged_daily_df['medium_precip_4days_average'] + merged_daily_df['high_precip_2days_average']) / 2
merged_daily_df['climate_risk_cnt_locations_flood_risk_medium'] = (merged_daily_df['medium_precip_2days_average'] + merged_daily_df['high_precip_lag_1']) / 2

## Merged Country Production with main Dataset

In [54]:
# Merge with market share data for production weighting
merged_daily_df = merged_daily_df.merge(
    market_share_df[['region_id', 'percent_country_production']], 
    on='region_id', 
    how='left'
)

# Fill missing production percentages with small default value
merged_daily_df['percent_country_production'] = merged_daily_df['percent_country_production'].fillna(1.0)

print("Merged with market share data")
print(f"Production share range: {merged_daily_df['percent_country_production'].min():.1f}% to {merged_daily_df['percent_country_production'].max():.1f}%")

Merged with market share data
Production share range: 0.0% to 73.0%


## Rolling Window

In [55]:
# Base Risk Scores
RISK_CATEGORIES = ['heat_stress', 'unseasonably_cold', 'excess_precip', 'drought', \
                   'flood', 'wildfires', 'storms', 'heatwave', 'coldwave']

ALL_NEW_FEATURES = []

for risk_type in RISK_CATEGORIES:
    low_col = f'climate_risk_cnt_locations_{risk_type}_risk_low'
    med_col = f'climate_risk_cnt_locations_{risk_type}_risk_medium' 
    high_col = f'climate_risk_cnt_locations_{risk_type}_risk_high'
    
    total = merged_daily_df['total_location_by_region']
    risk_score = (merged_daily_df[med_col] + 2 * merged_daily_df[high_col]) / (total + 1e-6)
    weighted = risk_score * (merged_daily_df['percent_country_production'] / 100)
    
    merged_daily_df[f'climate_risk_{risk_type}_score'] = risk_score
    merged_daily_df[f'climate_risk_{risk_type}_weighted'] = weighted
    ALL_NEW_FEATURES.extend([f'climate_risk_{risk_type}_score', f'climate_risk_{risk_type}_weighted'])

print(f"‚úÖ Base risk scores: {len(ALL_NEW_FEATURES)} features")

‚úÖ Base risk scores: 18 features


In [56]:
# Sort for time series operations
merged_df = merged_daily_df.sort_values(['region_id', 'date_on'])

# Rolling MA and Max (7, 14, 30, 60 days)
for window in [7, 14, 30, 60]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'
        
        # Moving Average
        ma_col = f'climate_risk_{risk_type}_ma_{window}d'
        merged_df[ma_col] = (
            merged_df.groupby('region_id')[score_col]
            .transform(lambda x: x.rolling(window, min_periods=1).mean())
        )
        ALL_NEW_FEATURES.append(ma_col)
        
        # Rolling Max
        max_col = f'climate_risk_{risk_type}_max_{window}d'
        merged_df[max_col] = (
            merged_df.groupby('region_id')[score_col]
            .transform(lambda x: x.rolling(window, min_periods=1).max())
        )
        ALL_NEW_FEATURES.append(max_col)

print(f"‚úÖ Rolling features: {len(ALL_NEW_FEATURES)} total")

‚úÖ Rolling features: 90 total


## Lag Features

In [57]:
# Lag features - weather today affects prices in future
for lag in [7, 14, 30]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'
        
        lag_col = f'climate_risk_{risk_type}_lag_{lag}d'
        merged_df[lag_col] = merged_df.groupby('region_id')[score_col].shift(lag)
        ALL_NEW_FEATURES.append(lag_col)

print(f"‚úÖ Lag features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Lag features added: 117 total


## Exponential Moving Averages

In [58]:
# Exponential Moving Averages
for span in [14, 30]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'
        
        ema_col = f'climate_risk_{risk_type}_ema_{span}d'
        merged_df[ema_col] = (
            merged_df.groupby('region_id')[score_col]
            .transform(lambda x: x.ewm(span=span, min_periods=1).mean())
        )
        ALL_NEW_FEATURES.append(ema_col)

print(f"‚úÖ EMA features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ EMA features added: 135 total


## Volatility Features

In [59]:
# Rolling Standard Deviation (volatility)
for window in [14, 30]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'
        
        vol_col = f'climate_risk_{risk_type}_vol_{window}d'
        merged_df[vol_col] = (
            merged_df.groupby('region_id')[score_col]
            .transform(lambda x: x.rolling(window, min_periods=2).std())
        )
        ALL_NEW_FEATURES.append(vol_col)

print(f"‚úÖ Volatility features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Volatility features added: 153 total


## Cumulative Stress Features

In [60]:
# Cumulative sum (total stress over period)
for window in [30, 60]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'
        
        cum_col = f'climate_risk_{risk_type}_cumsum_{window}d'
        merged_df[cum_col] = (
            merged_df.groupby('region_id')[score_col]
            .transform(lambda x: x.rolling(window, min_periods=1).sum())
        )
        ALL_NEW_FEATURES.append(cum_col)

print(f"‚úÖ Cumulative features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Cumulative features added: 171 total


## Non-Linear Features

In [61]:
# Non-linear transformations
for risk_type in RISK_CATEGORIES:
    score_col = f'climate_risk_{risk_type}_score'
    
    # Squared - emphasizes extreme values
    sq_col = f'climate_risk_{risk_type}_squared'
    merged_df[sq_col] = merged_df[score_col] ** 2
    ALL_NEW_FEATURES.append(sq_col)
    
    # Log transform - compresses high values
    log_col = f'climate_risk_{risk_type}_log'
    merged_df[log_col] = np.log1p(merged_df[score_col])
    ALL_NEW_FEATURES.append(log_col)

print(f"‚úÖ Non-linear features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Non-linear features added: 189 total


## Interaction Features

In [62]:
# Composite indices
score_cols = [f'climate_risk_{r}_score' for r in RISK_CATEGORIES]

# Temperature stress (max of heat/cold)
merged_df['climate_risk_temperature_stress'] = merged_df[[
    'climate_risk_heat_stress_score', 'climate_risk_unseasonably_cold_score', 'climate_risk_heatwave_score', 'climate_risk_coldwave_score'
]].max(axis=1)
ALL_NEW_FEATURES.append('climate_risk_temperature_stress')

# Precipitation stress (max of wet/dry)
merged_df['climate_risk_precipitation_stress'] = merged_df[[
    'climate_risk_excess_precip_score', 'climate_risk_drought_score', 'climate_risk_wildfires_score', 'climate_risk_flood_score'
]].max(axis=1)
ALL_NEW_FEATURES.append('climate_risk_precipitation_stress')

# Overall stress (max of all)
merged_df['climate_risk_overall_stress'] = merged_df[score_cols].max(axis=1)
ALL_NEW_FEATURES.append('climate_risk_overall_stress')

# Combined stress (sum of all)
merged_df['climate_risk_combined_stress'] = merged_df[score_cols].sum(axis=1)
ALL_NEW_FEATURES.append('climate_risk_combined_stress')

# Difference features
merged_df['climate_risk_precip_drought_diff'] = (
    merged_df['climate_risk_excess_precip_score'] - merged_df['climate_risk_drought_score']
)
ALL_NEW_FEATURES.append('climate_risk_precip_drought_diff')

merged_df['climate_risk_extremedry_diff'] = (
    merged_df['climate_risk_wildfires_score'] - merged_df['climate_risk_flood_score']
)
ALL_NEW_FEATURES.append('climate_risk_extremedry_diff')

merged_df['climate_risk_temp_diff'] = (
    merged_df['climate_risk_heat_stress_score'] - merged_df['climate_risk_unseasonably_cold_score']
)
ALL_NEW_FEATURES.append('climate_risk_temp_diff')

merged_df['climate_risk_hotdisaster_diff'] = (
    merged_df['climate_risk_heatwave_score'] - merged_df['climate_risk_coldwave_score']
)
ALL_NEW_FEATURES.append('climate_risk_hotdisaster_diff')

# Ratio features
merged_df['climate_risk_precip_drought_ratio'] = (
    merged_df['climate_risk_excess_precip_score'] / 
    (merged_df['climate_risk_drought_score'] + 0.01)
)
ALL_NEW_FEATURES.append('climate_risk_precip_drought_ratio')

merged_df['climate_risk_fires_flood_ratio'] = (
    merged_df['climate_risk_wildfires_score'] / 
    (merged_df['climate_risk_flood_score'] + 0.01)
)
ALL_NEW_FEATURES.append('climate_risk_fires_flood_ratio')

merged_df['climate_risk_temp_ratio'] = (
    merged_df['climate_risk_heat_stress_score'] / 
    (merged_df['climate_risk_unseasonably_cold_score'] + 0.01)
)
ALL_NEW_FEATURES.append('climate_risk_temp_ratio')

merged_df['climate_risk_heatcold_wave_ratio'] = (
    merged_df['climate_risk_heatwave_score'] / 
    (merged_df['climate_risk_coldwave_score'] + 0.01)
)
ALL_NEW_FEATURES.append('climate_risk_heatcold_wave_ratio')

print(f"‚úÖ Interaction features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Interaction features added: 201 total


## Seasonal Features

In [63]:
# Cyclical encoding of day of year
merged_df['climate_risk_season_sin'] = np.sin(2 * np.pi * merged_df['day_of_year'] / 365)
merged_df['climate_risk_season_cos'] = np.cos(2 * np.pi * merged_df['day_of_year'] / 365)
ALL_NEW_FEATURES.extend(['climate_risk_season_sin', 'climate_risk_season_cos'])

# Growing season weighted risk (Q2-Q3 higher weight)
growing_season_weight = merged_df['quarter'].map({1: 0.5, 2: 1.0, 3: 1.0, 4: 0.5})

for risk_type in ['drought', 'excess_precip', 'wildfires', 'storms', 'heatwave', 'coldwave']:  # Most relevant for growing season
    score_col = f'climate_risk_{risk_type}_score'
    seasonal_col = f'climate_risk_{risk_type}_seasonal'
    merged_df[seasonal_col] = merged_df[score_col] * growing_season_weight
    ALL_NEW_FEATURES.append(seasonal_col)

print(f"‚úÖ Seasonal features added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Seasonal features added: 209 total


## Momentum Features

In [None]:
# Momentum/change features
for risk_type in RISK_CATEGORIES:
    score_col = f'climate_risk_{risk_type}_score'
    
    # Daily change
    c1 = f'climate_risk_{risk_type}_change_1d'
    merged_df[c1] = merged_df.groupby('region_id')[score_col].diff(1)
    ALL_NEW_FEATURES.append(c1)
    
    # Weekly change
    c7 = f'climate_risk_{risk_type}_change_7d'
    merged_df[c7] = merged_df.groupby('region_id')[score_col].diff(7)
    ALL_NEW_FEATURES.append(c7)
    
    # Acceleration
    acc = f'climate_risk_{risk_type}_daily_acceleration'
    merged_df[acc] = merged_df.groupby('region_id')[c1].diff(1)
    ALL_NEW_FEATURES.append(acc)


print(f"‚úÖ Momentum features added: {len(ALL_NEW_FEATURES)} total")


‚úÖ Momentum features added: 236 total


## Country Aggregation

In [65]:
# Country-level aggregations
for risk_type in RISK_CATEGORIES:
    score_col = f'climate_risk_{risk_type}_score'
    weighted_col = f'climate_risk_{risk_type}_weighted'
    
    country_agg = merged_df.groupby(['country_name', 'date_on']).agg({
        score_col: ['mean', 'max', 'std'],
        weighted_col: 'sum',
        'percent_country_production': 'sum'
    }).round(4)
    
    country_agg.columns = [f'country_{risk_type}_{"_".join(col).strip()}' for col in country_agg.columns]
    country_agg = country_agg.reset_index()
    
    new_cols = [c for c in country_agg.columns if c not in ['country_name', 'date_on']]
    ALL_NEW_FEATURES.extend(new_cols)
    
    merged_df = merged_df.merge(country_agg, on=['country_name', 'date_on'], how='left')

print(f"‚úÖ Country aggregations added: {len(ALL_NEW_FEATURES)} total")

‚úÖ Country aggregations added: 281 total


## Rows Matching

In [66]:
REQUIRED_ROWS = 219161

print(f"\nüìä Before NaN handling: {len(merged_df):,} rows")

# Step 1: Identify valid IDs by simulating sample submission's approach
print("üìä Identifying valid IDs (simulating sample submission)...")

# Start fresh from original data
temp_df = pd.read_csv(f'{file_path_1}/corn_climate_risk_futures_daily_master.csv')
temp_df['date_on'] = pd.to_datetime(temp_df['date_on'])

# Add basic features (same as sample submission)
temp_df['day_of_year'] = temp_df['date_on'].dt.dayofyear
temp_df['quarter'] = temp_df['date_on'].dt.quarter

# Merge market share
temp_df = temp_df.merge(
    market_share_df[['region_id', 'percent_country_production']], 
    on='region_id', how='left'
)
temp_df['percent_country_production'] = temp_df['percent_country_production'].fillna(1.0)


üìä Before NaN handling: 320,661 rows
üìä Identifying valid IDs (simulating sample submission)...


In [67]:
# Climate Risks for Wildfires, and Storms: 

temp_df['climate_risk_cnt_locations_wildfires_risk_medium'] = temp_df[['climate_risk_cnt_locations_heat_stress_risk_medium', 'climate_risk_cnt_locations_drought_risk_medium']].min(axis=1)
temp_df['climate_risk_cnt_locations_wildfires_risk_high'] = temp_df[['climate_risk_cnt_locations_heat_stress_risk_high', 'climate_risk_cnt_locations_drought_risk_high']].min(axis=1)

temp_df['climate_risk_cnt_locations_storms_risk_medium'] = temp_df[['climate_risk_cnt_locations_heat_stress_risk_medium', 'climate_risk_cnt_locations_excess_precip_risk_medium']].min(axis=1)
temp_df['climate_risk_cnt_locations_storms_risk_high'] = temp_df[['climate_risk_cnt_locations_heat_stress_risk_high', 'climate_risk_cnt_locations_excess_precip_risk_high']].min(axis=1)

#Climate Risk for Heat wave, Cold wave, and Flood
for i in range(1, 5):
    temp_df[f'medium_heatstress_lag_{i}'] = temp_df['climate_risk_cnt_locations_heat_stress_risk_medium'].shift(i)
    temp_df[f'medium_coldstress_lag_{i}'] = temp_df['climate_risk_cnt_locations_unseasonably_cold_risk_medium'].shift(i)
    temp_df[f'medium_precip_lag_{i}'] = temp_df['climate_risk_cnt_locations_excess_precip_risk_medium'].shift(i)
    
    temp_df[f'medium_heatstress_lag_{i}'] = temp_df[f'medium_heatstress_lag_{i}'].fillna(0)
    temp_df[f'medium_coldstress_lag_{i}'] = temp_df[f'medium_coldstress_lag_{i}'].fillna(0)
    temp_df[f'medium_precip_lag_{i}'] = temp_df[f'medium_precip_lag_{i}'].fillna(0)

for j in range(1, 3): 
    temp_df[f'high_heatstress_lag_{j}'] = temp_df['climate_risk_cnt_locations_heat_stress_risk_high'].shift(j)
    temp_df[f'high_coldstress_lag_{j}'] = temp_df['climate_risk_cnt_locations_unseasonably_cold_risk_high'].shift(j)
    temp_df[f'high_precip_lag_{j}'] = temp_df['climate_risk_cnt_locations_excess_precip_risk_high'].shift(j)

    temp_df[f'high_heatstress_lag_{j}'] = temp_df[f'high_heatstress_lag_{j}'].fillna(0)
    temp_df[f'high_coldstress_lag_{j}'] = temp_df[f'high_coldstress_lag_{j}'].fillna(0)
    temp_df[f'high_precip_lag_{j}'] = temp_df[f'high_precip_lag_{j}'].fillna(0)

temp_df['total_location_by_region'] = temp_df['climate_risk_cnt_locations_heat_stress_risk_low'] + temp_df['climate_risk_cnt_locations_heat_stress_risk_medium'] \
                                        + temp_df['climate_risk_cnt_locations_heat_stress_risk_high']

temp_df['medium_heatstress_4days_average'] = temp_df[[f'medium_heatstress_lag_{i}' for i in range(1, 5)]].mean(axis=1)
temp_df['medium_heatstress_2days_average'] = temp_df[[f'medium_heatstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)
temp_df['high_heatstress_2days_average'] = temp_df[[f'high_heatstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)

temp_df['medium_coldstress_4days_average'] = temp_df[[f'medium_coldstress_lag_{i}' for i in range(1, 5)]].mean(axis=1)
temp_df['medium_coldstress_2days_average'] = temp_df[[f'medium_coldstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)
temp_df['high_coldstress_2days_average'] = temp_df[[f'high_coldstress_lag_{i}' for i in range(1, 3)]].mean(axis=1)

temp_df['medium_precip_4days_average'] = temp_df[[f'medium_precip_lag_{i}' for i in range(1, 5)]].mean(axis=1)
temp_df['medium_precip_2days_average'] = temp_df[[f'medium_precip_lag_{i}' for i in range(1, 3)]].mean(axis=1)
temp_df['high_precip_2days_average'] = temp_df[[f'high_precip_lag_{i}' for i in range(1, 3)]].mean(axis=1)

temp_df['climate_risk_cnt_locations_heatwave_risk_high'] = (temp_df['medium_heatstress_4days_average'] + temp_df['high_heatstress_2days_average']) / 2
temp_df['climate_risk_cnt_locations_heatwave_risk_medium'] = (temp_df['medium_heatstress_2days_average'] + temp_df['high_heatstress_lag_1']) / 2

temp_df['climate_risk_cnt_locations_coldwave_risk_high'] = (temp_df['medium_coldstress_4days_average'] + temp_df['high_coldstress_2days_average']) / 2
temp_df['climate_risk_cnt_locations_coldwave_risk_medium'] = (temp_df['medium_coldstress_2days_average'] + temp_df['high_coldstress_lag_1']) / 2

temp_df['climate_risk_cnt_locations_flood_risk_high'] = (temp_df['medium_precip_4days_average'] + temp_df['high_precip_2days_average']) / 2
temp_df['climate_risk_cnt_locations_flood_risk_medium'] = (temp_df['medium_precip_2days_average'] + temp_df['high_precip_lag_1']) / 2

In [68]:
for risk_type in RISK_CATEGORIES:
    #low_col = f'climate_risk_cnt_locations_{risk_type}_risk_low'
    med_col = f'climate_risk_cnt_locations_{risk_type}_risk_medium' 
    high_col = f'climate_risk_cnt_locations_{risk_type}_risk_high'
    
    total = temp_df['total_location_by_region']
    risk_score = (temp_df[med_col] + 2 * temp_df[high_col]) / (total + 1e-6)
    weighted = risk_score * (temp_df['percent_country_production'] / 100)
    
    temp_df[f'climate_risk_{risk_type}_score'] = risk_score
    temp_df[f'climate_risk_{risk_type}_weighted'] = weighted

# Create composite indices
score_cols = [f'climate_risk_{r}_score' for r in RISK_CATEGORIES]
temp_df['climate_risk_temperature_stress'] = temp_df[['climate_risk_heat_stress_score', 'climate_risk_unseasonably_cold_score']].max(axis=1)
temp_df['climate_risk_precipitation_stress'] = temp_df[['climate_risk_excess_precip_score', 'climate_risk_drought_score']].max(axis=1)
temp_df['climate_risk_overall_stress'] = temp_df[score_cols].max(axis=1)
temp_df['climate_risk_combined_stress'] = temp_df[score_cols].mean(axis=1)

# Sort for rolling operations
temp_df = temp_df.sort_values(['region_id', 'date_on'])

# Create rolling features (7, 14, 30 days - same as sample submission)
for window in [7, 14, 30]:
    for risk_type in RISK_CATEGORIES:
        score_col = f'climate_risk_{risk_type}_score'
        temp_df[f'climate_risk_{risk_type}_ma_{window}d'] = (
            temp_df.groupby('region_id')[score_col]
            .transform(lambda x: x.rolling(window, min_periods=1).mean())
        )
        temp_df[f'climate_risk_{risk_type}_max_{window}d'] = (
            temp_df.groupby('region_id')[score_col]
            .transform(lambda x: x.rolling(window, min_periods=1).max())
        )

# Create momentum features (same as sample submission)
for risk_type in RISK_CATEGORIES:
    score_col = f'climate_risk_{risk_type}_score'
    temp_df[f'climate_risk_{risk_type}_change_1d'] = temp_df.groupby('region_id')[score_col].diff(1)
    temp_df[f'climate_risk_{risk_type}_change_7d'] = temp_df.groupby('region_id')[score_col].diff(7)
    temp_df[f'climate_risk_{risk_type}_acceleration'] = temp_df.groupby('region_id')[f'climate_risk_{risk_type}_change_1d'].diff(1)

# Create country aggregations (same as sample submission)
for risk_type in RISK_CATEGORIES:
    score_col = f'climate_risk_{risk_type}_score'
    weighted_col = f'climate_risk_{risk_type}_weighted'
    
    country_agg = temp_df.groupby(['country_name', 'date_on']).agg({
        score_col: ['mean', 'max', 'std'],
        weighted_col: 'sum',
        'percent_country_production': 'sum'
    }).round(4)
    
    country_agg.columns = [f'country_{risk_type}_{"_".join(col).strip()}' for col in country_agg.columns]
    country_agg = country_agg.reset_index()
    
    temp_df = temp_df.merge(country_agg, on=['country_name', 'date_on'], how='left')


In [69]:
# Now dropna to get valid IDs (this is what sample submission does)
valid_ids = temp_df.dropna()['ID'].tolist()
print(f"üìä Valid IDs from sample submission approach: {len(valid_ids):,}")

# Clean up
del temp_df


üìä Valid IDs from sample submission approach: 219,161


In [70]:
# Step 2: Fill all engineered features in merged_df with 0
print("üìä Filling engineered features with 0...")

for col in ALL_NEW_FEATURES:
    if col in merged_df.columns:
        merged_df[col] = merged_df[col].fillna(0)

# Also fill any remaining NaN in climate_risk columns
climate_cols = [c for c in merged_df.columns if c.startswith('climate_risk_')]
for col in climate_cols:
    if merged_df[col].isna().any():
        merged_df[col] = merged_df[col].fillna(0)

# Step 3: Filter to valid IDs
print("üìä Filtering to valid IDs...")

# First, drop rows with NaN in futures columns (non-trading days)
futures_cols = [c for c in merged_df.columns if c.startswith('futures_')]
baseline_df = merged_df.dropna(subset=futures_cols)

# Then filter to only valid IDs
baseline_df = baseline_df[baseline_df['ID'].isin(valid_ids)]

print(f"üìä After NaN handling: {len(baseline_df):,} rows")
print(f"üìä Expected rows: {REQUIRED_ROWS:,}")
print(f"üìä Match: {'‚úÖ' if len(baseline_df) == REQUIRED_ROWS else '‚ùå'}")
print(f"üìä Total new features: {len(ALL_NEW_FEATURES)}")

# Final verification
if len(baseline_df) != REQUIRED_ROWS:
    diff = len(baseline_df) - REQUIRED_ROWS
    print(f"\n‚ö†Ô∏è Row count difference: {diff:+d}")

üìä Filling engineered features with 0...
üìä Filtering to valid IDs...
üìä After NaN handling: 219,161 rows
üìä Expected rows: 219,161
üìä Match: ‚úÖ
üìä Total new features: 281


## Feature Analysis

In [71]:
def analyze_feature_contributions(df, climate_cols, futures_cols):
    """
    Analyze contribution of each climate feature.
    Returns DataFrame with sig_count, max_corr, etc for each feature.
    """
    feature_stats = {col: {'sig_count': 0, 'total': 0, 'max_corr': 0, 'sig_corrs': []} 
                     for col in climate_cols}
    
    for country in df['country_name'].unique():
        df_country = df[df['country_name'] == country]
        
        for month in df_country['date_on_month'].unique():
            df_month = df_country[df_country['date_on_month'] == month]
            
            for clim in climate_cols:
                for fut in futures_cols:
                    if df_month[clim].std() > 0 and df_month[fut].std() > 0:
                        corr = df_month[[clim, fut]].corr().iloc[0, 1]
                        
                        feature_stats[clim]['total'] += 1
                        
                        if abs(corr) >= 0.5:
                            feature_stats[clim]['sig_count'] += 1
                            feature_stats[clim]['sig_corrs'].append(abs(corr))
                        
                        if abs(corr) > feature_stats[clim]['max_corr']:
                            feature_stats[clim]['max_corr'] = abs(corr)
    
    results = []
    for col, stats in feature_stats.items():
        avg_sig = np.mean(stats['sig_corrs']) if stats['sig_corrs'] else 0
        results.append({
            'feature': col,
            'sig_count': stats['sig_count'],
            'total': stats['total'],
            'sig_pct': stats['sig_count'] / stats['total'] * 100 if stats['total'] > 0 else 0,
            'max_corr': round(stats['max_corr'], 4),
            'avg_sig_corr': round(avg_sig, 4)
        })
    
    return pd.DataFrame(results).sort_values('sig_count', ascending=False)

print("‚úÖ Helper functions defined")

‚úÖ Helper functions defined


In [72]:
# Analyze feature contributions
print("üìä Analyzing feature contributions (this takes ~3 minutes)...")

climate_cols = [c for c in baseline_df.columns if c.startswith('climate_risk_')]
futures_cols = [c for c in baseline_df.columns if c.startswith('futures_')]

print(f"   Climate features: {len(climate_cols)}")
print(f"   Futures features: {len(futures_cols)}")

feature_analysis = analyze_feature_contributions(baseline_df, climate_cols, futures_cols)

üìä Analyzing feature contributions (this takes ~3 minutes)...
   Climate features: 258
   Futures features: 17


In [73]:
# Show top features
print("\nüîù TOP 25 Features by Significant Correlation Count:")
print("="*80)
print(feature_analysis.head(25).to_string(index=False))


üîù TOP 25 Features by Significant Correlation Count:
                              feature  sig_count  total  sig_pct  max_corr  avg_sig_corr
        climate_risk_flood_cumsum_60d         64   2244 2.852050    0.6520        0.5559
            climate_risk_flood_ma_60d         64   2244 2.852050    0.6520        0.5571
          climate_risk_drought_ma_60d         54   2244 2.406417    0.7336        0.5992
      climate_risk_drought_cumsum_60d         53   2244 2.361854    0.7336        0.6029
    climate_risk_excess_precip_ma_60d         48   2244 2.139037    0.6126        0.5434
climate_risk_excess_precip_cumsum_60d         47   2244 2.094474    0.6126        0.5463
         climate_risk_drought_ema_30d         42   2244 1.871658    0.7081        0.5893
      climate_risk_drought_cumsum_30d         41   2244 1.827094    0.7243        0.5934
        climate_risk_coldwave_max_60d         39   2023 1.927830    0.7002        0.5664
          climate_risk_drought_ma_30d         39   224

In [74]:
# Show bottom features (candidates for removal)
print("\n‚ùå BOTTOM 25 Features (candidates for removal):")
print("="*80)
print(feature_analysis.tail(25).to_string(index=False))


‚ùå BOTTOM 25 Features (candidates for removal):
                                              feature  sig_count  total  sig_pct  max_corr  avg_sig_corr
                    climate_risk_precip_drought_ratio          0   2244      0.0    0.3911           0.0
                        climate_risk_hotdisaster_diff          0   2193      0.0    0.3649           0.0
                               climate_risk_temp_diff          0   2193      0.0    0.3132           0.0
                         climate_risk_extremedry_diff          0   2244      0.0    0.4797           0.0
                         climate_risk_combined_stress          0   2244      0.0    0.3725           0.0
                          climate_risk_overall_stress          0   2244      0.0    0.3590           0.0
                    climate_risk_precipitation_stress          0   2244      0.0    0.3650           0.0
                      climate_risk_temperature_stress          0   2193      0.0    0.3465           0.0
     

In [77]:
# Identify features to remove
zero_sig_features = feature_analysis[feature_analysis['sig_count'] == 0]['feature'].tolist()

# Keep original cnt_locations columns (required by competition)
original_cols = [c for c in zero_sig_features if 'cnt_locations' in c]
FEATURES_TO_REMOVE = [c for c in zero_sig_features if c not in original_cols]

print(f"\nüìä Feature Selection Summary:")
print(f"   Total climate features: {len(climate_cols)}")
print(f"   Features with 0 significant correlations: {len(zero_sig_features)}")
print(f"   Features to remove: {len(FEATURES_TO_REMOVE)}")
print(f"   Total significant correlations: {feature_analysis['sig_count'].sum()}")


üìä Feature Selection Summary:
   Total climate features: 258
   Features with 0 significant correlations: 162
   Features to remove: 140
   Total significant correlations: 1255


In [78]:
# Create optimized dataset by removing weak features
optimized_df = baseline_df.copy()

cols_before = len([c for c in optimized_df.columns if c.startswith('climate_risk_')])
optimized_df = optimized_df.drop(columns=FEATURES_TO_REMOVE, errors='ignore')
cols_after = len([c for c in optimized_df.columns if c.startswith('climate_risk_')])

print(f"üìä Climate features: {cols_before} ‚Üí {cols_after} (removed {cols_before - cols_after})")

üìä Climate features: 258 ‚Üí 118 (removed 140)


## 4. Evaluation Test

In [79]:
def compute_cfcs(df, verbose=True):
    """
    Compute CFCS score for a dataframe.
    CFCS = (0.5 √ó Avg_Sig_Corr) + (0.3 √ó Max_Corr) + (0.2 √ó Sig_Count%)
    """
    climate_cols = [c for c in df.columns if c.startswith("climate_risk_")]
    futures_cols = [c for c in df.columns if c.startswith("futures_")]
    
    correlations = []
    
    for country in df['country_name'].unique():
        df_country = df[df['country_name'] == country]
        
        for month in df_country['date_on_month'].unique():
            df_month = df_country[df_country['date_on_month'] == month]
            
            for clim in climate_cols:
                for fut in futures_cols:
                    if df_month[clim].std() > 0 and df_month[fut].std() > 0:
                        corr = df_month[[clim, fut]].corr().iloc[0, 1]
                        correlations.append(corr)
    
    correlations = pd.Series(correlations).dropna()
    abs_corrs = correlations.abs()
    sig_corrs = abs_corrs[abs_corrs >= 0.5]
    
    avg_sig = sig_corrs.mean() if len(sig_corrs) > 0 else 0
    max_corr = abs_corrs.max() if len(abs_corrs) > 0 else 0
    sig_pct = len(sig_corrs) / len(correlations) * 100 if len(correlations) > 0 else 0
    
    avg_sig_score = min(100, avg_sig * 100)
    max_score = min(100, max_corr * 100)
    
    cfcs = (0.5 * avg_sig_score) + (0.3 * max_score) + (0.2 * sig_pct)
    
    result = {
        'cfcs': round(cfcs, 2),
        'avg_sig_corr': round(avg_sig, 4),
        'max_corr': round(max_corr, 4),
        'sig_count': len(sig_corrs),
        'total': len(correlations),
        'sig_pct': round(sig_pct, 4),
        'n_features': len(climate_cols)
    }
    
    if verbose:
        print(f"CFCS: {result['cfcs']} | Sig: {result['sig_count']}/{result['total']} ({result['sig_pct']:.2f}%) | Features: {result['n_features']}")
    
    return result

In [80]:
print("üìä Computing CFCS scores...\n")

print("Baseline (all features):")
baseline_score = compute_cfcs(baseline_df)

print("\nOptimized (weak features removed):")
optimized_score = compute_cfcs(optimized_df)

improvement = optimized_score['cfcs'] - baseline_score['cfcs']
print(f"\n{'üìà IMPROVEMENT!' if improvement > 0 else 'üìâ No improvement'}")
print(f"   Delta: {improvement:+.2f}")

üìä Computing CFCS scores...

Baseline (all features):
CFCS: 49.8 | Sig: 1255/469319 (0.27%) | Features: 258

Optimized (weak features removed):
CFCS: 49.86 | Sig: 1255/231642 (0.54%) | Features: 118

üìà IMPROVEMENT!
   Delta: +0.06


In [82]:
# Validation
REQUIRED_ROWS = 219161
submission = optimized_df.copy()

# Safety: fill any remaining nulls
if submission.isnull().sum().sum() > 0:
    print("‚ö†Ô∏è Filling remaining nulls with 0...")
    submission = submission.fillna(0)

print("\n" + "="*60)
print("‚úÖ SUBMISSION VALIDATION")
print("="*60)

checks = [
    ('Row count', len(submission) == REQUIRED_ROWS, f"{len(submission):,}/{REQUIRED_ROWS:,}"),
    ('ID column', 'ID' in submission.columns, str('ID' in submission.columns)),
    ('No nulls', submission.isnull().sum().sum() == 0, f"{submission.isnull().sum().sum()} nulls"),
]

for name, passed, detail in checks:
    print(f"{'‚úÖ' if passed else '‚ùå'} {name}: {detail}")

print("="*60)


‚úÖ SUBMISSION VALIDATION
‚úÖ Row count: 219,161/219,161
‚úÖ ID column: True
‚úÖ No nulls: 0 nulls


## 5. Submit to Competition

In [83]:
# Must drop NAs in order for submission to go through. 
optimized_df_copy = optimized_df.dropna()
optimized_df_copy.head()

Unnamed: 0,ID,crop_name,country_name,country_code,region_name,region_id,harvest_period,growing_season_year,date_on,climate_risk_cnt_locations_heat_stress_risk_low,climate_risk_cnt_locations_heat_stress_risk_medium,climate_risk_cnt_locations_heat_stress_risk_high,climate_risk_cnt_locations_unseasonably_cold_risk_low,climate_risk_cnt_locations_unseasonably_cold_risk_medium,climate_risk_cnt_locations_unseasonably_cold_risk_high,climate_risk_cnt_locations_excess_precip_risk_low,climate_risk_cnt_locations_excess_precip_risk_medium,climate_risk_cnt_locations_excess_precip_risk_high,climate_risk_cnt_locations_drought_risk_low,climate_risk_cnt_locations_drought_risk_medium,climate_risk_cnt_locations_drought_risk_high,futures_close_ZC_1,futures_close_ZC_2,futures_close_ZW_1,futures_close_ZS_1,futures_zc1_ret_pct,futures_zc1_ret_log,futures_zc_term_spread,futures_zc_term_ratio,futures_zc1_ma_20,futures_zc1_ma_60,futures_zc1_ma_120,futures_zc1_vol_20,futures_zc1_vol_60,futures_zw_zc_spread,futures_zc_zw_ratio,futures_zs_zc_spread,futures_zc_zs_ratio,date_on_year,date_on_month,date_on_year_month,day_of_year,quarter,climate_risk_cnt_locations_wildfires_risk_medium,climate_risk_cnt_locations_wildfires_risk_high,climate_risk_cnt_locations_storms_risk_medium,climate_risk_cnt_locations_storms_risk_high,medium_heatstress_lag_1,medium_coldstress_lag_1,medium_precip_lag_1,medium_heatstress_lag_2,medium_coldstress_lag_2,medium_precip_lag_2,medium_heatstress_lag_3,medium_coldstress_lag_3,medium_precip_lag_3,medium_heatstress_lag_4,medium_coldstress_lag_4,medium_precip_lag_4,high_heatstress_lag_1,high_coldstress_lag_1,high_precip_lag_1,high_heatstress_lag_2,high_coldstress_lag_2,high_precip_lag_2,total_location_by_region,medium_heatstress_4days_average,medium_heatstress_2days_average,high_heatstress_2days_average,medium_coldstress_4days_average,medium_coldstress_2days_average,high_coldstress_2days_average,medium_precip_4days_average,medium_precip_2days_average,high_precip_2days_average,climate_risk_cnt_locations_heatwave_risk_high,climate_risk_cnt_locations_heatwave_risk_medium,climate_risk_cnt_locations_coldwave_risk_high,climate_risk_cnt_locations_coldwave_risk_medium,climate_risk_cnt_locations_flood_risk_high,climate_risk_cnt_locations_flood_risk_medium,percent_country_production,climate_risk_excess_precip_weighted,climate_risk_excess_precip_ma_7d,climate_risk_excess_precip_max_7d,climate_risk_drought_ma_7d,climate_risk_drought_max_7d,climate_risk_flood_ma_7d,climate_risk_flood_max_7d,climate_risk_heat_stress_max_14d,climate_risk_excess_precip_ma_14d,climate_risk_excess_precip_max_14d,climate_risk_drought_ma_14d,climate_risk_drought_max_14d,climate_risk_flood_ma_14d,climate_risk_flood_max_14d,climate_risk_wildfires_ma_14d,climate_risk_wildfires_max_14d,climate_risk_heatwave_max_14d,climate_risk_coldwave_ma_14d,climate_risk_coldwave_max_14d,climate_risk_heat_stress_ma_30d,climate_risk_heat_stress_max_30d,climate_risk_unseasonably_cold_ma_30d,climate_risk_unseasonably_cold_max_30d,climate_risk_excess_precip_ma_30d,climate_risk_excess_precip_max_30d,climate_risk_drought_ma_30d,climate_risk_drought_max_30d,climate_risk_flood_ma_30d,climate_risk_flood_max_30d,climate_risk_wildfires_ma_30d,climate_risk_wildfires_max_30d,climate_risk_coldwave_ma_30d,climate_risk_coldwave_max_30d,climate_risk_heat_stress_ma_60d,climate_risk_heat_stress_max_60d,climate_risk_unseasonably_cold_ma_60d,climate_risk_unseasonably_cold_max_60d,climate_risk_excess_precip_ma_60d,climate_risk_excess_precip_max_60d,climate_risk_drought_ma_60d,climate_risk_drought_max_60d,climate_risk_flood_ma_60d,climate_risk_flood_max_60d,climate_risk_wildfires_ma_60d,climate_risk_wildfires_max_60d,climate_risk_heatwave_ma_60d,climate_risk_heatwave_max_60d,climate_risk_coldwave_ma_60d,climate_risk_coldwave_max_60d,climate_risk_excess_precip_lag_7d,climate_risk_drought_lag_7d,climate_risk_drought_lag_14d,climate_risk_excess_precip_ema_14d,climate_risk_drought_ema_14d,climate_risk_flood_ema_14d,climate_risk_wildfires_ema_14d,climate_risk_coldwave_ema_14d,climate_risk_unseasonably_cold_ema_30d,climate_risk_excess_precip_ema_30d,climate_risk_drought_ema_30d,climate_risk_flood_ema_30d,climate_risk_wildfires_ema_30d,climate_risk_coldwave_ema_30d,climate_risk_unseasonably_cold_vol_14d,climate_risk_excess_precip_vol_14d,climate_risk_drought_vol_14d,climate_risk_wildfires_vol_14d,climate_risk_heatwave_vol_14d,climate_risk_coldwave_vol_14d,climate_risk_heat_stress_vol_30d,climate_risk_unseasonably_cold_vol_30d,climate_risk_excess_precip_vol_30d,climate_risk_drought_vol_30d,climate_risk_flood_vol_30d,climate_risk_wildfires_vol_30d,climate_risk_heatwave_vol_30d,climate_risk_coldwave_vol_30d,climate_risk_heat_stress_cumsum_30d,climate_risk_unseasonably_cold_cumsum_30d,climate_risk_excess_precip_cumsum_30d,climate_risk_drought_cumsum_30d,climate_risk_flood_cumsum_30d,climate_risk_wildfires_cumsum_30d,climate_risk_coldwave_cumsum_30d,climate_risk_heat_stress_cumsum_60d,climate_risk_unseasonably_cold_cumsum_60d,climate_risk_excess_precip_cumsum_60d,climate_risk_drought_cumsum_60d,climate_risk_flood_cumsum_60d,climate_risk_wildfires_cumsum_60d,climate_risk_heatwave_cumsum_60d,climate_risk_coldwave_cumsum_60d,climate_risk_excess_precip_log,climate_risk_flood_log,climate_risk_precip_drought_diff,climate_risk_season_cos,country_heat_stress_climate_risk_heat_stress_score_mean,country_heat_stress_climate_risk_heat_stress_score_max,country_heat_stress_climate_risk_heat_stress_score_std,country_heat_stress_climate_risk_heat_stress_weighted_sum,country_heat_stress_percent_country_production_sum,country_unseasonably_cold_climate_risk_unseasonably_cold_score_mean,country_unseasonably_cold_climate_risk_unseasonably_cold_score_max,country_unseasonably_cold_climate_risk_unseasonably_cold_score_std,country_unseasonably_cold_climate_risk_unseasonably_cold_weighted_sum,country_unseasonably_cold_percent_country_production_sum,country_excess_precip_climate_risk_excess_precip_score_mean,country_excess_precip_climate_risk_excess_precip_score_max,country_excess_precip_climate_risk_excess_precip_score_std,country_excess_precip_climate_risk_excess_precip_weighted_sum,country_excess_precip_percent_country_production_sum,country_drought_climate_risk_drought_score_mean,country_drought_climate_risk_drought_score_max,country_drought_climate_risk_drought_score_std,country_drought_climate_risk_drought_weighted_sum,country_drought_percent_country_production_sum,country_flood_climate_risk_flood_score_mean,country_flood_climate_risk_flood_score_max,country_flood_climate_risk_flood_score_std,country_flood_climate_risk_flood_weighted_sum,country_flood_percent_country_production_sum,country_wildfires_climate_risk_wildfires_score_mean,country_wildfires_climate_risk_wildfires_score_max,country_wildfires_climate_risk_wildfires_score_std,country_wildfires_climate_risk_wildfires_weighted_sum,country_wildfires_percent_country_production_sum,country_storms_climate_risk_storms_score_mean,country_storms_climate_risk_storms_score_max,country_storms_climate_risk_storms_score_std,country_storms_climate_risk_storms_weighted_sum,country_storms_percent_country_production_sum,country_heatwave_climate_risk_heatwave_score_mean,country_heatwave_climate_risk_heatwave_score_max,country_heatwave_climate_risk_heatwave_score_std,country_heatwave_climate_risk_heatwave_weighted_sum,country_heatwave_percent_country_production_sum,country_coldwave_climate_risk_coldwave_score_mean,country_coldwave_climate_risk_coldwave_score_max,country_coldwave_climate_risk_coldwave_score_std,country_coldwave_climate_risk_coldwave_weighted_sum,country_coldwave_percent_country_production_sum
7,36bcf707-3c9a-4516-a20b-eaaaff8ee81c,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-08,1,0,0,0,0,1,1,0,0,1,0,0,357.0,362.75,478.5,879.5,0.011331,0.011268,5.75,1.016106,365.1125,368.979167,372.45625,0.010629,0.010832,121.5,0.746082,522.5,0.405912,2016,1,2016_01,8,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.249999,1.499999,0.0,0.0,1.999998,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.249999,1.499999,0.0,0.0,1.999998,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.249999,1.499999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.350815,1.999998,0.0,0.0,0.0,0.0,1.300962,0.0,0.0,0.0,0.0,0.0,0.534522,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.534522,0.0,15.999984,0.0,0.0,0.0,0.0,9.99999,0.0,15.999984,0.0,0.0,0.0,0.0,0.0,9.99999,0.0,0.0,0.0,0.990532,0.0,0.0,0.0,0.0,89.0,1.8831,2.0,0.3667,1.539,89.0,0.1982,1.6667,0.4192,0.2824,89.0,0.18,2.0,0.5568,0.0875,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,1.529,2.0,0.1218,1.3164,89.0
10,a7c6b1e6-6f03-4d3b-be88-0b607303c97a,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-11,1,0,0,0,0,1,0,0,1,1,0,0,351.75,357.5,469.0,881.0,-0.014706,-0.014815,5.75,1.016347,363.8375,368.525,372.0,0.010219,0.01084,117.25,0.75,529.25,0.399262,2016,1,2016_01,11,1,0,0,0,0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1,0.0,0.0,0.0,0.0,0.0,1.0,0.5,1.0,0.0,0.0,0.0,0.5,0.5,0.25,0.5,1.0,0.02,0.571428,1.999998,0.0,0.0,0.214286,0.999999,0.0,0.363636,1.999998,0.0,0.0,0.136364,0.999999,0.0,0.0,0.0,1.318181,1.499999,0.0,0.0,1.999998,1.999998,0.363636,1.999998,0.0,0.0,0.136364,0.999999,0.0,0.0,1.318181,1.499999,0.0,0.0,1.999998,1.999998,0.363636,1.999998,0.0,0.0,0.136364,0.999999,0.0,0.0,0.0,0.0,1.318181,1.499999,0.0,0.0,0.0,0.608432,0.0,0.241056,0.0,1.416494,1.999998,0.472939,0.0,0.182163,0.0,1.370392,0.0,0.674199,0.0,0.0,0.0,0.462208,0.0,0.0,0.674199,0.0,0.323335,0.0,0.0,0.462208,0.0,21.999978,3.999996,0.0,1.499999,0.0,14.499986,0.0,21.999978,3.999996,0.0,1.499999,0.0,0.0,14.499986,1.098612,0.693147,1.999998,0.982126,0.0,0.0,0.0,0.0,89.0,1.0824,2.0,0.8124,0.7535,89.0,0.4927,2.0,0.6497,0.5643,89.0,0.2,2.0,0.5,0.105,89.0,0.4728,1.5,0.5074,0.4798,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,1.3416,1.5,0.3637,1.091,89.0
11,8f030962-0267-4f93-97c9-5ae11990b65e,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-12,1,0,0,0,0,1,0,1,0,1,0,0,356.75,362.25,481.25,890.75,0.014215,0.014115,5.5,1.015417,363.025,368.2125,371.616667,0.010766,0.010967,124.5,0.741299,534.0,0.400505,2016,1,2016_01,12,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1,0.0,0.0,0.0,0.0,0.0,1.0,0.5,0.5,0.5,0.0,0.0,0.5,0.5,0.5,0.75,1.0,0.01,0.714285,1.999998,0.0,0.0,0.464285,1.749998,0.0,0.416666,1.999998,0.0,0.0,0.270833,1.749998,0.0,0.0,0.0,1.333332,1.499999,0.0,0.0,1.999998,1.999998,0.416666,1.999998,0.0,0.0,0.270833,1.749998,0.0,0.0,1.333332,1.499999,0.0,0.0,1.999998,1.999998,0.416666,1.999998,0.0,0.0,0.270833,1.749998,0.0,0.0,0.0,0.0,1.333332,1.499999,0.0,0.0,0.0,0.672068,0.0,0.486283,0.0,1.430065,1.999998,0.534674,0.0,0.365805,0.0,1.385573,0.0,0.668557,0.0,0.0,0.0,0.443812,0.0,0.0,0.668557,0.0,0.558593,0.0,0.0,0.443812,0.0,23.999976,4.999995,0.0,3.249997,0.0,15.999984,0.0,23.999976,4.999995,0.0,3.249997,0.0,0.0,15.999984,0.693147,1.0116,0.999999,0.97874,0.0,0.0,0.0,0.0,89.0,1.45,2.0,0.86,0.976,89.0,0.5424,2.0,0.5952,0.6523,89.0,0.16,2.0,0.4726,0.085,89.0,0.5769,1.75,0.6176,0.6135,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,1.0378,1.5,0.4292,0.7757,89.0
12,c18c8f9b-63f2-4017-9923-f904db0f7da9,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-13,1,0,0,1,0,0,0,0,1,1,0,0,358.0,363.0,478.0,899.0,0.003504,0.003498,5.0,1.013966,361.975,367.9,371.239583,0.009983,0.010968,120.0,0.748954,541.0,0.39822,2016,1,2016_01,13,1,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1,0.0,0.0,0.0,0.0,0.0,1.0,0.75,0.5,0.5,0.0,0.0,0.5,0.5,0.625,0.25,1.0,0.02,0.999999,1.999998,0.0,0.0,0.678571,1.749998,0.0,0.538461,1.999998,0.0,0.0,0.365384,1.749998,0.0,0.0,0.0,1.346153,1.499999,0.0,0.0,1.846152,1.999998,0.538461,1.999998,0.0,0.0,0.365384,1.749998,0.0,0.0,1.346153,1.499999,0.0,0.0,1.846152,1.999998,0.538461,1.999998,0.0,0.0,0.365384,1.749998,0.0,0.0,0.0,0.0,1.346153,1.499999,0.0,0.0,0.0,0.881758,0.0,0.646356,0.0,1.441108,1.777446,0.69773,0.0,0.492013,0.0,1.398306,0.5547,0.776249,0.0,0.0,0.0,0.427425,0.0,0.5547,0.776249,0.0,0.634226,0.0,0.0,0.427425,0.0,23.999976,6.999993,0.0,4.749995,0.0,17.499983,0.0,23.999976,6.999993,0.0,4.749995,0.0,0.0,17.499983,1.098612,0.91629,1.999998,0.975065,0.0,0.0,0.0,0.0,89.0,0.558,2.0,0.8801,0.1877,89.0,0.848,2.0,0.7344,0.9941,89.0,0.22,2.0,0.5017,0.0955,89.0,0.6237,1.75,0.6216,0.7104,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,1.1345,1.5,0.521,0.8138,89.0
13,070e5716-45dc-44cd-8113-a20d6248cefc,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-14,1,0,0,1,0,0,0,1,0,1,0,0,358.0,362.75,468.75,882.25,0.0,0.0,4.75,1.013268,361.0125,367.65,370.952083,0.009992,0.010902,110.75,0.763733,524.25,0.405781,2016,1,2016_01,14,1,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.5,0.5,0.0,0.0,0.25,0.0,0.5,0.75,1.0,0.01,1.142856,1.999998,0.0,0.0,0.928571,1.749998,0.0,0.571428,1.999998,0.0,0.0,0.464285,1.749998,0.0,0.0,0.0,1.285713,1.499999,0.0,0.0,1.714284,1.999998,0.571428,1.999998,0.0,0.0,0.464285,1.749998,0.0,0.0,1.285713,1.499999,0.0,0.0,1.714284,1.999998,0.571428,1.999998,0.0,0.0,0.464285,1.749998,0.0,0.0,0.0,0.0,1.285713,1.499999,0.0,0.0,0.0,0.899981,0.0,0.81645,0.0,1.296064,1.588494,0.729862,0.0,0.625744,0.0,1.302811,0.726272,0.755928,0.0,0.0,0.0,0.468807,0.0,0.726272,0.755928,0.0,0.71291,0.0,0.0,0.468807,0.0,23.999976,7.999992,0.0,6.499994,0.0,17.999982,0.0,23.999976,7.999992,0.0,6.499994,0.0,0.0,17.999982,0.693147,1.0116,0.999999,0.9711,0.0,0.0,0.0,0.0,89.0,0.1141,2.0,0.4107,0.0761,89.0,1.039,2.0,0.6199,1.1249,89.0,0.14,2.0,0.4453,0.0745,89.0,0.7973,2.0,0.6825,0.9266,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.0,0.0,0.0,0.0,89.0,0.7697,1.75,0.5459,0.4819,89.0


In [84]:
optimized_df_copy.to_csv('/kaggle/working/submission.csv', index=False)