In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Read the experiment results CSV into the notebook's working frame `df`
results_csv = '../data/experimental/experiment_results_3.csv'
df = pd.read_csv(results_csv)

## Results for predictive performance

In [19]:
# Keep only the rows whose `split` column is 'within_sample'
within_mask = df['split'] == 'within_sample'
df_within = df.loc[within_mask]

# Average each metric over all rows that share a (model, scenario_type) pair,
# then turn the group keys back into ordinary columns
metric_cols = ['RMSE', 'MAE', 'R2', 'Stability']
grouped_within = df_within.groupby(['model', 'scenario_type'])
agg = grouped_within[metric_cols].mean().reset_index()

In [20]:
# Reshape to one row per model, with one column per (metric, scenario_type) pair
wide = agg.pivot(
    index='model',
    columns='scenario_type',
    values=['RMSE', 'MAE', 'R2', 'Stability'],
)

# Collapse the two-level column index into flat "<metric>_<scenario_type>" labels
wide.columns = ["{}_{}".format(*pair) for pair in wide.columns]

# Move `model` out of the index so it shows up as a regular column
pivot = wide.reset_index()

In [21]:
# Show the wide summary: one row per model, one "<metric>_<scenario_type>" column per pair
pivot

Unnamed: 0,model,RMSE_baselines,RMSE_diversified,MAE_baselines,MAE_diversified,R2_baselines,R2_diversified,Stability_baselines,Stability_diversified
0,MLP,10.839541,5.759207,8.533659,2.679846,-1.425324,0.425906,0.000553,0.001747
1,RandomForest,6.784549,4.493421,5.2626,2.577088,0.131412,0.642109,0.019117,0.53838
2,Ridge,7.166562,6.788895,5.559178,5.48994,0.032576,0.284465,4.3e-05,8.3e-05
3,XGBoost,6.719461,4.11539,5.177952,2.295706,0.151075,0.697127,0.021734,0.733346


## Results for transfer ratios / stability comparison

In [5]:
# Preview the first rows of the raw results to see which columns are available
df.head()

Unnamed: 0,run,scenario_type,scenario,split,context,model,RMSE,MAE,R2,Stability
0,0,baselines,full,within_sample,within_sample,Ridge,7.610812,5.862182,0.040328,0.000247
1,0,baselines,full,within_sample,within_sample,RandomForest,7.141293,5.471655,0.155082,0.028876
2,0,baselines,full,within_sample,within_sample,XGBoost,7.430083,5.558425,0.085364,0.02849
3,0,baselines,full,within_sample,within_sample,MLP,8.933511,6.513849,-0.322224,0.000515
4,0,baselines,full,region_holdout,east_asia_n_pacific,Ridge,8.262882,6.642976,-0.203283,-8.3e-05


In [8]:
# Scenarios of interest. NOTE: the scenario filter is currently disabled, so
# every scenario present in `df` is kept in `merged`. To restrict the table,
# apply: merged[merged['scenario'].isin(scenarios)]
scenarios = ['full', 'median_balanced', 'max_balanced']

# Keys that identify one aggregate row; used for both grouping and joining
group_keys = ['model', 'scenario_type', 'scenario']


def _mean_rmse(split_df, column_name):
    """Return the mean RMSE per (model, scenario_type, scenario) group.

    Parameters
    ----------
    split_df : pandas.DataFrame
        Rows of a single evaluation split; must contain the `group_keys`
        columns and an 'RMSE' column.
    column_name : str
        Name given to the aggregated RMSE column in the result.

    Returns
    -------
    pandas.DataFrame
        One row per group, with the group keys as ordinary columns plus
        `column_name` holding the group's mean RMSE.
    """
    return split_df.groupby(group_keys)['RMSE'].mean().rename(column_name).reset_index()


# Split-specific subsets of the raw results
within = df[df['split'] == 'within_sample']
region = df[df['split'] == 'region_holdout']
size   = df[df['split'] == 'size_holdout']

# Mean RMSE per model and scenario, one frame per split
rmse_within = _mean_rmse(within, 'RMSE_within')
rmse_region = _mean_rmse(region, 'RMSE_region')
rmse_size   = _mean_rmse(size, 'RMSE_size')

# Join the three aggregates side by side. `merge` defaults to an inner join,
# so a group that is missing from any one split is dropped from `merged`.
merged = rmse_within.merge(rmse_region, on=group_keys).merge(rmse_size, on=group_keys)

# Transfer ratio = held-out RMSE / within-sample RMSE for the same group;
# a value above 1 means the held-out error is larger than the within-sample error
merged['Region_TR'] = merged['RMSE_region'] / merged['RMSE_within']
merged['Size_TR']   = merged['RMSE_size']   / merged['RMSE_within']

In [11]:
# Show the per-split mean RMSE values together with the derived transfer ratios
merged

Unnamed: 0,model,scenario_type,scenario,RMSE_within,RMSE_region,RMSE_size,Region_TR,Size_TR
0,MLP,baselines,constrained,11.76676,11.738985,10.952819,0.997639,0.930827
1,MLP,baselines,full,9.912321,10.788779,10.474518,1.088421,1.056717
2,MLP,diversified,max_balanced,4.818805,12.08639,12.052757,2.508171,2.501192
3,MLP,diversified,median_balanced,6.699609,12.570959,11.007365,1.876372,1.642986
4,RandomForest,baselines,constrained,6.4359,6.78865,6.567291,1.05481,1.020415
5,RandomForest,baselines,full,7.133198,7.520586,6.964803,1.054308,0.976393
6,RandomForest,diversified,max_balanced,3.457462,7.47332,7.789807,2.161505,2.253042
7,RandomForest,diversified,median_balanced,5.529381,8.022058,7.975064,1.450806,1.442307
8,Ridge,baselines,constrained,6.722313,8.336346,6.826259,1.240101,1.015463
9,Ridge,baselines,full,7.610812,8.193293,7.724753,1.076533,1.014971


In [17]:
# `merged` is built with one row per (model, scenario_type, scenario), so each
# group here holds a single row and the mean returns the ratios unchanged;
# the result is effectively the transfer-ratio columns of `merged` on their own
tr_keys = ['model', 'scenario_type', 'scenario']
merged.groupby(tr_keys, as_index=False)[['Region_TR', 'Size_TR']].mean()

Unnamed: 0,model,scenario_type,scenario,Region_TR,Size_TR
0,MLP,baselines,constrained,0.997639,0.930827
1,MLP,baselines,full,1.088421,1.056717
2,MLP,diversified,max_balanced,2.508171,2.501192
3,MLP,diversified,median_balanced,1.876372,1.642986
4,RandomForest,baselines,constrained,1.05481,1.020415
5,RandomForest,baselines,full,1.054308,0.976393
6,RandomForest,diversified,max_balanced,2.161505,2.253042
7,RandomForest,diversified,median_balanced,1.450806,1.442307
8,Ridge,baselines,constrained,1.240101,1.015463
9,Ridge,baselines,full,1.076533,1.014971
