# Combine RMSE Run Scores

In [1]:
import os
from pathlib import Path
import pandas as pd

# Get the notebook's directory path
notebook_dir = Path(os.getcwd())

# Get the project root directory (2 levels up from notebook)
project_root = notebook_dir.parent.parent
results_dir = project_root / "results/processed"
print(results_dir)

# Use pathlib's rglob to recursively find all csv files
csv_files = list(results_dir.rglob("*.csv"))
print(f"Found {len(csv_files)} CSV files")

sample_df = pd.read_csv(csv_files[0])

sample_df

/home/cjrisi/Projects/diabetes/nocturnal-hypo-gly-prob-forecast/results/processed
Found 30 CSV files


Unnamed: 0,validation_id,model_id,runtime_secs,MeanSquaredError_mean,MeanSquaredError_std
0,p02,ZeroShotTinyTimeMixerForecaster,7.877197,2.122298,0.673537
1,p03,ZeroShotTinyTimeMixerForecaster,4.828758,3.213307,2.069466
2,p04,ZeroShotTinyTimeMixerForecaster,0.431917,1.840493,0.538041
3,p10,ZeroShotTinyTimeMixerForecaster,0.442564,1.479329,0.589133
4,p11,ZeroShotTinyTimeMixerForecaster,0.432557,2.542815,1.102587
5,p12,ZeroShotTinyTimeMixerForecaster,0.924047,2.543586,1.827144


In [2]:
def parse_model_id(df):
    # Split on hyphens and create a dictionary of key-value pairs
    def parse_id(model_id):
        parts = model_id.split("-")
        # Initialize dictionary with default values
        result = {
            "model_type": parts[0],
            "model_details": " ".join(parts[1:]),
        }

        return result

    # Apply parsing to each model_id
    parsed = df["model_id"].apply(parse_id).apply(pd.Series)

    # Combine original dataframe with parsed columns
    return pd.concat([df, parsed], axis=1)


parse_model_id(sample_df)

Unnamed: 0,validation_id,model_id,runtime_secs,MeanSquaredError_mean,MeanSquaredError_std,model_type,model_details
0,p02,ZeroShotTinyTimeMixerForecaster,7.877197,2.122298,0.673537,ZeroShotTinyTimeMixerForecaster,
1,p03,ZeroShotTinyTimeMixerForecaster,4.828758,3.213307,2.069466,ZeroShotTinyTimeMixerForecaster,
2,p04,ZeroShotTinyTimeMixerForecaster,0.431917,1.840493,0.538041,ZeroShotTinyTimeMixerForecaster,
3,p10,ZeroShotTinyTimeMixerForecaster,0.442564,1.479329,0.589133,ZeroShotTinyTimeMixerForecaster,
4,p11,ZeroShotTinyTimeMixerForecaster,0.432557,2.542815,1.102587,ZeroShotTinyTimeMixerForecaster,
5,p12,ZeroShotTinyTimeMixerForecaster,0.924047,2.543586,1.827144,ZeroShotTinyTimeMixerForecaster,


In [3]:
all_results = parse_model_id(pd.read_csv(csv_files[0]))
all_results = all_results.drop(all_results.index)

for file in csv_files:
    full_path = os.path.abspath(file)
    model_family = os.path.basename(os.path.dirname(os.path.dirname(full_path)))
    filename = os.path.splitext(os.path.basename(file))[0]

    # print(f"\nProcessing: {full_path}")
    print(f"Time: {filename[-5:]}, Model Family: {model_family}, Filename: {filename}")

    df = pd.read_csv(file)
    df["time_delta"] = filename[-5:]
    df["model_family"] = model_family
    df["full_path"] = full_path
    all_results = pd.concat([all_results, parse_model_id(df)], ignore_index=True)
    # print(f"Added: {filename}")

# After the loop, reorder columns in the final combined DataFrame
order_columns = [
    "time_delta",
    "validation_id",
    "model_family",
    "model_type",
    "model_details",
    "model_id",
]
remaining_cols = [col for col in all_results.columns if col not in order_columns]
all_results = all_results[order_columns + remaining_cols]

Time: 05min, Model Family: foundation, Filename: 2025-02-23_00-35-12_ttm_05min
Time: 05min, Model Family: foundation, Filename: 2025-02-23_13-30-03_hftransformers_autoformer_05min
Time: 05min, Model Family: foundation, Filename: 2025-02-23_00-30-32_chronos_small_05min
Time: 05min, Model Family: foundation, Filename: 2025-02-23_15-38-42_hftransformers_informer_05min
Time: 05min, Model Family: foundation, Filename: 2025-02-23_16-25-05_hftransformers_ts_transformer_05min
Time: 05min, Model Family: foundation, Filename: 2025-02-23_00-32-31_chronos_base_05min
Time: 15min, Model Family: foundation, Filename: 2025-02-23_12-18-22_hftransformers_ts_transformer_15min
Time: 15min, Model Family: foundation, Filename: 2025-02-23_00-52-51_hftransformers_autoformer_15min
Time: 15min, Model Family: foundation, Filename: 2025-02-23_00-31-31_chronos_small_15min
Time: 15min, Model Family: foundation, Filename: 2025-02-23_00-35-45_ttm_15min
Time: 15min, Model Family: foundation, Filename: 2025-02-23_00-33

In [12]:
all_results.iloc[-1]["full_path"]

'/home/cjrisi/Projects/diabetes/nocturnal-hypo-gly-prob-forecast/results/processed/pinball/exponential/15mins/2025-02-27_22-16-48_1_exponential_smooth_15min.csv'

In [24]:
group_structure = ["time_delta", "validation_id", "model_family", "model_type"]
best_results_rmse = all_results.groupby(group_structure).agg(
    # Median=("MeanSquaredError_mean", "median"),
    Best=("MeanSquaredError_mean", "min"),
)

best_results_pinball = all_results.groupby(group_structure).agg(
    # Median=("PinballLoss_mean", "median"),
    Best=("PinballLoss_mean", "min"),
)

In [25]:
best_results_rmse

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Best
time_delta,validation_id,model_family,model_type,Unnamed: 4_level_1
05min,p02,arch,ARCH,2.194514
05min,p02,arma,AutoARIMA,1.940629
05min,p02,arma,AutoREG,
05min,p02,arma,NaiveForecaster,2.355567
05min,p02,arma,VARReduce,2.011910
...,...,...,...,...
15min,p06,exponential,StatsForecastAutoTheta,3.286409
15min,p06,foundation,ChronosForecaster,2.757905
15min,p06,foundation,HFTransformersForecaster,4.197128
15min,p06,foundation,ZeroShotTinyTimeMixerForecaster,3.115242


In [26]:
best_results_pinball

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Best
time_delta,validation_id,model_family,model_type,Unnamed: 4_level_1
05min,p02,arch,ARCH,
05min,p02,arma,AutoARIMA,0.250986
05min,p02,arma,AutoREG,
05min,p02,arma,NaiveForecaster,0.252837
05min,p02,arma,VARReduce,
...,...,...,...,...
15min,p06,exponential,StatsForecastAutoTheta,0.405048
15min,p06,foundation,ChronosForecaster,
15min,p06,foundation,HFTransformersForecaster,
15min,p06,foundation,ZeroShotTinyTimeMixerForecaster,


In [32]:
# Assuming best_results_rmse is your pivot table
# best_results_unstacked = best_results_rmse.unstack(level=[0, 1])  # Unstack first two levels
# OR if you need to specify the exact column names:
best_results_unstacked = best_results_rmse.unstack(["model_family", "model_type"])
best_results_unstacked
# To ensure proper column hierarchy
# best_results_unstacked.columns = best_results_unstacked.columns.set_names(['Model Family', 'Model Type'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best
Unnamed: 0_level_1,model_family,arch,arma,arma,arma,arma,exponential,exponential,exponential,exponential,foundation,foundation,foundation,structural
Unnamed: 0_level_2,model_type,ARCH,AutoARIMA,AutoREG,NaiveForecaster,VARReduce,AutoETS,StatsForecastAutoCES,StatsForecastAutoETS,StatsForecastAutoTheta,ChronosForecaster,HFTransformersForecaster,ZeroShotTinyTimeMixerForecaster,ARDL
time_delta,validation_id,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
05min,p02,2.194514,1.940629,,2.355567,2.01191,3.620625,2.934229,3.042978,3.041694,2.211347,2.865613,2.122298,
05min,p03,2.321361,2.527653,,2.796923,2.550394,3.47955,3.467259,3.447532,3.447707,2.943977,2.806601,3.213307,
05min,p04,1.567769,1.86923,,1.800809,1.679178,2.314285,2.177399,2.238956,2.238767,1.839182,2.503176,1.840493,
05min,p10,1.391561,1.35022,,1.382786,1.319039,1.973768,1.782622,1.734136,1.733843,1.435374,1.554336,1.479329,
05min,p11,2.335492,3.171313,,2.507485,2.344692,2.952654,2.876312,2.801125,2.802769,2.672459,3.07931,2.542815,
05min,p12,2.020626,1.949389,,2.551001,2.128375,2.551135,2.632567,2.655361,2.658062,2.444096,3.663847,2.543586,
15min,p01,3.469633,3.631367,3.024733,3.824405,3.545806,4.45968,4.68685,4.195522,4.192593,3.728067,4.576481,3.374613,3.565574
15min,p05,2.09469,2.600266,,2.312774,2.395073,2.425653,2.320795,2.312781,2.31353,2.280566,2.996683,2.328628,
15min,p06,2.887047,2.728342,,3.281241,2.934976,4.507109,3.390976,3.28125,3.286409,2.757905,4.197128,3.115242,


In [41]:
def highlight_scores(row):
    # Create masks
    min_mask = row == row.min()

    # Get NaiveForecaster values for each model family
    naive_values = row.loc[
        row.index.get_level_values("model_type") == "NaiveForecaster"
    ]

    # Compare each value with corresponding NaiveForecaster value
    naive_mask = row.apply(lambda x: any(x > naive_values))

    # Initialize style arrays
    bold = ["font-weight: bold; color: skyblue" if v else "" for v in min_mask]
    font_color = ["color: #FFB366" if v else "" for v in naive_mask]

    # Combine styles
    return [f"{b}; {bg}" if (b or bg) else "" for b, bg in zip(bold, font_color)]


# Apply styling
styled_results = best_results_unstacked.style.apply(highlight_scores, axis=1)
styled_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best,Best
Unnamed: 0_level_1,model_family,arch,arma,arma,arma,arma,exponential,exponential,exponential,exponential,foundation,foundation,foundation,structural
Unnamed: 0_level_2,model_type,ARCH,AutoARIMA,AutoREG,NaiveForecaster,VARReduce,AutoETS,StatsForecastAutoCES,StatsForecastAutoETS,StatsForecastAutoTheta,ChronosForecaster,HFTransformersForecaster,ZeroShotTinyTimeMixerForecaster,ARDL
time_delta,validation_id,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
05min,p02,2.194514,1.940629,,2.355567,2.01191,3.620625,2.934229,3.042978,3.041694,2.211347,2.865613,2.122298,
05min,p03,2.321361,2.527653,,2.796923,2.550394,3.47955,3.467259,3.447532,3.447707,2.943977,2.806601,3.213307,
05min,p04,1.567769,1.86923,,1.800809,1.679178,2.314285,2.177399,2.238956,2.238767,1.839182,2.503176,1.840493,
05min,p10,1.391561,1.35022,,1.382786,1.319039,1.973768,1.782622,1.734136,1.733843,1.435374,1.554336,1.479329,
05min,p11,2.335492,3.171313,,2.507485,2.344692,2.952654,2.876312,2.801125,2.802769,2.672459,3.07931,2.542815,
05min,p12,2.020626,1.949389,,2.551001,2.128375,2.551135,2.632567,2.655361,2.658062,2.444096,3.663847,2.543586,
15min,p01,3.469633,3.631367,3.024733,3.824405,3.545806,4.45968,4.68685,4.195522,4.192593,3.728067,4.576481,3.374613,3.565574
15min,p05,2.09469,2.600266,,2.312774,2.395073,2.425653,2.320795,2.312781,2.31353,2.280566,2.996683,2.328628,
15min,p06,2.887047,2.728342,,3.281241,2.934976,4.507109,3.390976,3.28125,3.286409,2.757905,4.197128,3.115242,


In [29]:
# Create pivot table from best_results
pivot_table_rmse = best_results_rmse.unstack(["model_family", "model_type"])
pivot_table_pinball = best_results_pinball.unstack(["model_family", "model_type"])


# Create style functions that bolds minimum and colors values worse than naive
def highlight_min_and_naive(row):
    is_min = row == row.min()
    worse_than_naive = row > row["NaiveForecaster"]

    styles = [""] * len(row)
    for idx in range(len(row)):
        style_parts = []
        if is_min[idx]:
            style_parts.append("font-weight: bold; color: royalblue")
        if worse_than_naive[idx]:
            style_parts.append("color: orange")
        styles[idx] = "; ".join(style_parts)
    return styles


# Apply the style and display the tables with 4 decimal places
styled_table_rmse = pivot_table_rmse.style.apply(
    highlight_min_and_naive, axis=1
).format("{:.4f}")

styled_table_pinball = pivot_table_pinball.style.apply(
    highlight_min_and_naive, axis=1
).format("{:.4f}")

display(styled_table_rmse)
display(styled_table_pinball)

KeyError: 'NaiveForecaster'

<pandas.io.formats.style.Styler at 0x7b29f1da1af0>

KeyError: 'NaiveForecaster'

<pandas.io.formats.style.Styler at 0x7b299c947b90>

In [9]:
# Reset index to convert pivot table to long format for seaborn
rmse_data = pivot_table_rmse.reset_index().melt(id_vars=["time_delta", "validation_id"])
pinball_data = pivot_table_pinball.reset_index().melt(
    id_vars=["time_delta", "validation_id"]
)

In [10]:
rmse_data

Unnamed: 0,time_delta,validation_id,model_type,value
0,05min,p02,model_family,arch
1,05min,p02,model_family,arma
2,05min,p02,model_family,exponential
3,05min,p02,model_family,foundation
4,05min,p02,model_family,structural
...,...,...,...,...
625,15min,p06,ZeroShotTinyTimeMixerForecaster,
626,15min,p06,ZeroShotTinyTimeMixerForecaster,
627,15min,p06,ZeroShotTinyTimeMixerForecaster,
628,15min,p06,ZeroShotTinyTimeMixerForecaster,3.115242


import seaborn as sns
import matplotlib.pyplot as plt

# Create figure with two subplots
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

# Create box plots
sns.boxplot(data=rmse_data, x="model_type", y="value", ax=ax1)
sns.boxplot(data=pinball_data, x="model_type", y="value", ax=ax2)

# Customize plots
ax1.set_title("RMSE Distribution by Model Type")
ax1.set_xlabel("Model Type")
ax1.set_ylabel("RMSE")
ax1.tick_params(axis="x", rotation=90)

ax2.set_title("Pinball Loss Distribution by Model Type")
ax2.set_xlabel("Model Type")
ax2.set_ylabel("Pinball Loss")
ax2.tick_params(axis="x", rotation=90)

plt.tight_layout()
plt.show()