In [3]:
from pathlib import Path

import pandas as pd
# Build a LaTeX table comparing RMSE (mean ± std) of the two approaches at every
# prediction horizon and save it as a .tex file.

# Labels as they appear in the 'Approach' column of the CSV, in table-column order.
WITH_MEALS = 'pixtral-large-latest'   # rendered as "With meal features"
WITHOUT_MEALS = 'nollm'               # rendered as "Without meal features"
OUTPUT_PATH = Path('latex_tables/rmse_comparison_table.tex')

df = pd.read_csv('evaluation_metrics.csv')
df['Patient'] = df['Patient'].astype(str)

# Mean and standard deviation of RMSE for every (approach, horizon) pair.
agg_data = df.groupby(['Approach', 'Prediction Horizon'])['RMSE'].agg(['mean', 'std']).reset_index()

# Pivot so that each prediction horizon is one row and the columns are
# ('mean' | 'std', approach) pairs.
pivot_data = agg_data.pivot(index='Prediction Horizon', columns='Approach', values=['mean', 'std'])

mean_by_approach = pivot_data['mean']
std_by_approach = pivot_data['std']

# Fail early with a readable message instead of an opaque tuple KeyError
# from inside the row loop.
missing_approaches = [
    name for name in (WITH_MEALS, WITHOUT_MEALS)
    if name not in mean_by_approach.columns
]
if missing_approaches:
    raise KeyError(
        f"Approach(es) {missing_approaches} not found in evaluation_metrics.csv; "
        f"available: {list(mean_by_approach.columns)}"
    )


def _format_cell(mean, std, bold):
    """Return 'mean $\\pm$ std' with two decimals, wrapped in \\textbf when `bold`."""
    cell = f"{mean:.2f} $\\pm$ {std:.2f}"
    return f"\\textbf{{{cell}}}" if bold else cell


# Preamble and two-row header. The first column keeps a white background while
# the rest of the header row is gray; the data columns are tinted blue.
table_lines = [
    "% Requires: \\usepackage{colortbl}, \\usepackage{xcolor}",
    "\\begin{table}[htbp]",
    "\\centering",
    "\\renewcommand{\\arraystretch}{1.3}",
    "\\setlength{\\tabcolsep}{12pt}",
    "\\begin{tabular}{|c|>{\\columncolor{blue!15}}c|>{\\columncolor{blue!15}}c|}",
    "\\hline",
    "\\rowcolor{gray!25} \\cellcolor{white}\\textbf{Prediction} & \\textbf{With meal features} & \\textbf{Without meal features} \\\\ ",
    "\\rowcolor{gray!25} \\cellcolor{white}\\textbf{Horizon} & \\textbf{RMSE (mean $\\pm$ std)} & \\textbf{RMSE (mean $\\pm$ std)} \\\\ ",
    "\\hline",
]

# One row per prediction horizon; the strictly lower mean RMSE is set in bold
# (on a tie, or when a mean is NaN, neither cell is bold).
for ph in sorted(pivot_data.index.unique()):
    pixtral_mean = mean_by_approach.loc[ph, WITH_MEALS]
    pixtral_std = std_by_approach.loc[ph, WITH_MEALS]
    nollm_mean = mean_by_approach.loc[ph, WITHOUT_MEALS]
    nollm_std = std_by_approach.loc[ph, WITHOUT_MEALS]

    pixtral_cell = _format_cell(pixtral_mean, pixtral_std, bold=pixtral_mean < nollm_mean)
    nollm_cell = _format_cell(nollm_mean, nollm_std, bold=nollm_mean < pixtral_mean)

    table_lines.append(f"{ph} & {pixtral_cell} & {nollm_cell} \\\\ \\hline")

table_lines += [
    "\\end{tabular}",
    "\\caption{\\textbf{RMSE comparison} between approaches across different prediction horizons.\\\\Lower values are better and shown in \\textbf{bold}.}",
    "\\label{tab:rmse_comparison}",
    "\\end{table}",
]

# Joining with newlines leaves no trailing newline after \end{table}.
latex_content = "\n".join(table_lines)

# Save to .tex file, creating the output directory first so a fresh checkout
# does not fail with FileNotFoundError.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
OUTPUT_PATH.write_text(latex_content, encoding='utf-8')

In [2]:
# Calculate the difference in mean RMSE between the two approaches at each
# prediction horizon, reusing `agg_data` (per-approach, per-horizon mean/std of
# RMSE) computed in the LaTeX table cell.

# Approach labels as they appear in agg_data['Approach'] (same as the LaTeX table)
approach1 = 'pixtral-large-latest'  # With meal features
approach2 = 'nollm'                 # Without meal features

diff_columns = ['Prediction Horizon', 'Absolute Difference', 'Percentage Difference']

# Collect one record per horizon and build the DataFrame once at the end;
# growing a frame with pd.concat inside the loop is quadratic and triggers the
# pandas deprecation warning for concatenating empty frames.
diff_records = []

for ph in sorted(agg_data['Prediction Horizon'].unique()):
    at_horizon = agg_data['Prediction Horizon'] == ph
    approach1_data = agg_data[at_horizon & (agg_data['Approach'] == approach1)]
    approach2_data = agg_data[at_horizon & (agg_data['Approach'] == approach2)]

    # Skip horizons where either approach has no aggregated result
    if approach1_data.empty or approach2_data.empty:
        continue

    mean1 = approach1_data['mean'].iloc[0]
    mean2 = approach2_data['mean'].iloc[0]

    # Positive values mean approach1 has the lower (better) RMSE.
    abs_diff = mean2 - mean1
    # Percentage is expressed relative to approach1's mean RMSE.
    pct_diff = (abs_diff / mean1) * 100

    diff_records.append({
        'Prediction Horizon': ph,
        'Absolute Difference': abs_diff,
        'Percentage Difference': pct_diff,
    })

# Passing `columns` keeps the expected schema even when no horizon qualified.
diff_data = pd.DataFrame(diff_records, columns=diff_columns)

print(f"Differences between {approach1} and {approach2}:")
print(diff_data)

if diff_data.empty:
    # Nothing to compare: report it instead of indexing into an empty frame.
    max_abs_diff_ph = None
    max_pct_diff_ph = None
    print("\nNo prediction horizon has results for both approaches; nothing to compare.")
else:
    # Select the scalar with .loc[row, column]; pulling the whole row first
    # would upcast the integer horizon to float (printed as e.g. 24.0).
    max_abs_row = diff_data['Absolute Difference'].idxmax()
    max_pct_row = diff_data['Percentage Difference'].idxmax()
    max_abs_diff_ph = diff_data.loc[max_abs_row, 'Prediction Horizon']
    max_pct_diff_ph = diff_data.loc[max_pct_row, 'Prediction Horizon']

    print(f"\nMaximum absolute difference: {diff_data['Absolute Difference'].max():.2f} at PH = {max_abs_diff_ph}")
    print(f"Maximum percentage difference: {diff_data['Percentage Difference'].max():.2f}% at PH = {max_pct_diff_ph}")


Differences between pixtral-large-latest and nollm:
   Prediction Horizon  Absolute Difference  Percentage Difference
0                   6             0.405192               2.504092
1                   9             0.593866               2.367798
2                  12             1.273316               4.009791
3                  18             1.791747               4.217126
4                  24             4.083429               8.554049

Maximum absolute difference: 4.08 at PH = 24.0
Maximum percentage difference: 8.55% at PH = 24.0
