In [2]:
# 06_validate_model0.ipynb

import sys, os
sys.path.append(os.path.abspath(".."))

import pandas as pd
from pathlib import Path

from src.model0_baseline import compute_model0
from src.validation import compute_binary_metrics

# ------------------------------------------------
# CONFIG
# ------------------------------------------------
# Column names used throughout this cell.
OBS_COL = "OBS"      # observed label column, from merged_for_models.csv
VALUE_COL = "DHI0"   # Model 0 input column (normalized SPI only)

# Cut-offs applied to VALUE_COL; each one gets its own prediction column
# and its own row in the metrics table.
THRESHOLDS = [25, 30, 40]

# Input table.
MERGED_CSV = "../data/processed/merged_for_models.csv"

# Output tables (paths are relative to the notebook's folder).
OUT_PREDICTIONS = "../results/validation/model0_predictions.csv"
OUT_METRICS = "../results/validation/model0_metrics.csv"

# ------------------------------------------------
# LOAD DATA
# ------------------------------------------------
# Read the merged table and report how many rows came in.
df = pd.read_csv(MERGED_CSV)
print("Rows loaded:", df.shape[0])

# ------------------------------------------------
# MODEL 0 — baseline (DHI0 only)
# ------------------------------------------------
# The baseline receives a copy, so nothing it does in place can reach `df`.
df0 = compute_model0(
    df.copy(),
    dhi_col=VALUE_COL,
)

# Predictions table: starts with the observed labels and the model values;
# the threshold loop adds one prediction column per cut-off.
all_predictions = df0.loc[:, [OBS_COL, VALUE_COL]].copy()

# One metrics record per threshold is collected here.
rows = []

# ------------------------------------------------
# VALIDATE FOR EACH THRESHOLD
# ------------------------------------------------
for threshold in THRESHOLDS:
    column = f"pred_T{threshold}"

    # Binary prediction: 1 where the model value is strictly above the
    # threshold, 0 otherwise. Kept as a column of the predictions table.
    all_predictions[column] = df0[VALUE_COL].gt(threshold).astype(int)

    # Score the stored prediction against the observed labels
    # (arguments are y_true, then y_pred).
    metrics = compute_binary_metrics(df0[OBS_COL], all_predictions[column])

    # Tag the record so rows stay identifiable in the combined table.
    metrics["threshold"] = threshold
    metrics["model"] = "Model 0"
    rows.append(metrics)

# One row per threshold.
metrics_df = pd.DataFrame(rows)

# ------------------------------------------------
# SAVE OUTPUTS
# ------------------------------------------------
# Make sure both destination folders exist before anything is written.
for out_file in (OUT_METRICS, OUT_PREDICTIONS):
    Path(out_file).parent.mkdir(parents=True, exist_ok=True)

# Write both tables without the DataFrame index column.
metrics_df.to_csv(OUT_METRICS, index=False)
all_predictions.to_csv(OUT_PREDICTIONS, index=False)

# ------------------------------------------------
# DISPLAY SUMMARY
# ------------------------------------------------
# Metrics table, one row per threshold.
print("Model 0 validation metrics:")
display(metrics_df)

# First five rows of the predictions table as a quick sanity check.
print("\nPrediction sample:")
display(all_predictions.head(5))

# Echo where the two CSV files were written.
print(f"\nSaved metrics → {OUT_METRICS}")
print(f"Saved predictions → {OUT_PREDICTIONS}")


Rows loaded: 40
Model 0 validation metrics:


Unnamed: 0,TP,FP,TN,FN,Accuracy,Precision,Recall,F1,threshold,model
0,25,5,10,0,0.875,0.833333,1.0,0.909091,25,Model 0
1,25,1,14,0,0.975,0.961538,1.0,0.980392,30,Model 0
2,20,0,15,5,0.875,1.0,0.8,0.888889,40,Model 0



Prediction sample:


Unnamed: 0,OBS,DHI0,pred_T25,pred_T30,pred_T40
0,0,25.052,1,0,0
1,1,52.911,1,1,1
2,1,39.149,1,1,0
3,1,49.457,1,1,1
4,1,86.465,1,1,1



Saved metrics → ../results/validation/model0_metrics.csv
Saved predictions → ../results/validation/model0_predictions.csv
