In [1]:
import sys, os
sys.path.append(os.path.abspath(".."))

import pandas as pd
from pathlib import Path

from src.model2_memory import compute_model2_independent
from src.validation import compute_binary_metrics

# Model 2 validation cell: run the model on the merged dataset, tabulate
# binary metrics per threshold, then persist predictions and metrics.

# --- configuration ---
MERGED_CSV = "../data/processed/merged_for_models.csv"
OUT_METRICS = "../results/validation/model2_metrics.csv"
OUT_PRED = "../results/validation/model2_predictions.csv"
THRESHOLDS = [25, 30, 40]
OBS_COL = "OBS"   # column name as it appears in merged_for_models.csv

# column order of the final metrics table
METRIC_COLUMNS = ["T","TP","FP","FN","TN","Accuracy","Precision","Recall","F1"]


def _metrics_row(frame, T):
    """Build one metrics-table row for threshold ``T``.

    Reads the ``pred_T{T}`` column of ``frame`` and passes it, together with
    the ``OBS_COL`` column, to ``compute_binary_metrics``. The returned
    metrics are merged into a dict whose first key is ``"T"``.

    Raises:
        KeyError: if ``frame`` has no ``pred_T{T}`` column.
    """
    pred_col = f"pred_T{T}"
    if pred_col not in frame.columns:
        raise KeyError(f"Expected prediction column missing: {pred_col}")
    row = {"T": T}
    row.update(compute_binary_metrics(frame[OBS_COL], frame[pred_col]))
    return row


# --- load merged data ---
df = pd.read_csv(MERGED_CSV)
print("Rows loaded:", len(df))

# --- run Model 2 on its own ---
df2 = compute_model2_independent(df, gamma=0.2, thresholds=THRESHOLDS)

# --- one metrics row per threshold, in THRESHOLDS order ---
rows = [_metrics_row(df2, T) for T in THRESHOLDS]
metrics_df = pd.DataFrame(rows)[METRIC_COLUMNS]

# --- write predictions first, then metrics; create parent dirs as needed ---
for frame, destination in ((df2, OUT_PRED), (metrics_df, OUT_METRICS)):
    Path(destination).parent.mkdir(parents=True, exist_ok=True)
    frame.to_csv(destination, index=False)

# --- report ---
print("Model 2 validation metrics:")
display(metrics_df)
print("Predictions saved to:", OUT_PRED)
print("Metrics saved to:", OUT_METRICS)


Rows loaded: 40
Model 2 validation metrics:


Unnamed: 0,T,TP,FP,FN,TN,Accuracy,Precision,Recall,F1
0,25,25,1,0,14,0.975,0.961538,1.0,0.980392
1,30,25,0,0,15,1.0,1.0,1.0,1.0
2,40,16,0,9,15,0.775,1.0,0.64,0.780488


Predictions saved to: ../results/validation/model2_predictions.csv
Metrics saved to: ../results/validation/model2_metrics.csv
