# Import Libs

In [23]:
from catboost import CatBoostClassifier, Pool
import joblib, pandas as pd, numpy as np
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss, log_loss
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.metrics import (
    roc_auc_score, average_precision_score, brier_score_loss, log_loss
)
# -------- paths / config
import time

# Evaluate Baseline Models

## Evaluate Baseline Models on Regular season

In [25]:
# --- Q7 · evaluate baseline LR models on test set ---------------------------
# Shared configuration: RNG seed, where the fitted baseline models live, which
# test split is scored, and the folder that receives the generated figures.
SEED = 42
ART_DIR = Path("artifacts")
TEST_CSV = Path("../ift6758/data/milestone2/baseline_general_test.csv")  # adjust if needed
OUT_DIR = Path("figs_q7/test_regular")
# Create the figure folder up front so the later savefig calls cannot fail on it.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# -------- plotting helpers (same shapes as your q2/q6)
def roc_curve_pts(y_true, y_prob):
    """Return the ROC curve as ``(fpr, tpr)``, discarding the thresholds.

    Thin wrapper around ``sklearn.metrics.roc_curve``; the import is local to
    the function, as in the rest of the plotting helpers.
    """
    from sklearn.metrics import roc_curve

    curve = roc_curve(y_true, y_prob)
    return curve[0], curve[1]

def compute_goalrate_vs_percentile(y_true, y_prob, n_bins=100):
    """Goal rate (mean of ``y_true``) inside each one-percentile-wide score bin.

    Shots are grouped by where their predicted probability falls among the
    percentiles of ``y_prob``. The bin reported at percentile ``pct`` covers
    the scores in ``(q[pct-1], q[pct]]``; the lowest bin is closed on both
    sides so the minimum score is not dropped.

    Parameters
    ----------
    y_true : array-like
        Outcome per shot (1 = goal, 0 = no goal).
    y_prob : array-like
        Predicted goal probability per shot, same length as ``y_true``.
    n_bins : int, default 100
        Kept for backward compatibility. The curve is always evaluated on the
        100 integer percentiles, so this value does not change the output.

    Returns
    -------
    (x_percentiles, rates)
        ``x_percentiles`` is ``100, 99, ..., 1``; ``rates[i]`` is the goal rate
        of the bin ending at ``x_percentiles[i]`` (NaN when the bin is empty,
        e.g. with heavily tied scores or empty input).
    """
    y = np.asarray(y_true)
    p = np.asarray(y_prob)
    x_percentiles = np.arange(100, 0, -1)
    # One slot per plotted percentile so both returned arrays always line up.
    rates = np.full(len(x_percentiles), np.nan, dtype=float)
    if p.size == 0:
        return x_percentiles, rates

    # Bin edges for percentiles 0..100, computed once instead of twice per bin.
    edges = np.quantile(p, np.arange(0, 101) / 100.0, method="linear")
    for i, pct in enumerate(x_percentiles):
        hi = edges[pct]
        lo = edges[pct - 1]
        # Upper edge is inclusive; the previous `p > hi` test selected
        # everything ABOVE the bin and left the top bin permanently empty.
        above_lo = (p > lo) if pct > 1 else (p >= lo)
        mask = (p <= hi) & above_lo
        if mask.any():
            rates[i] = y[mask].mean()
    return x_percentiles, rates

def compute_cum_goals_vs_percentile(y_true, y_prob, n_bins=100):
    """Cumulative share of goals when walking the score percentiles high→low.

    Shots are ranked by predicted probability, highest first. The value
    reported at percentile ``pct`` is the fraction of all goals scored by the
    shots ranked at or above the bin ending at ``pct``, i.e. the top
    ``101 - pct`` percent of shots. The curve therefore starts low at
    percentile 100 (only the best-ranked shots) and reaches 1.0 at percentile
    1 (every shot), matching the high→low x-axis used by the plots.

    Parameters
    ----------
    y_true : array-like
        Outcome per shot (1 = goal, 0 = no goal).
    y_prob : array-like
        Predicted goal probability per shot, same length as ``y_true``.
    n_bins : int, default 100
        Kept for backward compatibility. The curve is always evaluated on the
        100 integer percentiles, so this value does not change the output.

    Returns
    -------
    (x_percentiles, curve)
        ``x_percentiles`` is ``100, 99, ..., 1``; ``curve`` holds the matching
        cumulative goal proportions (all zeros for empty input).
    """
    y = np.asarray(y_true)
    p = np.asarray(y_prob)
    x_percentiles = np.arange(100, 0, -1)
    n = len(y)
    if n == 0:
        return x_percentiles, np.zeros(len(x_percentiles), dtype=float)

    order = np.argsort(-p)               # high -> low
    cum_goals = np.cumsum(y[order])
    # Guard against division by zero when the sample contains no goal at all.
    total_goals = max(1, y.sum())

    # Number of top-ranked shots included at each percentile. Integer
    # arithmetic avoids float artefacts such as 0.29 * 100 == 28.999...
    top_counts = np.maximum(1, (101 - x_percentiles) * n // 100)
    curve = (cum_goals[top_counts - 1] / total_goals).astype(float)
    return x_percentiles, curve

def plot_roc(ax, y_true, curves):
    """Draw one ROC curve per entry of ``curves`` on ``ax``.

    ``curves`` maps a legend label to the predicted probabilities of that
    model; each legend entry also carries the model's ROC AUC. A dashed
    diagonal is drawn first as the chance reference.
    """
    ax.plot([0,1],[0,1], linestyle="--", label="chance (45°)")
    for model_name in curves:
        scores = curves[model_name]
        fpr, tpr = roc_curve_pts(y_true, scores)
        area = roc_auc_score(y_true, scores)
        ax.plot(fpr, tpr, label=f"{model_name} (AUC={area:.3f})")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC curve (test)")
    ax.legend(loc="lower right")

def plot_goalrate(ax, y_true, curves):
    """Plot goal rate against model percentile for every entry of ``curves``.

    ``curves`` maps a legend label to predicted probabilities. The x-axis is
    inverted so the highest percentiles appear on the left.
    """
    for model_name in curves:
        percentiles, goal_rates = compute_goalrate_vs_percentile(y_true, curves[model_name])
        ax.plot(percentiles, goal_rates, label=model_name)
    ax.set_xlabel("Shot probability model percentile (high→low)")
    ax.set_ylabel("Goals / Shots")
    ax.set_title("Goal Rate vs Model Percentile (test)")
    ax.invert_xaxis()
    ax.legend()

def plot_cum_goals(ax, y_true, curves):
    """Plot the cumulative proportion of goals against model percentile.

    ``curves`` maps a legend label to predicted probabilities. The x-axis is
    inverted so the highest percentiles appear on the left.
    """
    for model_name in curves:
        percentiles, goal_share = compute_cum_goals_vs_percentile(y_true, curves[model_name])
        ax.plot(percentiles, goal_share, label=model_name)
    ax.set_xlabel("Shot probability model percentile (high→low)")
    ax.set_ylabel("Proportion of goals")
    ax.set_title("Cumulative % of Goals vs Percentile (test)")
    ax.invert_xaxis()
    ax.legend(loc="lower right")

def plot_calibration(ax, y_true, curves, n_bins=10):
    """Overlay one reliability curve per entry of ``curves`` on ``ax``.

    ``curves`` maps a legend label to predicted probabilities; ``n_bins`` is
    forwarded to ``CalibrationDisplay.from_predictions``.
    """
    from sklearn.calibration import CalibrationDisplay

    for model_name in curves:
        CalibrationDisplay.from_predictions(
            y_true,
            curves[model_name],
            n_bins=n_bins,
            name=model_name,
            ax=ax,
        )
    ax.set_xlabel("Predicted probability")
    ax.set_ylabel("Observed frequency")
    ax.set_title("Reliability (Calibration) — test")

# -------- load test data
df_test = pd.read_csv(TEST_CSV)
assert {"distance_from_net","shot_angle","is_goal"} <= set(df_test.columns), "missing baseline columns in test.csv"
y_test = df_test["is_goal"].values

# -------- load baseline artifacts and evaluate
# model name -> (saved estimator path, input columns the estimator expects)
spec = {
    name: (ART_DIR / f"{name}.joblib", cols)
    for name, cols in (
        ("lr-distance", ["distance_from_net"]),
        ("lr-angle",    ["shot_angle"]),
        ("lr-both",     ["distance_from_net","shot_angle"]),
    )
}

curves, rows = {}, []
for name, (path, cols) in spec.items():
    model = joblib.load(path)
    X = df_test[cols].copy()
    # time only the prediction call, not the model loading
    t0 = time.time()
    prob = model.predict_proba(X)[:,1]
    dt = time.time() - t0
    rows.append((
        name,
        roc_auc_score(y_test, prob),
        average_precision_score(y_test, prob),
        brier_score_loss(y_test, prob),
        log_loss(y_test, prob, labels=[0,1]),
        dt,
    ))
    curves[name] = prob

# add random baseline (uniform scores from a seeded generator)
curves["random"] = np.random.RandomState(SEED).rand(len(y_test))

# -------- print metrics table
print("=== Baseline LR models on TEST ===")
for label, m_auc, m_ap, m_brier, m_ll, m_dt in rows:
    print(f"{label:<12} AUC={m_auc:.4f}  PR-AUC={m_ap:.4f}  Brier={m_brier:.4f}  LogLoss={m_ll:.4f}  time={m_dt:.3f}s")

# -------- save the four figures (baselines only for now)
for plot_fn, stem in (
    (plot_roc,         "roc"),
    (plot_goalrate,    "goalrate"),
    (plot_cum_goals,   "cum_goals"),
    (plot_calibration, "calibration"),  # uses its default n_bins=10
):
    plt.figure(figsize=(7,5))
    ax = plt.gca()
    plot_fn(ax, y_test, curves)
    plt.savefig(OUT_DIR / f"q7_general_test_{stem}_baselines.png", dpi=160, bbox_inches="tight")
    plt.close()

print(f"\nfigs written to: {OUT_DIR.resolve()}")




=== Baseline LR models on TEST ===
lr-distance  AUC=0.6973  PR-AUC=0.1833  Brier=0.0853  LogLoss=0.3055  time=0.002s
lr-angle     AUC=0.5555  PR-AUC=0.1227  Brier=0.0887  LogLoss=0.3208  time=0.002s
lr-both      AUC=0.7126  PR-AUC=0.1925  Brier=0.0845  LogLoss=0.3015  time=0.002s

figs written to: C:\Users\Gazal\Documents\Masters_Fall_2025\Studies\IFT 6758\Projects\Project_1 NHL\Milestone_2\data-science-code\milestone2\figs_q7\test_regular


## Evaluate Baseline Models on Playoffs  

In [26]:
TEST_CSV = Path("../ift6758/data/milestone2/baseline_playoff_test.csv")  # adjust if needed
df_test = pd.read_csv(TEST_CSV)
assert {"distance_from_net","shot_angle","is_goal"} <= set(df_test.columns), "missing baseline columns in test.csv"
y_test = df_test["is_goal"].values

# Re-score the same three baselines (``spec`` comes from the regular-season cell).
curves, rows = {}, []
for name, (path, cols) in spec.items():
    model = joblib.load(path)
    X = df_test[cols].copy()
    # time only the prediction call, not the model loading
    t0 = time.time()
    prob = model.predict_proba(X)[:,1]
    dt = time.time() - t0
    rows.append((
        name,
        roc_auc_score(y_test, prob),
        average_precision_score(y_test, prob),
        brier_score_loss(y_test, prob),
        log_loss(y_test, prob, labels=[0,1]),
        dt,
    ))
    curves[name] = prob

# add random baseline (uniform scores from a seeded generator)
curves["random"] = np.random.RandomState(SEED).rand(len(y_test))

# -------- print metrics table
print("=== Baseline LR models on TEST ===")
for label, m_auc, m_ap, m_brier, m_ll, m_dt in rows:
    print(f"{label:<12} AUC={m_auc:.4f}  PR-AUC={m_ap:.4f}  Brier={m_brier:.4f}  LogLoss={m_ll:.4f}  time={m_dt:.3f}s")

# -------- save the four figures (baselines only for now)
# NOTE(review): OUT_DIR is not reassigned here, so the playoff figures land in
# the directory created for the regular-season run and are told apart only by
# their "q7_playoff_" filename prefix — confirm this is intended.
for plot_fn, stem in (
    (plot_roc,         "roc"),
    (plot_goalrate,    "goalrate"),
    (plot_cum_goals,   "cum_goals"),
    (plot_calibration, "calibration"),  # uses its default n_bins=10
):
    plt.figure(figsize=(7,5))
    ax = plt.gca()
    plot_fn(ax, y_test, curves)
    plt.savefig(OUT_DIR / f"q7_playoff_test_{stem}_baselines.png", dpi=160, bbox_inches="tight")
    plt.close()

print(f"\nfigs written to: {OUT_DIR.resolve()}")



=== Baseline LR models on TEST ===
lr-distance  AUC=0.6758  PR-AUC=0.1478  Brier=0.0766  LogLoss=0.2826  time=0.003s
lr-angle     AUC=0.5674  PR-AUC=0.1104  Brier=0.0786  LogLoss=0.2923  time=0.001s
lr-both      AUC=0.6957  PR-AUC=0.1643  Brier=0.0759  LogLoss=0.2783  time=0.002s

figs written to: C:\Users\Gazal\Documents\Masters_Fall_2025\Studies\IFT 6758\Projects\Project_1 NHL\Milestone_2\data-science-code\milestone2\figs_q7\test_regular


# Evaluate the Best CatBoost Model

In [27]:
# Point TEST_CSV at the advanced-feature test split; the CatBoost evaluation
# cell further down reads whatever path is stored here.
TEST_CSV = Path("../ift6758/data/milestone2/advanced_general_test.csv")  # adjust if needed

## Evaluate the Best CatBoost Model on Regular Season

In [28]:
# --- Q7 · add best Q6 CatBoost (+ Platt calibration) on TEST -----------------
# ---- locked-17 feature recipe (must match training)
# Columns taken as-is from the CSV.
BASE = [
    "distance_from_net",
    "rebound",
    "period",
    "last_event_distance",
    "shot_angle",
    "shot_type",
    "period_time_seconds",
]
RAW = [
    "time_since_last_event",
    "angle_change",
    "event_speed",
    "last_event_type",
]
# Columns produced by add_features().
DERIVED = [
    "log_distance",
    "abs_angle",
    "cos_angle",
    "dist_x_abs_angle",
    "rush",
    "big_turn",
]
# Final model input, in this exact column order.
FEATURES = [*BASE, *RAW, *DERIVED]
# Columns handed to CatBoost as categorical features.
CAT_COLS = ["shot_type", "last_event_type", "period"]

def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with ``rebound`` cast to float and six derived columns added.

    Added columns: ``log_distance`` (log1p of the distance), ``abs_angle``,
    ``cos_angle`` (angle converted from degrees), ``dist_x_abs_angle``,
    ``rush`` (1.0 when ``time_since_last_event`` <= 2) and ``big_turn``
    (1.0 when ``angle_change`` >= 30). The input frame is left untouched.
    """
    distance = df["distance_from_net"]
    angle = df["shot_angle"]
    abs_angle = np.abs(angle)
    return df.assign(
        rebound=df["rebound"].astype(float),
        log_distance=np.log1p(distance),
        abs_angle=abs_angle,
        cos_angle=np.cos(np.deg2rad(angle)),
        dist_x_abs_angle=distance * abs_angle,
        # Comparisons against NaN are False, so missing values map to 0.0.
        rush=(df["time_since_last_event"] <= 2).astype(float),
        big_turn=(df["angle_change"] >= 30).astype(float),
    )

def make_pool(X: pd.DataFrame, y=None):
    """Build a CatBoost ``Pool`` from ``X`` with its categorical columns imputed.

    Missing values in each column of ``CAT_COLS`` that is present in ``X`` are
    replaced by that column's most frequent value. The imputer is fitted on
    ``X`` itself, so the fill value comes from the data being scored.

    Parameters
    ----------
    X : pd.DataFrame
        Feature frame; it is NOT modified (a private copy is imputed).
    y : array-like, optional
        Labels forwarded to ``Pool(label=...)``.

    Returns
    -------
    Pool
        Pool whose ``cat_features`` are the positional indices of the
        categorical columns found in ``X``.
    """
    # Work on a private copy: the previous version wrote the imputed values
    # back into the caller's frame as a hidden side effect.
    X = X.copy()
    present_cats = [c for c in CAT_COLS if c in X.columns]

    # impute categoricals like in training
    imputer = SimpleImputer(strategy="most_frequent")
    for c in present_cats:
        X.loc[:, c] = imputer.fit_transform(X[[c]])[:,0]

    cat_idx = [X.columns.get_loc(c) for c in present_cats]
    return Pool(X, label=y, cat_features=cat_idx)

In [29]:
# ---- load test
df_test = pd.read_csv(TEST_CSV)
# Validate the raw inputs BEFORE feature engineering: add_features() indexes
# these columns directly, so checking afterwards would surface a bare KeyError
# instead of this message. The derived columns are created by add_features().
missing = set(BASE + RAW + ["is_goal"]) - set(df_test.columns)
assert not missing, f"test.csv missing columns: {missing}"
df_test = add_features(df_test)

X_test = df_test[FEATURES].copy()
y_test = df_test["is_goal"].values

# ---- load CatBoost + Platt
cb_path = Path("q6_catboost_final.cbm")
platt_path = Path("q6_platt.joblib")

cb = CatBoostClassifier()
cb.load_model(str(cb_path))

pool_test = make_pool(X_test, y_test)
p_raw = cb.predict_proba(pool_test)[:,1]

# Calibration is best-effort: if the Platt model cannot be loaded or applied,
# fall back to the raw CatBoost probabilities — but say so, because every
# metric and figure label below depends on which variant was used.
try:
    platt = joblib.load(platt_path)
    p_cat = platt.predict_proba(p_raw.reshape(-1,1))[:,1]
    cal_used = True
except Exception as exc:
    print(f"[warn] Platt calibrator '{platt_path}' not applied ({type(exc).__name__}: {exc}); using raw CatBoost probabilities")
    p_cat = p_raw
    cal_used = False

# ---- metrics
auc  = roc_auc_score(y_test, p_cat)
ap   = average_precision_score(y_test, p_cat)
brier = brier_score_loss(y_test, p_cat)
ll    = log_loss(y_test, p_cat, labels=[0,1])

print(f"[Q6 CatBoost {'CAL' if cal_used else 'RAW'}]  AUC={auc:.4f}  PR-AUC={ap:.4f}  Brier={brier:.4f}  LogLoss={ll:.4f}")

# ---- also load the 3 LR baselines to make combined 4-curve plots
spec = {
    "lr-distance": (ART_DIR/"lr-distance.joblib", ["distance_from_net"]),
    "lr-angle":    (ART_DIR/"lr-angle.joblib",    ["shot_angle"]),
    "lr-both":     (ART_DIR/"lr-both.joblib",     ["distance_from_net","shot_angle"]),
}
curves = {("catboost (calibrated)" if cal_used else "catboost (raw)"): p_cat}
for name, (path, cols) in spec.items():
    m = joblib.load(path)
    curves[name] = m.predict_proba(df_test[cols])[:,1]
# Same seeded random reference as the baseline cells (SEED is 42).
curves["random"] = np.random.RandomState(SEED).rand(len(y_test))

# ---- save the 4 figures (now: catboost + 3 LR + random)
for plot_fn, stem in (
    (plot_roc,         "roc"),
    (plot_goalrate,    "goalrate"),
    (plot_cum_goals,   "cum_goals"),
    (plot_calibration, "calibration"),  # uses its default n_bins=10
):
    plt.figure(figsize=(7,5))
    ax = plt.gca()
    plot_fn(ax, y_test, curves)
    plt.savefig(OUT_DIR / f"q7_general_test_{stem}_core4.png", dpi=160, bbox_inches="tight")
    plt.close()

print(f"figs written to: {OUT_DIR.resolve()}")


[Q6 CatBoost CAL]  AUC=0.8737  PR-AUC=0.5696  Brier=0.0623  LogLoss=0.2180




figs written to: C:\Users\Gazal\Documents\Masters_Fall_2025\Studies\IFT 6758\Projects\Project_1 NHL\Milestone_2\data-science-code\milestone2\figs_q7\test_regular


## Evaluate the Best CatBoost Model on Playoffs

In [18]:
# Switch TEST_CSV to the playoff advanced-feature split; the evaluation cell
# that follows reads whatever path is stored here.
TEST_CSV = Path("../ift6758/data/milestone2/advanced_playoff_test.csv")

In [None]:
# ---- load test
df_test = pd.read_csv(TEST_CSV)
# Validate the raw inputs BEFORE feature engineering: add_features() indexes
# these columns directly, so checking afterwards would surface a bare KeyError
# instead of this message. The derived columns are created by add_features().
missing = set(BASE + RAW + ["is_goal"]) - set(df_test.columns)
assert not missing, f"test.csv missing columns: {missing}"
df_test = add_features(df_test)

X_test = df_test[FEATURES].copy()
y_test = df_test["is_goal"].values

# ---- load CatBoost + Platt
cb_path = Path("q6_catboost_final.cbm")
platt_path = Path("q6_platt.joblib")

cb = CatBoostClassifier()
cb.load_model(str(cb_path))

pool_test = make_pool(X_test, y_test)
p_raw = cb.predict_proba(pool_test)[:,1]

# Calibration is best-effort: if the Platt model cannot be loaded or applied,
# fall back to the raw CatBoost probabilities — but say so, because every
# metric and figure label below depends on which variant was used.
try:
    platt = joblib.load(platt_path)
    p_cat = platt.predict_proba(p_raw.reshape(-1,1))[:,1]
    cal_used = True
except Exception as exc:
    print(f"[warn] Platt calibrator '{platt_path}' not applied ({type(exc).__name__}: {exc}); using raw CatBoost probabilities")
    p_cat = p_raw
    cal_used = False

# ---- metrics
auc  = roc_auc_score(y_test, p_cat)
ap   = average_precision_score(y_test, p_cat)
brier = brier_score_loss(y_test, p_cat)
ll    = log_loss(y_test, p_cat, labels=[0,1])

print(f"[Q6 CatBoost {'CAL' if cal_used else 'RAW'}]  AUC={auc:.4f}  PR-AUC={ap:.4f}  Brier={brier:.4f}  LogLoss={ll:.4f}")

# ---- also load the 3 LR baselines to make combined 4-curve plots
spec = {
    "lr-distance": (ART_DIR/"lr-distance.joblib", ["distance_from_net"]),
    "lr-angle":    (ART_DIR/"lr-angle.joblib",    ["shot_angle"]),
    "lr-both":     (ART_DIR/"lr-both.joblib",     ["distance_from_net","shot_angle"]),
}
curves = {("catboost (calibrated)" if cal_used else "catboost (raw)"): p_cat}
for name, (path, cols) in spec.items():
    m = joblib.load(path)
    curves[name] = m.predict_proba(df_test[cols])[:,1]
# Same seeded random reference as the baseline cells (SEED is 42).
curves["random"] = np.random.RandomState(SEED).rand(len(y_test))

# ---- save the 4 figures (now: catboost + 3 LR + random)
# NOTE(review): OUT_DIR is not reassigned for the playoff run, so these files
# go to the directory created for the regular-season figures and differ only
# by their "q7_playoff_" filename prefix — confirm this is intended.
for plot_fn, stem in (
    (plot_roc,         "roc"),
    (plot_goalrate,    "goalrate"),
    (plot_cum_goals,   "cum_goals"),
    (plot_calibration, "calibration"),  # uses its default n_bins=10
):
    plt.figure(figsize=(7,5))
    ax = plt.gca()
    plot_fn(ax, y_test, curves)
    plt.savefig(OUT_DIR / f"q7_playoff_test_{stem}_core4.png", dpi=160, bbox_inches="tight")
    plt.close()

print(f"figs written to: {OUT_DIR.resolve()}")

[Q6 CatBoost CAL]  AUC=0.8510  PR-AUC=0.4922  Brier=0.0598  LogLoss=0.2144




figs written to: C:\Users\Gazal\Documents\Masters_Fall_2025\Studies\IFT 6758\Projects\Project_1 NHL\Milestone_2\data-science-code\milestone2\figs_q7\test_regular
