In [1]:
# Standard library
import json
from pathlib import Path
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix, f1_score, make_scorer, fbeta_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight

# XGBoost
from xgboost import XGBClassifier

# TensorFlow / Keras
from tensorflow import keras
from tensorflow.keras import layers

2026-01-14 19:27:15.032560: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-14 19:27:15.035351: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-14 19:27:15.210348: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-14 19:27:20.286943: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off,

In [2]:
# Base directory
path_dir = Path.cwd()

# Folder with the JSON files
json_dir = path_dir / "per_point_v2"

# Prepare an empty DataFrame with the expected columns and an index name
df = pd.DataFrame(columns=["x", "y", "visible", "action"])
df.index.name = "image_frame"

frames_df = []

for json_path in json_dir.glob("*.json"):
    with json_path.open("r", encoding="utf-8") as f:
        ball_data = json.load(f)  # expected: dict keyed by image_frame

    # Build a DataFrame from the JSON dict, then transpose:
    file_df = pd.DataFrame(ball_data).T
    file_df.index.name = "image_frame"

    # Ensure column names match the expected schema
    file_df = file_df.reindex(columns=["x", "y", "visible", "action"])

    frames_df.append(file_df)

# Final concatenation
df = pd.concat(frames_df, axis=0, ignore_index=False)
df.index.name = "image_frame"

In [3]:
def build_features(
    subset_df: pd.DataFrame,
    smooth_window: int = 7,
) -> pd.DataFrame:
    """
    Feature builder for ball hit / bounce detection.

    """

    # ------------------------------------------------------------------
    # Numeric positions and index
    # ------------------------------------------------------------------
    subset = subset_df.copy()
    subset.index = pd.to_numeric(subset.index, errors="coerce")
    subset = subset.sort_index()
    subset["x_i"] = pd.to_numeric(subset["x"], errors="coerce")
    subset["y_i"] = pd.to_numeric(subset["y"], errors="coerce")
    subset = subset.dropna(subset=["x_i", "y_i"])
    

    # ------------------------------------------------------------------
    # Raw positions
    # ------------------------------------------------------------------
    subset["x_raw"] = subset["x_i"]
    subset["y_raw"] = subset["y_i"]

    # ------------------------------------------------------------------
    # Centered smoothing on positions
    # ------------------------------------------------------------------

    # Centered rolling mean reduces high-frequency measurement noise
    # without eliminating physical discontinuities (hits / bounces).
    subset["x_smooth"] = (
        subset["x_raw"]
        .rolling(smooth_window, center=True, min_periods=1)
        .mean()
    )
    subset["y_smooth"] = (
        subset["y_raw"]
        .rolling(smooth_window, center=True, min_periods=1)
        .mean()
    )

    # ------------------------------------------------------------------
    # Time step (central)
    # ------------------------------------------------------------------
    t = subset.index.to_series()

    # ------------------------------------------------------------------
    # Smoothed derivatives (stable kinematics)
    # ------------------------------------------------------------------
    x_smooth = subset["x_smooth"].to_numpy()
    y_smooth = subset["y_smooth"].to_numpy()

    vx = np.gradient(x_smooth, t)
    vy = np.gradient(y_smooth, t)

    ax = np.gradient(vx, t)
    ay = np.gradient(vy, t)

    jx = np.gradient(ax, t)
    jy = np.gradient(ay, t)

    subset["vx"] = vx
    subset["vy"] = vy
    subset["ax"] = ax
    subset["ay"] = ay
    subset["jx"] = jx
    subset["jy"] = jy

    # ------------------------------------------------------------------
    # Raw derivatives (impulse-sensitive)
    # ------------------------------------------------------------------
    x_raw = subset["x_raw"].to_numpy()
    y_raw = subset["y_raw"].to_numpy()

    vx_raw = np.gradient(x_raw, t)
    vy_raw = np.gradient(y_raw, t)

    ax_raw = np.gradient(vx_raw, t)
    ay_raw = np.gradient(vy_raw, t)

    jx_raw = np.gradient(ax_raw, t)
    jy_raw = np.gradient(ay_raw, t)

    subset["vx_raw"] = vx_raw
    subset["vy_raw"] = vy_raw
    subset["ax_raw"] = ax_raw
    subset["ay_raw"] = ay_raw
    subset["jx_raw"] = jx_raw
    subset["jy_raw"] = jy_raw

    # ------------------------------------------------------------------
    # Raw derivatubes in absolute
    # ------------------------------------------------------------------

    subset["vx_abs_raw"] = np.abs(subset["vx_raw"])
    subset["vy_abs_raw"] = np.abs(subset["vy_raw"])
    subset["ax_abs_raw"] = np.abs(subset["ax_raw"])
    subset["ay_abs_raw"] = np.abs(subset["ay_raw"])
    subset["jx_abs_raw"] = np.abs(subset["jx_raw"])
    subset["jy_abs_raw"] = np.abs(subset["jy_raw"])

    # ------------------------------------------------------------------
    # Magnitudes (smoothed)
    # ------------------------------------------------------------------
    subset["v"] = np.sqrt(subset["vx"]**2 + subset["vy"]**2)
    subset["a"] = np.sqrt(subset["ax"]**2 + subset["ay"]**2)
    subset["jerk"] = np.sqrt(subset["jx"]**2 + subset["jy"]**2)

    # ------------------------------------------------------------------
    # Log magnitudes : preserves order and compresses large values
    # ------------------------------------------------------------------
    subset["log_v"] = np.log1p(subset["v"])    
    subset["log_a"] = np.log1p(subset["a"])
    subset["log_j"] = np.log1p(subset["jerk"])

    # ------------------------------------------------------------------
    # Directional features
    # ------------------------------------------------------------------
    subset["angle"] = np.arctan2(subset["vy"], subset["vx"])
    subset["delta_angle"] = np.gradient(subset["angle"])

    # ------------------------------------------------------------------
    # Centered rolling statistics (smoothed)
    # ------------------------------------------------------------------
    subset["v_mean"] = subset["v"].rolling(smooth_window, center=True, min_periods=1).mean()
    subset["v_std"]  = subset["v"].rolling(smooth_window, center=True, min_periods=1).std().fillna(0)

    subset["a_mean"] = subset["a"].rolling(smooth_window, center=True, min_periods=1).mean()
    subset["a_std"]  = subset["a"].rolling(smooth_window, center=True, min_periods=1).std().fillna(0)

    subset["j_mean"] = subset["jerk"].rolling(smooth_window, center=True, min_periods=1).mean()
    subset["j_std"]  = subset["jerk"].rolling(smooth_window, center=True, min_periods=1).std().fillna(0)

    # ------------------------------------------------------------------
    # Motion sign changes
    # ------------------------------------------------------------------
    subset["vx_sign"] = np.sign(subset["vx"]).fillna(0.0)
    subset["vx_sign_change"] = (
        subset["vx_sign"].diff().abs() > 0
    ).astype(int)
    
    subset["vy_sign"] = np.sign(subset["vy"]).fillna(0.0)
    subset["vy_sign_change"] = (
        subset["vy_sign"].diff().abs() > 0
    ).astype(int)

    return subset

# Select features
FEATURE_COLS = [
    "delta_angle",
    "vx_sign_change", 
    "vy_sign_change",
    "v", "a", "jerk",
    "vx", 'vy', 'ax', 'ay', 'jx', 'jy',
    "v_mean", "v_std",
    "a_mean", "a_std",
    "j_mean", "j_std",
    "log_v", "log_a", "log_j",
    "vx_abs_raw", "vy_abs_raw",
    "ax_abs_raw", "ay_abs_raw",
    "jx_abs_raw", "jy_abs_raw",
]
FEATURE_COLS_DEEP = FEATURE_COLS + ["x_i", "y_i"]
SMOOTH_WINDOW = 7

df_copy = df.copy()
df_copy.index = pd.to_numeric(df_copy.index, errors="coerce")
df_copy = df_copy.sort_index()
split_point = int(0.8 * len(df_copy))
train_df_raw = df_copy.iloc[:split_point]
test_df_raw  = df_copy.iloc[split_point:]
train_df = build_features(train_df_raw, smooth_window=SMOOTH_WINDOW)
test_df  = build_features(test_df_raw,  smooth_window=SMOOTH_WINDOW)

X_train = train_df[FEATURE_COLS]
X_test  = test_df[FEATURE_COLS]
X_train_deep = train_df[FEATURE_COLS_DEEP]
X_test_deep  = test_df[FEATURE_COLS_DEEP]
y_train = train_df["action"].to_numpy()
y_test  = test_df["action"].to_numpy()

# Scaling (fit on train, apply to test)
scaler = StandardScaler()
scaler_deep = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)
X_train_deep_scaled = scaler_deep.fit_transform(X_train_deep)
X_test_deep_scaled  = scaler_deep.transform(X_test_deep)

# Encode labels
le = LabelEncoder()
y_train_labeled = le.fit_transform(y_train)
y_test_labeled  = le.transform(y_test)
classes = le.classes_
num_classes = len(classes)

# Saving preprocessors
preprocessors = {
    "scaler": scaler,
    "scaler_deep": scaler_deep,
    "label_encoder": le
}
joblib.dump(preprocessors, "preprocessors.joblib")

['preprocessors.joblib']

# Unsupervised

In [None]:
physics_df = build_features(df_copy, smooth_window=SMOOTH_WINDOW)



  return np.nanmean(a, axis, out=out, keepdims=keepdims)


In [95]:
df_physics.loc[df_physics['action']=="hit", "pred_action_physics"].value_counts()

pred_action_physics
air    1550
hit      50
Name: count, dtype: int64

In [13]:
train_df['ay'].max()

np.float64(92.17857142857142)

In [59]:
from scipy.signal import find_peaks
import numpy as np

# ==============================
# Estimation automatique des seuils
# ==============================
def estimate_heuristic_thresholds(train_df, window=3):
    ay_abs = train_df["ay_abs_raw"].values
    ay = train_df["ay_raw"].values
    ax = train_df["ax_raw"].values
    vx = train_df["vx_raw"].values
    vy = train_df["vy_raw"].values
    jx = train_df["jx_raw"].values
    jy = train_df["jy_raw"].values

    jerk = np.sqrt(jx**2 + jy**2)

    return {
        "AY_PEAK_MIN": np.percentile(ay_abs, 70),       # seuil vertical plus permissif
        "PROMINENCE": np.percentile(ay_abs, 60),       # un peu moins strict
        "VX_ENERGY_DELTA": np.percentile(np.abs(np.diff(vx)), 70),
        "VX_MIN_MOVE": np.percentile(np.abs(vx), 20),
        "JERK_THRESHOLD": np.percentile(jerk, 70),
        "RATIO_VERT_HORIZ": 1.5,
        "AY_CONCAVE_MAX": np.percentile(ay, 10),
        "AY_VIOLENCE": np.percentile(ay_abs, 95),
    }

# ==============================
# Détecteur heuristique amélioré
# ==============================
def heuristic_event_detector(features_df, thresholds, refractory=10, window=3):
    df = features_df.copy()
    
    ay_abs = df["ay_abs_raw"].values
    ay = df["ay_raw"].values
    ax = df["ax_raw"].values
    vx = df["vx_raw"].values
    vy = df["vy_raw"].values
    jx = df["jx_raw"].values
    jy = df["jy_raw"].values
    
    jerk = np.sqrt(jx**2 + jy**2)

    # Détection des pics verticaux
    peaks, _ = find_peaks(
        ay_abs,
        height=thresholds["AY_PEAK_MIN"],
        prominence=thresholds["PROMINENCE"],
        distance=3
    )

    candidates = []

    for i in peaks:
        if i < window or i + window >= len(df):
            continue

        vx_pre = vx[i - window]
        vx_post = vx[i + window]
        vy_pre = vy[i - window]
        vy_post = vy[i + window]
        ay_val = ay[i]
        ax_val = ax[i]
        jerk_val = jerk[i]

        delta_vx = abs(vx_post) - abs(vx_pre)
        max_vx = max(abs(vx_pre), abs(vx_post))
        vx_flip = vx_pre * vx_post < 0
        vy_flip = vy_pre * vy_post < 0

        ratio_vert_horiz = abs(ay_val) / (abs(ax_val) + 1e-6)
        score = abs(ay_val) + jerk_val + 0.5 * delta_vx  # score combiné pondéré

        pred, priority = None, 0

        # ----- Hit -----
        if (
            (vx_flip and max_vx > thresholds["VX_MIN_MOVE"])
            or (delta_vx > thresholds["VX_ENERGY_DELTA"])
            or (abs(ay_val) > thresholds["AY_VIOLENCE"])
            or (jerk_val > thresholds["JERK_THRESHOLD"])
            or (ratio_vert_horiz < thresholds["RATIO_VERT_HORIZ"])
        ):
            pred, priority = 2, 2  # hit

        # ----- Bounce -----
        elif ay_val < thresholds["AY_CONCAVE_MAX"]:  # plus souple, vy_flip non requis
            pred, priority = 1, 1

        if pred is not None:
            candidates.append((df.index[i], pred, score, priority))

    # ----- Temporal NMS souple -----
    final = {}
    candidates.sort(key=lambda x: x[0])

    for c in candidates:
        if not final:
            final[c[0]] = c[1]
        else:
            last_frame = list(final.keys())[-1]
            if c[0] - last_frame >= refractory:
                final[c[0]] = c[1]
            else:
                # garder l'événement le plus fort dans la fenêtre
                last_score = [x[2] for x in candidates if x[0] == last_frame][0]
                if c[2] > last_score:
                    final[last_frame] = c[1]

    return final


In [60]:
# ==============================
# Exemple d'utilisation
# ==============================
thresholds = estimate_heuristic_thresholds(train_df)
heuristic_preds = heuristic_event_detector(test_df, thresholds)

y_pred_heuristic = np.zeros(len(test_df), dtype=int)
for frame, cls in heuristic_preds.items():
    if frame in test_df.index:
        y_pred_heuristic[test_df.index.get_loc(frame)] = cls

temporal_event_eval(
    y_test_labeled,
    y_pred_heuristic,
    tolerance=2,
    use_labels=False
)



Temporal event evaluation (+/- 2 frames)
----------------------------------------------------
Event        | Precision  | Recall     | F1-Score  
----------------------------------------------------
Class 1      |      0.413 |      0.185 |      0.256
Class 2      |      0.169 |      0.607 |      0.264


# Supervised

In [30]:
def temporal_event_eval(y_true, y_pred, tolerance=2, use_labels=True):
    """
    Event-level evaluation for temporal predictions with +/- tolerance.

    Parameters:
    -----------
    y_true : array-like
        Ground truth events (can be numeric or string labels)
    y_pred : array-like
        Predicted events (same format as y_true)
    tolerance : int
        Number of frames before/after to consider a prediction correct
    use_labels : bool
        If True, expects string labels like "air", "bounce", "hit".
        If False, expects numeric labels like 0, 1, 2.
    """

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if use_labels:
        event_classes = ["bounce", "hit"]
    else:
        event_classes = [1, 2]

    print(f"\nTemporal event evaluation (+/- {tolerance} frames)")
    print("-" * 52)
    print(f"{'Event':<12} | {'Precision':<10} | {'Recall':<10} | {'F1-Score':<10}")
    print("-" * 52)

    for event in event_classes:
        true_indices = np.where(y_true == event)[0]
        pred_indices = np.where(y_pred == event)[0]

        # ---------- Recall ----------
        matched_true = np.array([np.any(np.abs(pred_indices - t) <= tolerance) for t in true_indices])
        recall = matched_true.sum() / len(true_indices) if len(true_indices) > 0 else 0.0

        # ---------- Precision ----------
        matched_pred = np.array([np.any(np.abs(true_indices - p) <= tolerance) for p in pred_indices])
        precision = matched_pred.sum() / len(pred_indices) if len(pred_indices) > 0 else 0.0

        # ---------- F1 ----------
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

        event_name = str(event).capitalize() if use_labels else f"Class {event}"
        print(f"{event_name:<12} | {precision:>10.3f} | {recall:>10.3f} | {f1:>10.3f}")

## 1.1 Random Forest

In [None]:
# Baseline model
rf = RandomForestClassifier(
    n_estimators=400,
    class_weight="balanced",   # to help with class imbalance
    random_state=42,
    n_jobs=-1
)

# Time-aware CV to preserve order of the frames and a gap to avoid data leakage
tscv = TimeSeriesSplit(n_splits=5, gap=SMOOTH_WINDOW // 2 + 1)

param_grid = {
    "max_depth": [40, 50,
    #  60
    ],
    "min_samples_split": [6, 7],
    "min_samples_leaf": [2,3,
    # 4
    ],
    "max_features": ["sqrt",
    #  "log2", None
    ]
}

grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=tscv,
    scoring="f1_macro", # Each class’s F1 contributes equally, to help with class imbalance
    n_jobs=-1,
)

grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
best_rf = grid.best_estimator_

y_pred = best_rf.predict(X_test)

print("\n=== Standard Evaluation ===")
print(classification_report(
    y_test,
    y_pred,
    target_names=classes,
    zero_division=0
))
print(confusion_matrix(y_test, y_pred))

print("\n=== Temporal Tolerance Evaluation ===")
temporal_event_eval(
    y_test,
    y_pred,
    tolerance=2,
    use_labels=True
)

joblib.dump(grid, "model/rf_model.joblib")

Best params: {'max_depth': 40, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 7}

=== Standard Evaluation ===
              precision    recall  f1-score   support

         air       0.99      0.99      0.99     23385
      bounce       0.69      0.64      0.66       308
         hit       0.71      0.53      0.61       323

    accuracy                           0.98     24016
   macro avg       0.80      0.72      0.75     24016
weighted avg       0.98      0.98      0.98     24016

[[23241    81    63]
 [  106   196     6]
 [  146     6   171]]

=== Temporal Tolerance Evaluation ===

Temporal event evaluation (+/- 2 frames)
----------------------------------------------------
Event        | Precision  | Recall     | F1-Score  
----------------------------------------------------
Bounce       |      0.968 |      0.740 |      0.839
Hit          |      0.925 |      0.607 |      0.733


['model/rf_model.joblib']

## 1.2 Balanced Random Forest

In [None]:
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# 1) Time-aware CV with a small gap to avoid centered-window bleed
tscv = TimeSeriesSplit(n_splits=5, gap=SMOOTH_WINDOW // 2 + 1)

# 2) Balanced RF (undersampling per tree)
rf = BalancedRandomForestClassifier(
    n_estimators=200,
    random_state=42,
    n_jobs=-1
)

# 3) Lean grid
param_grid = {
    "max_depth": [None, 20],
    "min_samples_split": [2, 5],
    "min_samples_leaf": [1, 2],
    "max_features": ["sqrt", "log2", None],
}

grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=tscv,
    scoring="precision",
    n_jobs=-1,
    refit=True,
)

grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)

best_rf = grid.best_estimator_
y_pred = best_rf.predict(X_test)

print("\n=== Standard Evaluation ===")
print(classification_report(
    y_test,
    y_pred,
    target_names=classes,
    zero_division=0
))
print(confusion_matrix(y_test_labeled, y_pred))

print("\n=== Temporal Tolerance Evaluation ===")
temporal_event_eval(
    y_test,
    y_pred,
    tolerance=2,
    use_labels=True
)

joblib.dump(grid, "unused_models/rfus_model.joblib")


## 2.1 XG BOOST

In [None]:
# Weights per class
sample_weights = compute_sample_weight(
    class_weight="balanced",
    y=y_train_labeled
)

# Boost non-zero classes (hits / bounces)
sample_weights[y_train_labeled > 0] *= 5

f05_scorer = make_scorer(
    fbeta_score,
    beta=0.5,
    average="macro"
)

xgb = XGBClassifier(
    objective="multi:softprob",
    num_class=len(classes),
    tree_method="hist",
    eval_metric="mlogloss",
    n_estimators=400,
    random_state=42,
    n_jobs=-1
)

tscv = TimeSeriesSplit(
    n_splits=5,
    gap=SMOOTH_WINDOW // 2 + 1
)

param_grid = {
    "max_depth": [3, 6],
    "learning_rate": [0.03, 0.07],
    "subsample": [0.7, 1.0],
    "colsample_bytree": [0.7, 1.0],
    "min_child_weight": [1, 5],
    "gamma": [0.0, 1.0],
}

grid = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    cv=tscv,
    scoring=f05_scorer,
    n_jobs=-1,
    refit=True,
    verbose=1
)

grid.fit(
    X_train,
    y_train_labeled,
    sample_weight=sample_weights
)

print("Best parameters:", grid.best_params_)
best_xgb = grid.best_estimator_


y_pred = best_xgb.predict(X_test)

print("\n=== Standard Evaluation ===")
print(classification_report(
    y_test_labeled,
    y_pred,
    target_names=classes,
    zero_division=0
))
print(confusion_matrix(y_test_labeled, y_pred))

print("\n=== Temporal Tolerance Evaluation ===")
temporal_event_eval(
    y_test_labeled,
    y_pred,
    tolerance=2,
    use_labels=False
)

joblib.dump(grid, "unused_models/xgb_model.joblib")

['unused_models/xgb_model.joblib']

## 2.2 XG BOOST with Undersampling

In [None]:
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.under_sampling import RandomUnderSampler
from xgboost import XGBClassifier
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

pipe = ImbPipeline(steps=[
    ("rus", RandomUnderSampler(random_state=42)),  # undersampling de la majority class
    ("xgb", XGBClassifier(
        objective="multi:softprob",
        num_class=len(np.unique(y_train)),
        tree_method="hist",
        n_estimators=300,
        random_state=42,
        n_jobs=-1,
        eval_metric="mlogloss",
    ))
])


tscv = TimeSeriesSplit(n_splits=5, gap=SMOOTH_WINDOW // 2 + 1)

param_grid = {
    "xgb__max_depth": [3, 6],
    "xgb__learning_rate": [0.05, 0.1],
    "xgb__subsample": [0.7, 1.0],
    "xgb__colsample_bytree": [0.7, 1.0],
    "xgb__min_child_weight": [1, 5],
}

grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=tscv,
    scoring="f1_macro",
    n_jobs=-1,
    refit=True,
)

grid.fit(X_train, y_train_labeled)
print("Best params:", grid.best_params_)

best_pipe = grid.best_estimator_
y_pred = best_pipe.predict(X_test)

print("\n=== Standard Evaluation ===")
print(classification_report(
    y_test_labeled,
    y_pred,
    target_names=classes,
    zero_division=0
))
print(confusion_matrix(y_test_labeled, y_pred))

print("\n=== Temporal Tolerance Evaluation ===")
temporal_event_eval(
    y_test_labeled,
    y_pred,
    tolerance=2,
    use_labels=False
)

joblib.dump(grid, "unused_models/xgbus_model.joblib")

['unused_models/xgbus_model.joblib']

## 3. MLP

In [63]:
# ====== Make a small validation split from the tail of train (chronological) ======
val_ratio = 0.1
split_idx = int((1.0 - val_ratio) * len(X_train_scaled))
X_train_mlp, X_val_mlp = X_train_scaled[:split_idx], X_train_scaled[split_idx:]
y_train_mlp, y_val_mlp = y_train_labeled[:split_idx], y_train_labeled[split_idx:]


# --- Your model builder ---
def build_mlp(input_dim, num_classes):
    model = keras.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(num_classes, activation="softmax")
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=[]  
    )
    return model

mlp = build_mlp(X_train_mlp.shape[1], num_classes)

# --- Macro F1 callback ---
class MacroF1Callback(keras.callbacks.Callback):
    def __init__(self, X_val, y_val, patience=5):
        super().__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.best_f1 = -np.inf
        self.best_weights = None
        self.patience = patience
        self.wait = 0

    def on_epoch_end(self, epoch, logs=None):
        y_proba = self.model.predict(self.X_val, verbose=0)
        y_pred = y_proba.argmax(axis=1)
        f1_macro = f1_score(self.y_val, y_pred, average="macro", zero_division=0)
        logs = logs or {}
        logs["val_f1_macro"] = f1_macro
        print(f" — val_f1_macro: {f1_macro:.4f}")

        if f1_macro > self.best_f1:
            self.best_f1 = f1_macro
            self.best_weights = self.model.get_weights()
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                print(f"Early stopping on macro F1 (patience={self.patience}). Restoring best weights.")
                self.model.stop_training = True
                if self.best_weights is not None:
                    self.model.set_weights(self.best_weights)

macro_f1_cb = MacroF1Callback(X_val_mlp, y_val_mlp, patience=5)

# --- Other callbacks for stability ---
callbacks = [
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-5),
    macro_f1_cb,
]

# Weights per class
sample_weights_mlp = compute_sample_weight(
    class_weight="balanced",
    y=y_train_mlp
)
# Boost non-zero classes (hits / bounces)
sample_weights_mlp[y_train_mlp > 0] *= 5

history = mlp.fit(
    X_train_mlp, y_train_mlp,
    validation_data=(X_val_mlp, y_val_mlp),
    epochs=30,
    batch_size=128,
    sample_weight=sample_weights_mlp,
    callbacks=callbacks,
)


# ====== Evaluate on test ======
y_proba = mlp.predict(X_test_scaled, batch_size=256)
y_pred  = y_proba.argmax(axis=1)

print("\n=== Standard Evaluation ===")
print(classification_report(
    y_test_labeled,
    y_pred,
    target_names=classes,
    zero_division=0
))
print(confusion_matrix(y_test_labeled, y_pred))

print("\n=== Temporal Tolerance Evaluation ===")
temporal_event_eval(
    y_test_labeled,
    y_pred,
    tolerance=2,
    use_labels=False
)

joblib.dump(mlp, "unused_models/mlp_model.joblib")


Epoch 1/30
[1m618/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 2.4818 — val_f1_macro: 0.3364
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 2.0180 - val_loss: 1.1712 - learning_rate: 0.0010 - val_f1_macro: 0.3364
Epoch 2/30
[1m627/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 1.3790 — val_f1_macro: 0.3625
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.3700 - val_loss: 0.9520 - learning_rate: 0.0010 - val_f1_macro: 0.3625
Epoch 3/30
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 1.2056 — val_f1_macro: 0.3529
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 1.1708 - val_loss: 1.0100 - learning_rate: 0.0010 - val_f1_macro: 0.3529
Epoch 4/30
[1m615/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - loss: 1.0321 — val_f1_macro: 0.3531
[1m631/631[0m [32m━━━━━━━━━━━━━━━

['unused_models/mlp_model.joblib']

## 4. LSTM

In [68]:
# ----- Macro F1 callback -----
class MacroF1Callback(keras.callbacks.Callback):
    def __init__(self, X_val, y_val, patience=6):
        super().__init__()
        self.X_val = X_val
        self.y_val = y_val
        self.best_f1 = -np.inf
        self.best_weights = None
        self.patience = patience
        self.wait = 0

    def on_epoch_end(self, epoch, logs=None):
        y_proba = self.model.predict(self.X_val, verbose=0, batch_size=128)
        y_pred = y_proba.argmax(axis=1)
        f1_macro = f1_score(self.y_val, y_pred, average="macro", zero_division=0)
        logs = logs or {}
        logs["val_f1_macro"] = f1_macro
        print(f" — val_f1_macro: {f1_macro:.4f}")

        if f1_macro > self.best_f1:
            self.best_f1 = f1_macro
            self.best_weights = self.model.get_weights()
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                print(f"Early stopping on macro F1 (patience={self.patience}). Restoring best weights.")
                self.model.stop_training = True
                if self.best_weights is not None:
                    self.model.set_weights(self.best_weights)


def build_lstm(window_size, feature_dim, num_classes, bidirectional=False):
    inputs = keras.Input(shape=(window_size, feature_dim))

    x = inputs
    if bidirectional:
        x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
        x = layers.Bidirectional(layers.LSTM(64))(x)
    else:
        x = layers.LSTM(64, return_sequences=True)(x)
        x = layers.LSTM(64)(x)

    x = layers.Dropout(0.2)(x)
    x = layers.Dense(64, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=[]  
    )
    return model

# ====== Sequences (from your code) ======
def make_sequences(X: np.ndarray, y: np.ndarray, window: int = 7, stride: int = 1):
    X_seq, y_seq = [], []
    for start in range(0, len(X) - window + 1, stride):
        end = start + window
        X_seq.append(X[start:end])
        mid_idx = start + window // 2
        y_seq.append(y[mid_idx])
    return np.array(X_seq), np.array(y_seq)

window = 7
stride = 1

X_train_seq, y_train_seq = make_sequences(X_train_deep_scaled, y_train_labeled, window=window, stride=stride)
X_test_seq,  y_test_seq  = make_sequences(X_test_deep_scaled,  y_test_labeled,  window=window, stride=stride)

num_classes = len(classes)
feature_dim = X_train_seq.shape[-1]

# Chronological validation split (tail)
val_ratio = 0.1
split_idx = int((1.0 - val_ratio) * len(X_train_seq))
X_train_lstm, X_val_lstm = X_train_seq[:split_idx], X_train_seq[split_idx:]
y_train_lstm, y_val_lstm = y_train_seq[:split_idx], y_train_seq[split_idx:]

# Weights per class
sample_weights_lstm = compute_sample_weight(
    class_weight="balanced",
    y=y_train_lstm
)
# Boost non-zero classes (hits / bounces)
sample_weights_lstm[y_train_lstm > 0] *= 5

# ====== Build LSTM ======
lstm = build_lstm(window, feature_dim, num_classes, bidirectional=True)

# Callbacks: LR on val_loss, early stop/restore on val macro F1 via custom callback
callbacks = [
    keras.callbacks.ReduceLROnPlateau(monitor="val_f1_macro", mode="max", factor=0.5, patience=3, min_lr=1e-5),
    MacroF1Callback(X_val_lstm, y_val_lstm, patience=6),
]

# ====== Train ======
history = lstm.fit(
    X_train_lstm, y_train_lstm,
    validation_data=(X_val_lstm, y_val_lstm),
    epochs=40,
    batch_size=128,
    sample_weight=sample_weights_lstm,
    callbacks=callbacks,
)


# ====== Evaluate ======
y_proba_seq = lstm.predict(X_test_seq, batch_size=128)
y_pred_seq  = y_proba_seq.argmax(axis=1)

print("\n=== Standard Evaluation ===")
print(classification_report(
    y_test_seq,
    y_pred_seq,
    target_names=classes,
    zero_division=0
))
print(confusion_matrix(y_test_seq, y_pred_seq))

print("\n=== Temporal Tolerance Evaluation ===")
temporal_event_eval(
    y_test_seq,
    y_pred_seq,
    tolerance=2,
    use_labels=False
)

joblib.dump(lstm, "unused_models/lstm_model.joblib")


Epoch 1/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 2.0158

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.2685
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 20ms/step - loss: 1.5060 - val_loss: 1.4534 - learning_rate: 0.0010 - val_f1_macro: 0.2685
Epoch 2/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - loss: 1.0111

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.3177
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - loss: 0.9804 - val_loss: 1.2828 - learning_rate: 0.0010 - val_f1_macro: 0.3177
Epoch 3/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.8293

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.3441
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.8453 - val_loss: 1.0859 - learning_rate: 0.0010 - val_f1_macro: 0.3441
Epoch 4/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.7269

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.3604
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - loss: 0.7709 - val_loss: 1.0140 - learning_rate: 0.0010 - val_f1_macro: 0.3604
Epoch 5/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.6622

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.3735
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.7224 - val_loss: 0.8138 - learning_rate: 0.0010 - val_f1_macro: 0.3735
Epoch 6/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.7274

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.3781
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.6621 - val_loss: 1.2223 - learning_rate: 0.0010 - val_f1_macro: 0.3781
Epoch 7/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.6171

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4024
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - loss: 0.6223 - val_loss: 0.7052 - learning_rate: 0.0010 - val_f1_macro: 0.4024
Epoch 8/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.5868

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4271
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.5758 - val_loss: 0.5894 - learning_rate: 0.0010 - val_f1_macro: 0.4271
Epoch 9/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.4973

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.3559
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.5568 - val_loss: 1.0072 - learning_rate: 0.0010 - val_f1_macro: 0.3559
Epoch 10/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.5774

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4083
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.5222 - val_loss: 0.7479 - learning_rate: 0.0010 - val_f1_macro: 0.4083
Epoch 11/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.5463

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4212
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.5353 - val_loss: 0.7099 - learning_rate: 0.0010 - val_f1_macro: 0.4212
Epoch 12/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.4763

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4106
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 0.4586 - val_loss: 0.7648 - learning_rate: 0.0010 - val_f1_macro: 0.4106
Epoch 13/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.4945

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4764
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.4942 - val_loss: 0.4678 - learning_rate: 0.0010 - val_f1_macro: 0.4764
Epoch 14/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 16ms/step - loss: 0.4649

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4754
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 18ms/step - loss: 0.4216 - val_loss: 0.5069 - learning_rate: 0.0010 - val_f1_macro: 0.4754
Epoch 15/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.3951

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4167
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.4223 - val_loss: 0.6819 - learning_rate: 0.0010 - val_f1_macro: 0.4167
Epoch 16/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.4502

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4673
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.4187 - val_loss: 0.5202 - learning_rate: 0.0010 - val_f1_macro: 0.4673
Epoch 17/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - loss: 0.3739

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4624
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - loss: 0.3560 - val_loss: 0.5306 - learning_rate: 0.0010 - val_f1_macro: 0.4624
Epoch 18/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.3081

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4816
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.3526 - val_loss: 0.4929 - learning_rate: 0.0010 - val_f1_macro: 0.4816
Epoch 19/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.3244

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4849
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.3382 - val_loss: 0.4924 - learning_rate: 0.0010 - val_f1_macro: 0.4849
Epoch 20/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.3259

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5038
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.3216 - val_loss: 0.4516 - learning_rate: 0.0010 - val_f1_macro: 0.5038
Epoch 21/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.2810

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4675
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 20ms/step - loss: 0.2780 - val_loss: 0.5189 - learning_rate: 0.0010 - val_f1_macro: 0.4675
Epoch 22/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.2577

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4265
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.2802 - val_loss: 0.7673 - learning_rate: 0.0010 - val_f1_macro: 0.4265
Epoch 23/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.2696

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4794
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - loss: 0.2670 - val_loss: 0.5363 - learning_rate: 0.0010 - val_f1_macro: 0.4794
Epoch 24/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.2317

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5214
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - loss: 0.2127 - val_loss: 0.4399 - learning_rate: 0.0010 - val_f1_macro: 0.5214
Epoch 25/40
[1m628/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - loss: 0.1817

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5261
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 20ms/step - loss: 0.1884 - val_loss: 0.3847 - learning_rate: 0.0010 - val_f1_macro: 0.5261
Epoch 26/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - loss: 0.1980

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5338
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - loss: 0.1868 - val_loss: 0.3886 - learning_rate: 0.0010 - val_f1_macro: 0.5338
Epoch 27/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 18ms/step - loss: 0.2018

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4769
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - loss: 0.2424 - val_loss: 0.5325 - learning_rate: 0.0010 - val_f1_macro: 0.4769
Epoch 28/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 19ms/step - loss: 0.2502

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5353
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 21ms/step - loss: 0.2149 - val_loss: 0.3641 - learning_rate: 0.0010 - val_f1_macro: 0.5353
Epoch 29/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 19ms/step - loss: 0.1995

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4934
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 21ms/step - loss: 0.2105 - val_loss: 0.5214 - learning_rate: 0.0010 - val_f1_macro: 0.4934
Epoch 30/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.2050

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5561
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 22ms/step - loss: 0.1905 - val_loss: 0.3157 - learning_rate: 0.0010 - val_f1_macro: 0.5561
Epoch 31/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1968

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4752
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 22ms/step - loss: 0.2039 - val_loss: 0.5789 - learning_rate: 0.0010 - val_f1_macro: 0.4752
Epoch 32/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1950

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5418
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - loss: 0.2089 - val_loss: 0.3349 - learning_rate: 0.0010 - val_f1_macro: 0.5418
Epoch 33/40
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.1672

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.4955
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 23ms/step - loss: 0.1784 - val_loss: 0.5340 - learning_rate: 0.0010 - val_f1_macro: 0.4955
Epoch 34/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 0.1915

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5402
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 24ms/step - loss: 0.1674 - val_loss: 0.3564 - learning_rate: 0.0010 - val_f1_macro: 0.5402
Epoch 35/40
[1m629/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 0.2062

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5163
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 24ms/step - loss: 0.1886 - val_loss: 0.4293 - learning_rate: 0.0010 - val_f1_macro: 0.5163
Epoch 36/40
[1m630/631[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - loss: 0.1754

  callback.on_epoch_end(epoch, logs)


 — val_f1_macro: 0.5550
Early stopping on macro F1 (patience=6). Restoring best weights.
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 24ms/step - loss: 0.1822 - val_loss: 0.3470 - learning_rate: 0.0010 - val_f1_macro: 0.5550
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step

=== Standard Evaluation ===
              precision    recall  f1-score   support

         air       1.00      0.93      0.97     23379
      bounce       0.25      0.95      0.39       308
         hit       0.31      0.93      0.47       323

    accuracy                           0.93     24010
   macro avg       0.52      0.94      0.61     24010
weighted avg       0.98      0.93      0.95     24010

[[21828   898   653]
 [    9   294     5]
 [   19     5   299]]

=== Temporal Tolerance Evaluation ===

Temporal event evaluation (+/- 2 frames)
----------------------------------------------------
Event        | Precision  | Recall     | F1-Score  
------------------

['unused_models/lstm_model.joblib']

### Feature permutaion of LSTM

In [None]:
def permutation_feature_importance_lstm(
    model,
    X,
    y,
    metric_fn,
    n_repeats=3,
    batch_size=1024,
    random_state=42,
):
    """
    X shape: (samples, timesteps, features)
    Returns: importance array of shape (features,)
    """
    rng = np.random.RandomState(random_state)

    # Baseline score
    y_proba = model.predict(X, batch_size=batch_size, verbose=0)
    y_pred = y_proba.argmax(axis=1)
    baseline_score = metric_fn(y, y_pred)

    n_features = X.shape[-1]
    importances = np.zeros(n_features)

    for f in range(n_features):
        scores = []

        for _ in range(n_repeats):
            X_perm = X.copy()

            # Shuffle feature f **across samples**, keeping time structure
            perm_idx = rng.permutation(X_perm.shape[0])
            X_perm[:, :, f] = X_perm[perm_idx, :, f]

            y_proba_perm = model.predict(X_perm, batch_size=batch_size, verbose=0)
            y_pred_perm = y_proba_perm.argmax(axis=1)

            score = metric_fn(y, y_pred_perm)
            scores.append(score)

        importances[f] = baseline_score - np.mean(scores)

    return importances, baseline_score


# ====== Feature Importance (Permutation) ======
feature_importance, baseline_f1 = permutation_feature_importance_lstm(
    model=lstm,
    X=X_val_lstm,                # use validation set
    y=y_val_lstm,
    metric_fn=lambda y_true, y_pred: f1_score(
        y_true, y_pred, average="macro", zero_division=0
    ),
    n_repeats=5,
)

# Sort features by importance
feature_importance_df = (
    pd.DataFrame({
        "feature": feature_cols,
        "importance": feature_importance,
    })
    .sort_values("importance", ascending=False)
)

print("Baseline macro F1:", baseline_f1)
print(feature_importance_df)


plt.figure(figsize=(10, 6))
plt.barh(
    feature_importance_df["feature"],
    feature_importance_df["importance"]
)
plt.gca().invert_yaxis()
plt.title("Permutation Feature Importance (LSTM)")
plt.xlabel("Decrease in Macro F1")
plt.tight_layout()
plt.show()
