# 04 - Deep MLP
Train the Deep MLP and visualize results.


In [ ]:
from pathlib import Path
import sys

# Resolve the parent of the current working directory and make sure it is on
# sys.path (appended once) so modules located there can be imported.
ROOT = Path("..").resolve()
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

import numpy as np
import pandas as pd


In [ ]:
import importlib
import src.models as models
# Re-execute the src.models module so that edits made to it after the first
# import are picked up without restarting the kernel. Only this module is
# reloaded; anything it imports is left as-is.
importlib.reload(models)
# Bind the helpers from the freshly reloaded module object (a plain
# `from src.models import ...` done earlier would still point at stale objects).
make_mlp_model = models.make_mlp_model
make_pipeline = models.make_pipeline
build_search = models.build_search
from src.eval import evaluate_models
from src.plots import plot_actual_vs_pred, plot_error_distribution
from _common import load_dataset, prepare_features
from src.split import SplitConfig
from contextlib import contextmanager

# ---------------------------------------------------------------------------
# Run configuration. Edit the toggles here; everything below reads from them.
# ---------------------------------------------------------------------------
SEED = 42

SMALL_MODE = True  # toggle for quick iteration
TUNE_MODE = "fast"  # off | fast | full
SEARCH_VERBOSE = 2  # sklearn CV logging
SHOW_CV_TQDM = True  # tqdm progress for CV fits

MLP_VERBOSE = 2  # 0/False, 1=tqdm, 2=tqdm + per-epoch log
MLP_LOG_EVERY = 1
MLP_BATCH_LOG_EVERY = 20  # set 0 to disable
MLP_LIVE_PLOT_EVERY = 5   # epochs; set 0 to disable

# Small-mode overrides: each value is None unless SMALL_MODE is on.
# (None is presumably treated downstream as "use the library default" -- confirm
# against make_mlp_model / build_search.)
SEARCH_N_ITER = 6 if SMALL_MODE else None  # only used for randomized search
MLP_HIDDEN_LAYERS = (128, 64) if SMALL_MODE else None
MLP_EPOCHS = 40 if SMALL_MODE else None
MLP_BATCH_SIZE = 64 if SMALL_MODE else None
MLP_PARAM_GRID = (
    {
        "model__hidden_layers": [(128, 64), (128, 64, 32)],
        "model__dropout": [0.2, 0.3],
        "model__lr": [1e-3],
        "model__batch_size": [64, 128],
        "model__epochs": [30, 60],
        "model__weight_decay": [0.0, 1e-4],
    }
    if SMALL_MODE
    else None
)

# Optional GPU info + explicit device.
# DEVICE remains None when torch cannot be imported or the availability check
# raises; otherwise it is set to "cuda" or "cpu" and a one-line status is printed.
DEVICE = None
try:
    import torch
    if not torch.cuda.is_available():
        DEVICE = "cpu"
        print("GPU not available; using CPU.")
    else:
        DEVICE = "cuda"
        print(f"GPU available: {torch.cuda.get_device_name(0)}")
except Exception as exc:
    print(f"Torch not available for GPU check ({exc}).")


@contextmanager
def tqdm_joblib(total, desc="CV fits"):
    """Show a tqdm progress bar that advances as joblib batches complete.

    While the context is active, ``joblib.parallel.BatchCompletionCallBack`` is
    replaced by a subclass that bumps the bar by ``self.batch_size`` every time
    it is called, then defers to the original callback. The original class is
    put back and the bar is closed on exit, even if the body raises.

    Args:
        total: Value passed to tqdm as the bar's ``total``.
        desc: Label passed to tqdm as the bar's ``desc``.

    Yields:
        The tqdm bar, or ``None`` (with nothing patched) when joblib or tqdm
        cannot be imported.
    """
    try:
        import joblib
        from tqdm.auto import tqdm
    except Exception:  # noqa: BLE001
        # Progress reporting is optional: run the body without a bar.
        yield None
        return

    original_callback_cls = joblib.parallel.BatchCompletionCallBack
    progress = tqdm(total=total, desc=desc)

    class ProgressReportingCallback(original_callback_cls):
        def __call__(self, *args, **kwargs):
            # Best effort only -- a failing progress update must never break
            # the actual completion handling below.
            try:
                progress.update(n=self.batch_size)
            except Exception:
                pass
            return super().__call__(*args, **kwargs)

    joblib.parallel.BatchCompletionCallBack = ProgressReportingCallback
    try:
        yield progress
    finally:
        joblib.parallel.BatchCompletionCallBack = original_callback_cls
        progress.close()


def estimate_total_fits(search):
    """Estimate how many fits a cross-validated search will run.

    The result is ``n_splits * n_candidates`` and is meant as the ``total`` of
    a progress bar, so every step degrades to a fallback instead of raising.

    Args:
        search: Search object exposing ``cv`` and either ``n_iter`` (used as
            the candidate count when present) or ``param_grid``.

    Returns:
        The number of CV splits multiplied by the number of candidates.
    """
    cv = search.cv
    if cv is None:
        # scikit-learn treats an unset cv as its default 5-fold split.
        n_splits = 5
    elif isinstance(cv, int):
        # A plain integer cv *is* the fold count; it has no get_n_splits(),
        # so it must not fall through to the "1 split" fallback below.
        n_splits = cv
    else:
        try:
            n_splits = cv.get_n_splits()
        except Exception:
            # Splitter needs arguments or is not a splitter object at all.
            n_splits = getattr(cv, "n_splits", 1)

    if hasattr(search, "n_iter"):
        n_candidates = search.n_iter
    else:
        try:
            from sklearn.model_selection import ParameterGrid
            n_candidates = len(ParameterGrid(search.param_grid))
        except Exception:
            # sklearn missing or no usable param_grid: count one candidate.
            n_candidates = 1
    return n_splits * n_candidates


# Load the dataset and let the project helper produce the train / val /
# trainval / test frames together with the list of feature columns.
split_config = SplitConfig(test_rounds=6)
df, metadata = load_dataset()
train_df, val_df, trainval_df, test_df, features = prepare_features(
    df,
    metadata,
    split_config=split_config,
)

# Inputs and targets handed to evaluate_models below.
X_train, X_val = train_df[features], val_df[features]
y_train, y_val = (
    frame["LapTimeSeconds"].to_numpy() for frame in (train_df, val_df)
)

# Base pipeline around the MLP; every knob comes from the configuration above.
base = make_pipeline(
    make_mlp_model(
        SEED,
        verbose=MLP_VERBOSE,
        log_every=MLP_LOG_EVERY,
        log_batch_every=MLP_BATCH_LOG_EVERY,
        live_plot_every=MLP_LIVE_PLOT_EVERY,
        hidden_layers=MLP_HIDDEN_LAYERS,
        epochs=MLP_EPOCHS,
        batch_size=MLP_BATCH_SIZE,
        device=DEVICE,
    ),
    features,
)
# Wrap the pipeline according to TUNE_MODE (what comes back for each mode is
# decided by build_search; the hasattr check below copes with either case).
model = build_search(
    "Deep MLP",
    base,
    random_state=SEED,
    mode=TUNE_MODE,
    param_grid=MLP_PARAM_GRID,
    n_iter=SEARCH_N_ITER,
    search_verbose=SEARCH_VERBOSE,
)

# Only objects exposing a `cv` attribute get the CV progress bar.
if not (SHOW_CV_TQDM and hasattr(model, "cv")):
    metrics, preds, fitted = evaluate_models({"Deep MLP": model}, X_train, y_train, X_val, y_val)
else:
    total_fits = estimate_total_fits(model)
    with tqdm_joblib(total_fits, desc="CV fits"):
        metrics, preds, fitted = evaluate_models({"Deep MLP": model}, X_train, y_train, X_val, y_val)

metrics




In [ ]:
# Take the tuned estimator when the fitted object is a search (it then has
# `best_estimator_`), otherwise use the fitted object itself.
best = getattr(fitted["Deep MLP"], "best_estimator_", fitted["Deep MLP"])

# Refit on trainval_df (presumably train + validation combined -- see
# prepare_features) and predict the test rows.
X_trainval, X_test = trainval_df[features], test_df[features]
y_trainval, y_test = (
    frame["LapTimeSeconds"].to_numpy() for frame in (trainval_df, test_df)
)
best.fit(X_trainval, y_trainval)
test_pred = best.predict(X_test)

plot_actual_vs_pred(y_test, test_pred, title="Deep MLP: Predicted vs Actual")


In [ ]:
# Plot the error distribution for the test predictions; y_test and test_pred
# come from the previous cell, so that cell must have been run first.
plot_error_distribution(y_test, test_pred, title="Deep MLP: Residuals")


In [ ]:
# Training history (post-hoc)
# Pull the recorded loss curves off the fitted MLP step and plot them.
import plotly.express as px

est = fitted["Deep MLP"]
if hasattr(est, "best_estimator_"):
    # A search object was fitted: inspect the tuned pipeline it selected.
    est = est.best_estimator_
# NOTE: this rebinds `model` (the search object built earlier) to the MLP step.
model = est.named_steps["model"]
# NOTE(review): if the refit cell was run, this is the same estimator object
# that was refitted there, so the history may reflect that refit -- confirm.
hist = getattr(model, "training_history_", None)
if hist:
    df_hist = pd.DataFrame(hist)
    fig = px.line(df_hist, y=["train_loss", "val_loss"], title="Deep MLP Training Curves")
    # Jupyter only auto-displays the last *top-level* expression of a cell; a
    # bare `fig` inside this if-block is a no-op, so render it explicitly.
    fig.show()
else:
    print("No training history found.")
