# 02 — Modeling

Interactive model fitting and residual analysis.

In [None]:
# Install a blanket "ignore" filter so warnings do not clutter cell output.
# NOTE(review): this hides every warning category raised by code run after
# this point, including ones that may signal real problems.
import warnings
warnings.filterwarnings("ignore")

# Put the parent directory first on the module search path. This must run
# before the `src.*` imports below; presumably the parent directory is the
# project root that contains the `src` package — TODO confirm.
import sys
sys.path.insert(0, "..")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Project-local helpers: feature construction and the list of candidate models.
from src.features import load_panel_and_build_features
from src.models import get_all_models

# Use seaborn's "whitegrid" style for the plots in this notebook.
sns.set_style("whitegrid")

In [None]:
# Load the feature matrix X and the target series y from the project helper.
X, y = load_panel_and_build_features()

# Number of feature columns in X.
n_features = X.shape[1]
print(f"Features: {n_features}")

# Count only the non-missing target values.
n_observations = y.notna().sum()
print(f"Observations: {n_observations}")

## Single Train/Test Split (Quick Exploration)

In [None]:
# Ordered hold-out split: the first ~80% of rows with a non-missing target
# (in index order) are used for training, the remainder for testing.
TRAIN_FRACTION = 0.8

# Keep only index labels whose target value is present.
valid_mask = y.notna()
valid_dates = y.loc[valid_mask].index

# Position of the first test observation within valid_dates.
split_idx = int(len(valid_dates) * TRAIN_FRACTION)
train_dates, test_dates = valid_dates[:split_idx], valid_dates[split_idx:]

# Select the matching feature rows and target values by label.
X_train = X.loc[train_dates]
y_train = y.loc[train_dates]
X_test = X.loc[test_dates]
y_test = y.loc[test_dates]

# Report the size and date span of each partition.
print(f"Train: {len(train_dates)} obs ({train_dates[0].date()} to {train_dates[-1].date()})")
print(f"Test:  {len(test_dates)} obs ({test_dates[0].date()} to {test_dates[-1].date()})")

In [None]:
# Fit every candidate model on the training split, keep its test-set
# predictions keyed by model name, and print its mean absolute error.
models = get_all_models()
results = {}

# The held-out targets are the same for every model, so extract them once.
y_true = y_test.values

for model in models:
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    results[model.name] = preds

    # Mean absolute error on the held-out observations.
    abs_errors = np.abs(y_true - preds)
    mae = np.mean(abs_errors)
    print(f"{model.name}: MAE = {mae:.4f}")

## Residual Analysis

In [None]:
# Plot test-set residuals (actual minus predicted) for each model, one
# stacked panel per model.
#
# squeeze=False makes plt.subplots always return a 2-D array of Axes. With
# the default squeeze=True a 1x1 grid comes back as a bare Axes object, so
# zip(axes, models) would raise TypeError when only one model is configured.
fig, axes = plt.subplots(
    len(models), 1, figsize=(14, 3 * len(models)), squeeze=False
)
axes = axes.ravel()  # flatten the (n_models, 1) grid to one Axes per model

for ax, model in zip(axes, models):
    residuals = y_test.values - results[model.name]
    ax.plot(test_dates, residuals, "o-", markersize=3)
    ax.axhline(0, color="k", linestyle="--", alpha=0.5)  # zero-error reference
    ax.set_ylabel(f"{model.name}\nResiduals")
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()