# 02 - Linear Model
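Train a linear baseline for lap time (`LapTimeSeconds`), evaluate it on the validation split, then refit it on train+validation and visualize its test-set predictions and residuals.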


In [ ]:
from pathlib import Path
import sys

# Put the parent directory of the working directory on sys.path (once) so the
# `src.*` imports in later cells can be resolved from it.
ROOT = Path("..").resolve()
_root_str = str(ROOT)
if _root_str not in sys.path:
    sys.path.append(_root_str)

import numpy as np
import pandas as pd


In [ ]:
from src.models import make_linear_model, make_pipeline
from src.eval import evaluate_models
from src.plots import plot_actual_vs_pred, plot_error_distribution, plot_model_comparison
from _common import load_dataset, prepare_features
from src.split import SplitConfig

# Experiment settings: the seed handed to the model factory and the split configuration.
SEED = 42
split_config = SplitConfig(test_rounds=6)

# Load the data, then let the shared helper return the four frames plus `features`
# (used below as the column selector for the model inputs).
df, metadata = load_dataset()
splits = prepare_features(df, metadata, split_config=split_config)
train_df, val_df, trainval_df, test_df, features = splits

# Targets (as NumPy arrays) and feature frames for the train and validation frames.
y_train = train_df["LapTimeSeconds"].to_numpy()
X_train = train_df[features]
y_val = val_df["LapTimeSeconds"].to_numpy()
X_val = val_df[features]

# Build the linear pipeline and pass it to evaluate_models together with the
# train and validation data; `fitted` is reused by the next cell.
model = make_pipeline(make_linear_model(SEED), features)
candidates = {"Linear": model}
metrics, preds, fitted = evaluate_models(candidates, X_train, y_train, X_val, y_val)

# Trailing expression so the notebook displays the metrics.
metrics


In [ ]:
# Inputs and targets for the refit (trainval frame) and for the test frame.
X_trainval = trainval_df[features]
y_trainval = trainval_df["LapTimeSeconds"].to_numpy()
X_test, y_test = test_df[features], test_df["LapTimeSeconds"].to_numpy()

# NOTE(review): `best` is the very object stored in `fitted`, so if `fit` updates
# the model in place (presumably it does — confirm), fitted["Linear"] no longer
# matches the validation metrics after this cell runs.
best = fitted["Linear"]
best.fit(X_trainval, y_trainval)
test_pred = best.predict(X_test)

# Kept as the cell's last expression so the notebook renders the plot output.
plot_actual_vs_pred(y_test, test_pred, title="Linear: Predicted vs Actual")


In [ ]:
# Plot the error distribution for the test targets (`y_test`) against the refit
# model's predictions (`test_pred`), both defined in the previous cell.
plot_error_distribution(y_test, test_pred, title="Linear: Residuals")
