# Modeling
This notebook trains, evaluates, and saves models.

In [3]:
import os
import sys

# Put the parent of the current working directory at the front of sys.path
# (presumably the project root, so that the `src` package below can be
# imported -- confirm against the repository layout).
# The original spelling took dirname() of the *string* '__file__', which is
# always "", so the entry was never relative to this notebook's own location.
sys.path.insert(0, os.path.abspath(os.pardir))

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

from src.data_loader import load_data
from src.preprocessing import build_preprocessor
from src.model import train_model, evaluate_model, save_model

ModuleNotFoundError: No module named 'utils'

In [None]:
# Load the data. load_data() returns a pair; only the first element (the Math
# dataset) is used here and the second is discarded.
mat, _ = load_data()
if mat is not None:
    print("Math dataset loaded successfully!")
    print("Shape:", mat.shape)
else:
    # NOTE(review): execution continues after this message, so the following
    # cells will fail on `mat` being None.
    print("Dataset not found!")

NameError: name 'load_data' is not defined

In [None]:
# "G3" is the prediction target; every remaining column is a feature.
y = mat["G3"]
X = mat.drop(columns="G3")

# Partition the feature names by dtype so they can be handed to the
# preprocessor. Only int64/float64 count as numeric and only object as
# categorical; columns of any other dtype end up in neither list.
numeric_cols = list(X.select_dtypes(include=["int64", "float64"]).columns)
categorical_cols = list(X.select_dtypes(include=["object"]).columns)

print("Numeric columns:", len(numeric_cols))
print("Categorical columns:", len(categorical_cols))

In [None]:
# Candidate regressors, keyed by the short name that is later used in log
# messages and in the saved metric/model file names.
models = dict(
    linear_regression=LinearRegression(),
    # Fixed seed so the forest is reproducible between runs.
    random_forest=RandomForestRegressor(n_estimators=100, random_state=42),
)

print("Models to train:", list(models))

In [None]:
# Train, evaluate and persist one pipeline per candidate model, collecting
# whatever evaluate_model returns under the model's name.
results = {}

for name, model in models.items():
    print(f"\n=== Training {name} ===")

    # A new preprocessor is built for every model rather than shared.
    steps = [
        ("preprocessor", build_preprocessor(numeric_cols, categorical_cols)),
        ("model", model),
    ]

    # train_model hands back the pipeline plus the X_test / y_test pair that
    # is passed on to evaluate_model below.
    pipeline, X_test, y_test = train_model(X, y, Pipeline(steps))

    metrics = evaluate_model(
        pipeline,
        X_test,
        y_test,
        metrics_path="results/metrics",
        dataset_name=f"math_{name}",
    )
    save_model(pipeline, "results/models", f"{name}_math.pkl")

    # Recorded only after the model was saved successfully.
    results[name] = metrics

    print(f"{name} training completed!")

In [None]:
# Summary: one section per model, each metric shown with four decimals.
print("\n=== Model Results ===")
for model_name in results:
    metrics = results[model_name]
    print(f"\n{model_name}:")
    for metric in metrics:
        value = metrics[metric]
        print(f"  {metric}: {value:.4f}")