In [1]:
import json

import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



In [2]:
# Load the training inputs and targets from the pickled artifacts in ../models.
# NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
X_train, y_train = (
    joblib.load(artifact_path)
    for artifact_path in ("../models/X_processed.pkl", "../models/y.pkl")
)


In [3]:
# Ordinary least-squares baseline; fit() returns the estimator itself, so the
# chained form binds the fitted model directly.
linear_model = LinearRegression().fit(X_train, y_train)
linear_model


In [4]:
# Ridge baseline (alpha=1.0) fitted on the same training data.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)
ridge_model


In [5]:
# In-sample predictions: both models predict on the X_train they were fit on.
y_pred_linear, y_pred_ridge = (
    linear_model.predict(X_train),
    ridge_model.predict(X_train),
)


In [6]:
def regression_metrics(y_true, y_pred):
    """Summarise regression error between true and predicted targets.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    dict
        Mapping with the keys ``"MAE"``, ``"MSE"``, ``"RMSE"`` and ``"R2"``.
    """
    # Compute the MSE once and derive RMSE from it rather than scoring twice.
    mse = mean_squared_error(y_true, y_pred)
    return {
        "MAE": mean_absolute_error(y_true, y_pred),
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "R2": r2_score(y_true, y_pred),
    }


In [7]:
# Score both baselines against y_train. The predictions were made on the data
# the models were fit on, so these numbers describe in-sample fit only.
metrics_linear, metrics_ridge = (
    regression_metrics(y_train, predictions)
    for predictions in (y_pred_linear, y_pred_ridge)
)

# Collect the per-model metric dicts under human-readable model names.
baseline_metrics = dict(
    zip(
        ("Linear Regression", "Ridge Regression"),
        (metrics_linear, metrics_ridge),
    )
)

baseline_metrics


{'Linear Regression': {'MAE': 4170.886894163592,
  'MSE': 36501893.00741544,
  'RMSE': 6041.6796511744515,
  'R2': 0.7509130345985207},
 'Ridge Regression': {'MAE': 4174.379715775466,
  'MSE': 36502401.079211846,
  'RMSE': 6041.721698258853,
  'R2': 0.7509095675437587}}

In [8]:
# Wrap each fitted model's coefficient array in a pandas Series for inspection.
linear_coefficients, ridge_coefficients = (
    pd.Series(fitted.coef_) for fitted in (linear_model, ridge_model)
)


In [9]:
# Persist both fitted baseline estimators under ../models.
joblib.dump(linear_model, "../models/linear_regression.pkl")
joblib.dump(ridge_model, "../models/ridge_regression.pkl")

# Write the metrics dictionary as indented JSON; the explicit encoding keeps
# the output independent of the platform's default locale.
with open("../models/M6_baseline_metrics.json", "w", encoding="utf-8") as f:
    json.dump(baseline_metrics, f, indent=4)
