# Linear Regression

Purpose: load the 24-feature processed dataset, fit LinearRegression, evaluate with CV, (optional) simple tuning, and save the trained model.


# Data Loading

In [1]:
from pathlib import Path
import pandas as pd

ROOT = Path("..")
PROC = ROOT / "data" / "train" / "housing_train_processed.csv"

df = pd.read_csv(PROC)
y = df["median_house_value"].values
X = df.drop(columns=["median_house_value"]).values
print(X.shape)  # should be (*, 24)


(16512, 24)


# Model Fitting

In [2]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)
print("Train R^2:", model.score(X, y))


Train R^2: 0.666180500095032


# Cross-Validation

In [3]:
from sklearn.model_selection import cross_val_score
import numpy as np

scores = cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=5, n_jobs=-1)
rmse = np.sqrt(-scores)
print("CV RMSE:", rmse)
print("Mean RMSE:", rmse.mean(), "±", rmse.std())


CV RMSE: [66816.46153052 66770.96413659 66705.3276392  67580.82945142
 66465.27874681]
Mean RMSE: 66867.77230090568 ± 376.52209628049826


# Hyperparameter Tuning

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

pipe = Pipeline([
    ("scale", StandardScaler(with_mean=True)),
    ("lr", LinearRegression())
])
param_grid = {
    "scale__with_mean": [True, False],
    "scale__with_std": [True, False],
}
gs = GridSearchCV(pipe, param_grid, scoring="neg_mean_squared_error", cv=5, n_jobs=-1)
gs.fit(X, y)
import numpy as np
print("Best params:", gs.best_params_)
print("Best RMSE:", np.sqrt(-gs.best_score_))
best_model = gs.best_estimator_


Best params: {'scale__with_mean': False, 'scale__with_std': True}
Best RMSE: 66868.83236138309


# Model Saving

In [5]:
import joblib, pathlib
final_model = best_model if "best_model" in locals() else model
out = pathlib.Path(".") / "linear_regression_model.pkl"
joblib.dump(final_model, out)
out


PosixPath('linear_regression_model.pkl')