In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer


In [None]:
# Read the housing table from the mounted Google Drive folder.
# The location is kept in one named constant so it is easy to spot and change.
HOUSING_CSV = '/content/drive/MyDrive/housing.csv'
data = pd.read_csv(HOUSING_CSV)
data  # bare last expression -> rich (truncated) DataFrame display


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND


In [None]:
# Target: the median house value column.
y = data["median_house_value"]
# Predictors: every remaining column except the target itself and the
# text-valued "ocean_proximity" column.
X = data.drop(columns=["median_house_value", "ocean_proximity"])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Baseline model: ordinary least squares on the numeric features.
#
# Missing feature values are filled *before* fitting: LinearRegression rejects
# NaN input, so the model must never see the un-imputed split. Doing the
# imputation here keeps the notebook runnable top-to-bottom on a fresh kernel
# and guarantees the model is fitted on the same array representation it is
# later asked to predict on.
#
# The imputer learns the column means from the training split only and reuses
# them for the test split, so no test-set information leaks into training.
# Imputing already-imputed arrays again is harmless (nothing is left to fill).
imputer = SimpleImputer(strategy="mean")
X_train = imputer.fit_transform(X_train)  # plain array output by default
X_test = imputer.transform(X_test)

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)



In [None]:

# Show the fitted baseline parameters (one coefficient per feature column).
print("=== Baseline Linear Regression ===")
for label, value in (
    ("Coefficients:", lin_reg.coef_),
    ("Intercept:", lin_reg.intercept_),
):
    print(label, value)


=== Baseline Linear Regression ===
Coefficients: [-4.26323917e+04 -4.24500719e+04  1.18280965e+03 -8.18797708e+00
  1.16260128e+02 -3.84922131e+01  4.63425720e+01  4.05384044e+04]
Intercept: -3578224.234818279


In [None]:
# Replace missing feature values with the per-column mean.
# The means are learned from the training split only and then applied to both
# splits, so the test data never influences the fill values.
# X_train / X_test are overwritten with the imputed arrays.
imputer = SimpleImputer(strategy="mean")
imputer.fit(X_train)
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)


In [None]:
# Baseline predictions for both splits (train first, then test).
y_train_pred, y_test_pred = (
    lin_reg.predict(features) for features in (X_train, X_test)
)

In [None]:
# Mean squared error of the baseline on each split; a test error well above
# the train error would indicate over-fitting.
for label, actual, predicted in (
    ("Train MSE:", y_train, y_train_pred),
    ("Test  MSE:", y_test, y_test_pred),
):
    print(label, mean_squared_error(actual, predicted))

Train MSE: 4811134397.884198
Test  MSE: 5052955174.596939


In [None]:
# Candidate regularisation strengths shared by the Ridge and Lasso searches.
#
# The range reaches 1e3 rather than stopping at 1e0: when a search picks the
# largest candidate as "best", the optimum may lie beyond the grid, so the
# upper bound needs head-room. The 0.25-decade spacing is unchanged
# (25 points over 6 decades), so every previous candidate is still included.
alpha_grid = {"alpha": np.logspace(-3, 3, 25)}  # 0.001 … 1000

In [None]:
# Un-tuned estimators handed to the grid searches; alpha is supplied by the grid.
# Lasso gets a larger iteration budget than its default.
lasso = Lasso(max_iter=10_000, random_state=42)
ridge = Ridge(random_state=42)

In [None]:
# Both searches share the same settings: 5-fold cross-validation, all CPU
# cores, and negated MSE as the score (the sign is flipped back when reported).
search_kwargs = dict(cv=5, scoring="neg_mean_squared_error", n_jobs=-1)

ridge_cv = GridSearchCV(ridge, alpha_grid, **search_kwargs)
lasso_cv = GridSearchCV(lasso, alpha_grid, **search_kwargs)

In [None]:
# Run both cross-validated searches on the training split only; the test
# split stays untouched until the final evaluation.
ridge_cv.fit(X=X_train, y=y_train)
lasso_cv.fit(X=X_train, y=y_train)

In [None]:
# Report the winning alpha and its cross-validated MSE for each model.
# best_score_ holds the negated MSE, hence the sign flip.
print("\n=== Hyperparameter Tuning Results ===")
for alpha_label, mse_label, search in (
    ("Best Ridge alpha:", "Best Ridge CV MSE:", ridge_cv),
    ("Best Lasso alpha:", "Best Lasso CV MSE:", lasso_cv),
):
    print(alpha_label, search.best_params_["alpha"])
    print(mse_label, -search.best_score_)


=== Hyperparameter Tuning Results ===
Best Ridge alpha: 1.0
Best Ridge CV MSE: 4837033105.965662
Best Lasso alpha: 1.0
Best Lasso CV MSE: 4837034276.761754


In [None]:
# Pull out the best estimator found by each search.
best_ridge, best_lasso = ridge_cv.best_estimator_, lasso_cv.best_estimator_

In [None]:
# Predictions of the tuned models on both splits (train first, then test).
ridge_train_pred, ridge_test_pred = (
    best_ridge.predict(features) for features in (X_train, X_test)
)
lasso_train_pred, lasso_test_pred = (
    best_lasso.predict(features) for features in (X_train, X_test)
)


In [None]:
# Tuned Ridge: fitted coefficients followed by train / test error.
print("\n=== Ridge (L2) with best alpha ===")
print("Coefficients:", best_ridge.coef_)
for label, actual, predicted in (
    ("Train MSE:", y_train, ridge_train_pred),
    ("Test  MSE:", y_test, ridge_test_pred),
):
    print(label, mean_squared_error(actual, predicted))



=== Ridge (L2) with best alpha ===
Coefficients: [-4.26226944e+04 -4.24410116e+04  1.18296423e+03 -8.18887824e+00
  1.16246555e+02 -3.84926063e+01  4.63652583e+01  4.05389240e+04]
Train MSE: 4811134444.917009
Test  MSE: 5052928798.189811


In [None]:
# Tuned Lasso: fitted coefficients followed by train / test error.
print("\n=== Lasso (L1) with best alpha ===")
print("Coefficients:", best_lasso.coef_)
for label, actual, predicted in (
    ("Train MSE:", y_train, lasso_train_pred),
    ("Test  MSE:", y_test, lasso_test_pred),
):
    print(label, mean_squared_error(actual, predicted))



=== Lasso (L1) with best alpha ===
Coefficients: [-4.26286808e+04 -4.24466049e+04  1.18286158e+03 -8.18829122e+00
  1.16254615e+02 -3.84923721e+01  4.63513818e+01  4.05385728e+04]
Train MSE: 4811134404.837946
Test  MSE: 5052944040.878705
