# Regression Models using Scikit-Learn
## Linear, Ridge (L2), and Lasso (L1) Regression
This notebook demonstrates how to train and evaluate three regression models — Linear, Ridge, and Lasso — using Scikit-Learn on the provided dataset.


## Importing Required Libraries

In [54]:

from sklearn import datasets
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge


## Loading and Inspecting the Dataset

In [38]:
df = pd.read_csv("California_Houses.csv")
# X are the predictior features that's why we dropped the Median House Value column because that's the target
x = df.drop(columns=['Median_House_Value'])
# Median House Value is the target and we need to predict the values
y = df['Median_House_Value']


## Train, Validation, Test Split

In [16]:
#Here we are splitting the data into 85% training set and 15% validation test that we will use after training the model 
x_train_val, x_test, y_train_val, y_test = train_test_split(x,y, test_size=(15/100) , random_state = 42)
#Here we are spliiting the 85% into 70% training set and 15% testing test for the model
x_train , x_val, y_train , y_val = train_test_split(x_train_val,y_train_val, test_size=(15/85), random_state=42)


## Feature Scaling

In [46]:
#Here we are scaling the data so that some columns value will dominate the values of other column and make the model behave badly
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_val_scaled= scaler.transform(x_val)
x_test_scaled= scaler.transform(x_test)

## Model 1: Linear Regression

In [53]:
#Linear Regression

#Training the Model
regressor = LinearRegression()
regressor.fit(x_train_scaled, y_train)

# Predict on validation data
y_pred = regressor.predict(x_val_scaled)


# Evaluate using validation set
mse = mean_squared_error(y_val, y_pred)
mae = mean_absolute_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)

#Fianl Evaluation with the testing Set
y_pred_test = regressor.predict(x_test_scaled)

mse_test = mean_squared_error(y_test,y_pred_test)
mae_test = mean_squared_error(y_test,y_pred_test)
r2_test = r2_score(y_test,y_pred_test)

print("Final Test Results:")
print("MSE:", mse_test)
print("MAE:", mae_test)
print("R²:", r2_test)

Final Test Results:
MSE: 4857004593.678398
MAE: 4857004593.678398
R²: 0.6293799615491322


## Model 2: Lasso Regression (L1)

In [51]:
# L1 Regularization (Lasso Regression)

alphas = [0.001, 0.005, 0.01, 0.1, 1, 10]
results = []

for a in alphas:
    model = Lasso(alpha=a, max_iter=200000, tol=0.01) 
    model.fit(x_train_scaled, y_train)
    y_pred = model.predict(x_val_scaled)
    mse = mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    results.append((a, mse, mae, r2))

# Pick best alpha
best_result = min(results, key=lambda x: x[1])
best_alpha = best_result[0]
print("\nBest alpha:", best_alpha)

# Train final model on training + validation
final_model = Lasso(alpha=best_alpha, max_iter=200000, tol=0.01)
final_model.fit(np.concatenate((x_train_scaled, x_val_scaled)),
                np.concatenate((y_train, y_val)))

# Test evaluation
y_pred_test = final_model.predict(x_test_scaled)
print("\nFinal Test Results:")
print("MSE:", mean_squared_error(y_test, y_pred_test))
print("MAE:", mean_absolute_error(y_test, y_pred_test))
print("R²:", r2_score(y_test, y_pred_test))



Best alpha: 0.001

Final Test Results:
MSE: 4855339654.3690605
MAE: 50732.63630045991
R²: 0.6295070069037213


## Model 3: Ridge Regression (L2)

In [49]:
#L2 Regularization (Ridge Regression)
alphas = [0.001, 0.005, 0.01, 0.1, 1, 10]
results = []

for a in alphas:
    model = Ridge(alpha=a, max_iter=200000, tol=0.01) 
    model.fit(x_train_scaled, y_train)
    y_pred = model.predict(x_val_scaled)
    mse = mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    results.append((a, mse, mae, r2))

# Pick best alpha
best_result = min(results, key=lambda x: x[1])
best_alpha = best_result[0]
print("\nBest alpha:", best_alpha)

# Train final model on training + validation
final_model = Ridge(alpha=best_alpha, max_iter=200000, tol=0.01)
final_model.fit(np.concatenate((x_train_scaled, x_val_scaled)),
                np.concatenate((y_train, y_val)))

# Test evaluation
y_pred_test = final_model.predict(x_test_scaled)
print("\nFinal Test Results:")
print("MSE:", mean_squared_error(y_test, y_pred_test))
print("MAE:", mean_absolute_error(y_test, y_pred_test))
print("R²:", r2_score(y_test, y_pred_test))


Best alpha: 0.001

Final Test Results:
MSE: 4855339796.263224
MAE: 50732.63951573494
R²: 0.6295069960763031
