In [52]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import itertools
from sklearn.metrics import mean_squared_error, r2_score

In [53]:
# Seed NumPy's global RNG; the SGD optimiser defined later in this notebook
# shuffles samples with np.random.permutation, which reads this global state.
np.random.seed(42)
# Load the housing table from a CSV path relative to the working directory.
df = pd.read_csv("California_Houses.csv")

# Print column names, dtypes and non-null counts as a quick sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Median_House_Value        20640 non-null  float64
 1   Median_Income             20640 non-null  float64
 2   Median_Age                20640 non-null  int64  
 3   Tot_Rooms                 20640 non-null  int64  
 4   Tot_Bedrooms              20640 non-null  int64  
 5   Population                20640 non-null  int64  
 6   Households                20640 non-null  int64  
 7   Latitude                  20640 non-null  float64
 8   Longitude                 20640 non-null  float64
 9   Distance_to_coast         20640 non-null  float64
 10  Distance_to_LA            20640 non-null  float64
 11  Distance_to_SanDiego      20640 non-null  float64
 12  Distance_to_SanJose       20640 non-null  float64
 13  Distance_to_SanFrancisco  20640 non-null  float64
dtypes: flo

In [54]:

# Separate the regression target from the predictor columns.
TARGET_COLUMN = "Median_House_Value"
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])



In [55]:

# Two-stage split: first hold out 30% of the rows, then cut that hold-out in
# half to obtain validation and test sets (70% / 15% / 15% overall).
# Both calls use a fixed random_state so the partition is repeatable.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42
)
X_val, X_test, y_val,y_test = train_test_split(
     X_temp, y_temp, test_size=0.50, random_state=42
)

# Report each split's row count and its share of the full dataset.
total_rows = len(df)
print(f"Total rows: {total_rows}")
print(f"Training set:   {len(X_train)} rows ({len(X_train)/total_rows:.2%})")
print(f"Validation set: {len(X_val)} rows ({len(X_val)/total_rows:.2%})")
print(f"Testing set:    {len(X_test)} rows ({len(X_test)/total_rows:.2%})")

Total rows: 20640
Training set:   14448 rows (70.00%)
Validation set: 3096 rows (15.00%)
Testing set:    3096 rows (15.00%)


In [56]:
# Standardise the features so a single regularisation strength penalises every
# weight on a comparable scale. The scaler is fitted on the training split
# only and then re-used for validation/test to avoid leaking their statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Divide the targets by a fixed constant (presumably to keep the loss and its
# gradients at a magnitude the fixed learning rates can handle).
# The scaled targets are re-derived from the untouched `y` through each split's
# row index instead of dividing y_train/y_val/y_test in place: the in-place
# form shrank the targets by another factor of 100000 every time this cell was
# re-run. This relies on `df` having a unique index (it is a RangeIndex here),
# so the .loc lookups return exactly the rows of each split, in split order.
TARGET_SCALE = 100000
y_train = y.loc[X_train.index] / TARGET_SCALE
y_val = y.loc[X_val.index] / TARGET_SCALE
y_test = y.loc[X_test.index] / TARGET_SCALE


In [57]:
class CustomRegressor:
    """Linear regression fitted by gradient descent with optional L1/L2 penalty.

    The model is ``y_hat = X @ weights + bias``. Two optimisers are available:

    * ``"batch"`` - one update per epoch using the gradient averaged over all
      training samples.
    * ``"sgd"``   - one update per sample; the sample order is reshuffled at
      the start of every epoch with ``np.random.permutation`` (global NumPy
      RNG, so call ``np.random.seed`` beforehand for repeatable runs).

    The penalty contributes ``2 * lambd * weights`` (``"l2"``) or
    ``lambd * sign(weights)`` (``"l1"``) to the weight gradient. The bias is
    never penalised.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features. Converted to a float NumPy array.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Training targets. Converted to a flat float NumPy array, so NumPy
        arrays, lists and pandas Series (with any index) are all accepted.
    epochs : int, default 100
        Number of passes over the training data.
    penalty : {None, "l1", "l2"}, default None
        Regularisation type.
    gradient_method : {"batch", "sgd"}, default "batch"
        Optimiser to use in :meth:`fit`.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until :meth:`fit` has run.
    bias : float or None
        Learned intercept; ``None`` until :meth:`fit` has run.
    """

    _PENALTIES = (None, "l1", "l2")
    _GRADIENT_METHODS = ("batch", "sgd")

    def __init__(self, X, y, epochs=100, penalty=None,  gradient_method="batch"):
        self.epochs = epochs
        self.penalty = penalty

        self.weights = None
        self.bias = None
        # Store plain float arrays so both optimisers index samples by
        # position. Previously the SGD path required `y` to be a pandas Series
        # (it used `.iloc`) and failed on NumPy targets. Flattening `y` also
        # stops a column vector from broadcasting into an (n, n) residual.
        self.X = np.asarray(X, dtype=float)
        self.y = np.asarray(y, dtype=float).ravel()
        self.gradient_method = gradient_method

    def _penalty_gradient(self):
        """Return the regularisation term added to the weight gradient."""
        if self.penalty == "l2":
            return 2 * self.lambd * self.weights
        if self.penalty == "l1":
            # Sub-gradient of lambd * |w|; np.sign(0) == 0, so weights that are
            # exactly zero receive no push from the penalty.
            return self.lambd * np.sign(self.weights)
        return 0.0

    def fit(self,learning_rate=0.01, lambd=0):
        """Train the model from zero-initialised weights and bias.

        Parameters
        ----------
        learning_rate : float, default 0.01
            Step size applied to every gradient update.
        lambd : float, default 0
            Regularisation strength; ignored when ``penalty`` is ``None``.

        Returns
        -------
        CustomRegressor
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``gradient_method`` or ``penalty`` is not a supported value, or
            if the training data is empty, not 2-D, or mismatched in length.
        """
        # Fail loudly on configuration typos: an unknown optimiser used to
        # leave the weights at zero and an unknown penalty was ignored.
        if self.gradient_method not in self._GRADIENT_METHODS:
            raise ValueError(
                f"gradient_method must be one of {self._GRADIENT_METHODS}, "
                f"got {self.gradient_method!r}"
            )
        if self.penalty not in self._PENALTIES:
            raise ValueError(
                f"penalty must be one of {self._PENALTIES}, got {self.penalty!r}"
            )
        if self.X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {self.X.shape}"
            )

        n_samples, n_features = self.X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if self.y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {self.y.shape[0]}"
            )

        self.lambd = lambd
        self.learning_rate = learning_rate
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.epochs):
            if self.gradient_method == "batch":
                residual = self.X @ self.weights + self.bias - self.y
                # Each row of X.T holds one feature across all samples, so the
                # product sums residual * feature over the samples in one go.
                dw = self.X.T @ residual / n_samples + self._penalty_gradient()
                db = residual.mean()

                self.weights -= self.learning_rate * dw
                self.bias -= self.learning_rate * db
            else:  # "sgd"
                for idx in np.random.permutation(n_samples):
                    xi = self.X[idx]
                    residual = xi @ self.weights + self.bias - self.y[idx]
                    dw = residual * xi + self._penalty_gradient()

                    self.weights -= self.learning_rate * dw
                    self.bias -= self.learning_rate * residual

        return self

    def predict(self, X):
        """Return ``X @ weights + bias`` for the given feature matrix."""
        return np.dot(X, self.weights) + self.bias

In [58]:



def grid_search(X_train, y_train, X_val, y_val, penalty_type, optimizer_type,
                learning_rates=None, lambdas=None, epochs=None):
    """Pick the learning rate and lambda with the lowest validation MSE.

    Every (learning_rate, lambd) pair from the grids is used to train a fresh
    ``CustomRegressor`` on the training data, which is then scored on the
    validation data with ``mean_squared_error``.

    Parameters
    ----------
    X_train, y_train : training features and targets.
    X_val, y_val : validation features and targets used for model selection.
    penalty_type : {None, "l1", "l2"}
        Regularisation passed to ``CustomRegressor``.
    optimizer_type : {"batch", "sgd"}
        Gradient method passed to ``CustomRegressor``.
    learning_rates : iterable of float, optional
        Candidate step sizes. Defaults to ``[0.1, 0.01, 0.001]`` for batch and
        ``[0.001, 0.0001, 0.00001]`` for any other optimiser.
    lambdas : iterable of float, optional
        Candidate regularisation strengths. Defaults to ``[0]`` when
        ``penalty_type`` is ``None``, else ``[0, 0.001, 0.01, 0.1, 1, 10, 100]``.
    epochs : int, optional
        Training epochs per candidate. Defaults to 100 for batch, 20 otherwise.

    Returns
    -------
    dict
        ``{'learning_rate': ..., 'lambd': ...}`` of the best candidate.

    Raises
    ------
    RuntimeError
        If no candidate produced a finite validation MSE.
    """
    print(f"--- Tuning {penalty_type} ({optimizer_type}) ---")

    if learning_rates is None:
        if optimizer_type == "batch":
            learning_rates = [0.1, 0.01, 0.001]
        else:
            # Per-sample updates are far noisier, so use smaller steps.
            learning_rates = [0.001, 0.0001, 0.00001]

    if lambdas is None:
        if penalty_type is None:
            lambdas = [0]
        else:
            lambdas = [0, 0.001, 0.01, 0.1, 1, 10, 100]

    if epochs is None:
        epochs = 100 if optimizer_type == "batch" else 20

    best_mse = float('inf')
    best_params = {}

    for lr, lam in itertools.product(learning_rates, lambdas):
        model = CustomRegressor(X_train, y_train, epochs, penalty_type, optimizer_type)
        model.fit(lr, lam)
        preds = model.predict(X_val)

        # A diverged run yields NaN/inf predictions, which mean_squared_error
        # rejects with a ValueError; skip that candidate instead of aborting
        # the whole search.
        if not np.all(np.isfinite(preds)):
            print(f"Skipping learning_rate={lr}, lambd={lam}: training diverged")
            continue

        mse = mean_squared_error(y_val, preds)

        if mse < best_mse:
            best_mse = mse
            best_params = {'learning_rate': lr, 'lambd': lam}

    if not best_params:
        # Returning {} here would only surface later as a KeyError.
        raise RuntimeError(
            f"grid search for penalty={penalty_type!r}, optimizer={optimizer_type!r} "
            "found no candidate with a finite validation MSE"
        )

    print(f"Best Params: {best_params}")
    print(f"Best Validation MSE: {best_mse:.2f}")
    print("-" * 30)
    return best_params


In [59]:
# Tune every penalty/optimiser pairing on the validation split.
# The SGD searches shuffle with NumPy's global RNG, so the call order below is
# part of the result and is kept as-is.
tuning_data = (X_train_scaled, y_train, X_val_scaled, y_val)

best_ridge_batch = grid_search(*tuning_data, "l2", "batch")
best_lasso_batch = grid_search(*tuning_data, "l1", "batch")

best_ridge_sgd = grid_search(*tuning_data, "l2", "sgd")

best_linear_batch = grid_search(*tuning_data, None, "batch")

best_linear_sgd = grid_search(*tuning_data, None, "sgd")

best_lasso_sgd = grid_search(*tuning_data, "l1", "sgd")



--- Tuning l2 (batch) ---
Best Params: {'learning_rate': 0.1, 'lambd': 0}
Best Validation MSE: 0.51
------------------------------
--- Tuning l1 (batch) ---
Best Params: {'learning_rate': 0.1, 'lambd': 0}
Best Validation MSE: 0.51
------------------------------
--- Tuning l2 (sgd) ---
Best Params: {'learning_rate': 0.001, 'lambd': 0}
Best Validation MSE: 0.49
------------------------------
--- Tuning None (batch) ---
Best Params: {'learning_rate': 0.1, 'lambd': 0}
Best Validation MSE: 0.51
------------------------------
--- Tuning None (sgd) ---
Best Params: {'learning_rate': 0.0001, 'lambd': 0}
Best Validation MSE: 0.50
------------------------------
--- Tuning l1 (sgd) ---
Best Params: {'learning_rate': 0.001, 'lambd': 0.001}
Best Validation MSE: 0.49
------------------------------


In [60]:
# Final evaluation: refit each tuned configuration on the training split and
# score it once on the held-out test split.
# Each entry is (display name, tuned hyper-parameters from grid_search,
# penalty passed to CustomRegressor, gradient method passed to CustomRegressor).
final_configs = [
    ("Linear Regression (Batch)", best_linear_batch, None, "batch"),
    ("Linear Regression (SGD)",   best_linear_sgd,   None, "sgd"),
    ("Ridge Regression (Batch)",  best_ridge_batch,  "l2", "batch"),
    ("Ridge Regression (SGD)",    best_ridge_sgd,    "l2", "sgd"),
    ("Lasso Regression (Batch)",  best_lasso_batch,  "l1", "batch"),
    ("Lasso Regression (SGD)",    best_lasso_sgd,    "l1", "sgd"),
]

# One dict per model (metrics) and one dict per model/feature pair (weights);
# both lists are turned into DataFrames after the loop.
results_data = []
# Column labels of the unscaled training frame, used to name the weights.
feature_names = X_train.columns

weights_report = []
print("--- FINAL TESTING RESULTS ---\n")

# Refit and evaluate each configuration in turn.
# NOTE: the SGD refits reshuffle samples with the global NumPy RNG, so they are
# not bit-for-bit repeats of the runs that grid_search scored.
for name, params, penalty, optimizer in final_configs:


    # Same epoch budget that grid_search uses: 100 for batch, 20 for SGD.
    model = CustomRegressor(
        X_train_scaled,
        y_train,
        epochs=100 if optimizer == "batch" else 20,
        penalty=penalty,
        gradient_method=optimizer
    )


    model.fit(params['learning_rate'],params['lambd'])


    # Score on the test split (targets are on the same rescaled units as y_train).
    y_pred_test = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred_test)
    r2 = r2_score(y_test, y_pred_test)

    results_data.append({
        "Model": name,
        "Optimizer": optimizer.upper(),
        "Best Lambda": params['lambd'],
        "Best LR": params['learning_rate'],
        "Test MSE": mse,
        "Test R2": r2
    })
    # Record every learned coefficient alongside the feature it belongs to.
    for feature, weight in zip(feature_names, model.weights):
        weights_report.append({
            "Model": name,
            "Feature": feature,
            "Weight": weight,
            "Penalty": penalty if penalty else "None",
            "Optimizer": optimizer.upper(),
            "Lambda": params['lambd']
        })

    print(f"{name}: MSE={mse:.2f}, R2={r2:.4f}")


weights_df = pd.DataFrame(weights_report)
results_df = pd.DataFrame(results_data)
# Reshape to one row per model and one column per feature. Every
# (Model, Feature) pair occurs once, so pivot_table's aggregation leaves the
# weights unchanged.
weights_pivot = weights_df.pivot_table(index="Model", columns="Feature", values="Weight")
print("\n--- LEADERBOARD ---")
# `display` is not imported in this notebook; it is expected to be supplied by
# the IPython/Jupyter kernel.
display(results_df.sort_values(by="Test MSE", ascending=True))
display(weights_pivot)

--- FINAL TESTING RESULTS ---

Linear Regression (Batch): MSE=0.46, R2=0.6533
Linear Regression (SGD): MSE=0.45, R2=0.6622
Ridge Regression (Batch): MSE=0.46, R2=0.6533
Ridge Regression (SGD): MSE=0.46, R2=0.6488
Lasso Regression (Batch): MSE=0.46, R2=0.6533
Lasso Regression (SGD): MSE=0.44, R2=0.6652

--- LEADERBOARD ---


Unnamed: 0,Model,Optimizer,Best Lambda,Best LR,Test MSE,Test R2
5,Lasso Regression (SGD),SGD,0.001,0.001,0.442763,0.66516
1,Linear Regression (SGD),SGD,0.0,0.0001,0.446701,0.662181
2,Ridge Regression (Batch),BATCH,0.0,0.1,0.458448,0.653297
0,Linear Regression (Batch),BATCH,0.0,0.1,0.458448,0.653297
4,Lasso Regression (Batch),BATCH,0.0,0.1,0.458448,0.653297
3,Ridge Regression (SGD),SGD,0.0,0.001,0.46444,0.648767


Feature,Distance_to_LA,Distance_to_SanDiego,Distance_to_SanFrancisco,Distance_to_SanJose,Distance_to_coast,Households,Latitude,Longitude,Median_Age,Median_Income,Population,Tot_Bedrooms,Tot_Rooms
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Lasso Regression (Batch),-0.201762,-0.020848,-0.068252,-0.064922,-0.276845,0.146292,-0.108615,-0.180531,0.133482,0.706024,-0.353065,0.209738,0.048588
Lasso Regression (SGD),-0.290214,0.091431,-0.031261,0.074561,-0.140409,0.114936,-0.443435,-0.593839,0.112645,0.770201,-0.463522,0.483197,-0.090085
Linear Regression (Batch),-0.201762,-0.020848,-0.068252,-0.064922,-0.276845,0.146292,-0.108615,-0.180531,0.133482,0.706024,-0.353065,0.209738,0.048588
Linear Regression (SGD),-0.282153,0.015585,-0.053449,-0.0109,-0.2333,0.214387,-0.167912,-0.339986,0.121083,0.723334,-0.442245,0.341311,-0.04925
Ridge Regression (Batch),-0.201762,-0.020848,-0.068252,-0.064922,-0.276845,0.146292,-0.108615,-0.180531,0.133482,0.706024,-0.353065,0.209738,0.048588
Ridge Regression (SGD),-0.312986,0.292653,-0.209898,0.231115,-0.136551,0.14793,-0.614212,-0.606817,0.130633,0.744273,-0.331888,0.487985,-0.104218
