In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Fetch the Boston Housing dataset from OpenML; cast features and target to float
boston = fetch_openml(name='boston', version=1)
X, y = boston.data.astype(float), boston.target.astype(float)

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features; the scaler is fitted on the training rows only and
# then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

class LinearRegressionSGD:
    """Linear regression trained with per-sample stochastic gradient descent.

    Every epoch visits each training sample once, in a freshly shuffled
    order, and updates the weights and bias after every single sample.

    Args:
        loss: ``'mse'`` (squared error) or ``'mae'`` (absolute error).
        learning_rate: Step size applied to each per-sample gradient.
        epochs: Number of full passes over the training data.
    """

    def __init__(self, loss='mse', learning_rate=0.001, epochs=1000):
        self.loss = loss
        self.lr = learning_rate
        self.epochs = epochs
        self.weights = None  # set by fit(): one coefficient per feature
        self.bias = 0.0

    # Legacy spelling of the constructor, kept so existing ``obj.init(...)``
    # calls continue to work.
    init = __init__

    def fit(self, X, y):
        """Fit the weights and bias to the training data.

        Args:
            X: Array-like of shape (m, n), one row per sample.
            y: Array-like holding the m target values.

        Raises:
            ValueError: If ``self.loss`` is not ``'mse'`` or ``'mae'``, or if
                X and y contain a different number of samples.
        """
        # Validate once up front instead of inside the innermost loop.
        if self.loss not in ('mse', 'mae'):
            raise ValueError("Supported losses: 'mse' or 'mae'")
        use_mse = self.loss == 'mse'

        # Work on plain ndarrays so that ``y[i]`` is always positional; a
        # pandas Series would treat the shuffled integers as index labels.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        m, n = X.shape
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y.shape[0]} targets"
            )

        # Start every fit from scratch, including the bias.
        self.weights = np.zeros(n)
        self.bias = 0.0

        for _ in range(self.epochs):
            # New random visiting order each epoch.
            for i in np.random.permutation(m):
                xi = X[i]
                error = xi @ self.weights + self.bias - y[i]

                # Derivative of the per-sample loss w.r.t. the prediction:
                # 2*error for squared error, sign(error) for absolute error.
                grad = 2 * error if use_mse else np.sign(error)

                self.weights -= self.lr * grad * xi
                self.bias -= self.lr * grad

    def predict(self, X):
        """Return ``X @ weights + bias`` for the rows of X.

        ``fit`` must have been called first so that ``weights`` is set.
        """
        return X @ self.weights + self.bias

# Train the from-scratch SGD models, one with each loss.
# The class defined in this file is LinearRegressionSGD; the previous name
# LinearRegressionGD is not defined anywhere visible and raised NameError.
scratch_mse = LinearRegressionSGD(loss='mse', learning_rate=0.01, epochs=1000)
scratch_mae = LinearRegressionSGD(loss='mae', learning_rate=0.01, epochs=1000)
scratch_mse.fit(X_train_scaled, y_train)
scratch_mae.fit(X_train_scaled, y_train)

# Reference model: scikit-learn's LinearRegression on the same scaled data
sk_model = LinearRegression()
sk_model.fit(X_train_scaled, y_train)

def evaluate(name, model, X_test, y_test):
    """Print a model's MAE and MSE on the given data under a heading.

    Args:
        name: Label printed as the section heading.
        model: Object exposing ``predict(X)``.
        X_test: Features passed to ``model.predict``.
        y_test: True targets the predictions are scored against.
    """
    predictions = model.predict(X_test)
    print(f"\n{name}:")
    metrics = (("MAE", mean_absolute_error), ("MSE", mean_squared_error))
    for label, score in metrics:
        print(f"{label}: {score(y_test, predictions):.2f}")

# Test-set error of the scratch MSE model next to the scikit-learn reference
for title, fitted in (("Scratch MSE Model", scratch_mse), ("Sklearn Model", sk_model)):
    evaluate(title, fitted, X_test_scaled, y_test)

# Side-by-side table of learned coefficients, one row per feature index
print("\nCoefficient Comparison:")
headings = ("Feature", "Scratch MSE", "Scratch MAE", "Sklearn")
print(" ".join(f"{heading:<15}" for heading in headings))
coef_rows = zip(scratch_mse.weights, scratch_mae.weights, sk_model.coef_)
for idx, coefs in enumerate(coef_rows):
    print(f"{idx:<15} " + " ".join(f"{coef:<15.4f}" for coef in coefs))

# Learned intercepts of the three models
print("\nIntercept:")
intercepts = (
    ("Scratch MSE", scratch_mse.bias),
    ("Scratch MAE", scratch_mae.bias),
    ("Sklearn", sk_model.intercept_),
)
for label, value in intercepts:
    print(f"{label}: {value:.4f}")



Scratch MSE Model:
MAE: 3.21
MSE: 24.69

Sklearn Model:
MAE: 3.19
MSE: 24.29

Coefficient Comparison:
Feature         Scratch MSE     Scratch MAE     Sklearn        
0               -0.9515         -0.2966         -1.0021        
1               0.5852          0.0548          0.6963         
2               0.0951          -0.1200         0.2781         
3               0.7446          0.0305          0.7187         
4               -1.9276         -0.1222         -2.0223        
5               3.2014          0.0612          3.1452         
6               -0.1933         -0.1049         -0.1760        
7               -2.9925         0.1151          -3.0819        
8               1.7005          -0.1758         2.2514         
9               -1.1784         -0.1743         -1.7670        
10              -2.0078         -0.0924         -2.0378        
11              1.1290          0.0479          1.1296         
12              -3.5897         -0.2165         -3.6117        

