In [13]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [14]:
class SGDRegresserCustom:
    """Linear regression trained with plain stochastic gradient descent.

    The model is ``y ≈ X @ coef + intercept`` and is fitted by minimising the
    squared error one randomly drawn sample at a time.
    """

    def __init__(self):
        # Both stay None until fit() has been called.
        self._coef = None
        self._intercept = None

    def fit(self, X, y, learning_rate=0.001, n_iterations=800, random_state=None):
        """Fit the model and return ``(intercept, coefficients)``.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Feature matrix (NumPy array, DataFrame or nested list).
        y : array-like with m values
            Targets; a pandas Series, NumPy array or list are all accepted.
        learning_rate : float, default 0.001
            Step size applied to every single-sample gradient.
        n_iterations : int, default 800
            Number of passes; each pass performs ``m`` updates.
        random_state : int or None, default None
            Seed for a private random generator. When None the global
            ``np.random`` state is used, so ``np.random.seed`` still controls
            the result.

        Returns
        -------
        tuple
            ``(intercept, coef)`` where ``intercept`` is a scalar and ``coef``
            is a list with one weight per feature.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not hold one target
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (samples, features); got {X.ndim} dimension(s)"
            )
        m, n = X.shape
        if m == 0:
            raise ValueError("Cannot fit on an empty dataset")

        # np.asarray handles Series, ndarrays and lists alike (the previous
        # y.to_numpy() call only worked for pandas objects).
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y.shape[0]} target values"
            )

        # Fall back to the module-level generator so unseeded callers and
        # callers relying on np.random.seed() behave as before.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        X_b = np.c_[np.ones((m, 1)), X]  # Prepend a column of ones for the bias term
        theta = rng.randn(n + 1, 1)  # Random initialization

        for _ in range(n_iterations):
            # m updates per pass; samples are drawn with replacement, so a
            # pass does not necessarily visit every row.
            for _ in range(m):
                idx = rng.randint(m)
                xi = X_b[idx:idx + 1]  # Slice keeps the row 2-D: shape (1, n + 1)
                yi = y[idx:idx + 1]    # Shape (1, 1)
                # Gradient of the squared error (xi @ theta - yi)^2 w.r.t. theta
                gradient = 2 * xi.T.dot(xi.dot(theta) - yi)
                theta -= learning_rate * gradient

        self._intercept = theta[0, 0]  # Bias weight as a scalar
        self._coef = theta[1:].flatten().tolist()  # Feature weights as a list

        return self._intercept, self._coef

    def predict(self, X_test):
        """Return predictions ``X_test @ coef + intercept``.

        ``X_test`` may be a NumPy array, a DataFrame (the result is then
        whatever ``DataFrame.dot`` returns) or a nested list.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if self._coef is None or self._intercept is None:
            raise ValueError("This model is not fitted yet; call fit() before predict()")
        # Objects without a .dot method (e.g. plain lists) are converted first.
        features = X_test if hasattr(X_test, "dot") else np.asarray(X_test, dtype=float)
        return features.dot(self._coef) + self._intercept

**Results on dataset 1: _Boston Housing Dataset_**

In [15]:
# Read the CSV and discard its first column
# (presumably a saved row index - TODO confirm against the file).
data = pd.read_csv('Boston.csv').iloc[:, 1:]
# Features are every remaining column but the last; the last column is the target.
X, Y = data.iloc[:, :-1], data.iloc[:, -1]
# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [16]:
# Do feature scaling of the data.
# The scaler is fitted on the training split only and then reused for the
# test split, so the test data does not influence the scaling statistics.
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [17]:
# Results using custom model
# Seed NumPy's global generator first: fit() takes its initial weights and its
# sample indices from np.random, so without a seed the coefficients displayed
# below would differ on every run.
np.random.seed(42)

sgd_custom = SGDRegresserCustom()
sgd_custom.fit(X_train_transformed, Y_train)

(22.735206598163924,
 [-1.0343975366940732,
  0.8810635680559141,
  0.4038468227545614,
  1.3006093218170856,
  -2.0538590733039683,
  3.0848126408943677,
  -0.3342056513542198,
  -2.9689652043840264,
  2.4474714409614906,
  -1.635300656627362,
  -1.9689001020216175,
  1.0851917368732382,
  -3.6019315330017156])

In [18]:
# Score the custom model on the held-out test split
pred_custom = sgd_custom.predict(X_test_transformed)
mse_custom = mean_squared_error(Y_test, pred_custom)
print("MSE: ", mse_custom)
r2_custom = r2_score(Y_test, pred_custom)
print("R2 Score:", r2_custom)

MSE:  23.998118391961302
R2 Score: 0.6727549383413589


In [None]:
# Results from sklearn's model
# random_state is fixed so the estimator's internal shuffling is repeatable and
# the metrics printed below can be reproduced on a fresh run.
sgd_sklearn = SGDRegressor(random_state=42)
sgd_sklearn.fit(X_train_transformed, Y_train)
sklearn_pred = sgd_sklearn.predict(X_test_transformed)
print("MSE: ", mean_squared_error(Y_test, sklearn_pred))
print("R2 Score:", r2_score(Y_test, sklearn_pred))

MSE:  24.918906451717632
R2 Score: 0.6601988145458185


**Results on dataset 2: _Advertising Dataset_**