In [4]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
class SGDRegresserCustom:
    """Linear regression trained with per-sample stochastic gradient descent.

    The model is ``y_hat = intercept + X . coef``. Every update draws one
    training row uniformly at random (with replacement) and steps along the
    negative gradient of that row's squared error.
    """

    def __init__(self):
        # Both attributes stay None until fit() has run.
        self._coef = None
        self._intercept = None

    def fit(self, X, y, learning_rate=0.001, n_iterations=800, random_state=None):
        """Estimate the intercept and coefficients by SGD.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Training features (ndarray, DataFrame or nested list).
        y : array-like with m values
            Training targets (Series, ndarray or list).
        learning_rate : float, default 0.001
            Constant step size applied to every update.
        n_iterations : int, default 800
            Number of passes; each pass performs ``m`` single-sample updates.
        random_state : int or None, default None
            Seed for a private ``np.random.RandomState``. When None the global
            NumPy random state is used, so ``np.random.seed`` still controls
            the run exactly as before this parameter existed.

        Returns
        -------
        tuple
            ``(intercept, coef)`` where ``intercept`` is a scalar and ``coef``
            is a list of ``n`` floats.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``y`` does not hold exactly
            one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (samples, features); got ndim={X.ndim}")
        m, n = X.shape
        if m == 0:
            raise ValueError("X must contain at least one sample")

        # np.asarray accepts Series, ndarray and list alike (a bare .to_numpy()
        # call only exists on pandas objects).
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y.shape[0]} target values"
            )

        # The np.random module and RandomState expose the same randn/randint API.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        X_b = np.c_[np.ones((m, 1)), X]  # Prepend a column of ones for the bias term
        theta = rng.randn(n + 1, 1)  # Random initialization

        for _ in range(n_iterations):
            for _ in range(m):  # m single-sample updates per pass
                random_index = rng.randint(m)  # Sample a row with replacement
                xi = X_b[random_index:random_index + 1]  # Shape (1, n + 1)
                yi = y[random_index:random_index + 1]  # Shape (1, 1)
                # Gradient of (xi . theta - yi)^2 with respect to theta.
                gradient = 2 * xi.T.dot(xi.dot(theta) - yi)
                theta -= learning_rate * gradient

        self._intercept = theta[0, 0]  # Bias weight as a scalar
        self._coef = theta[1:].flatten().tolist()  # Feature weights as a list

        return self._intercept, self._coef

    def predict(self, X_test):
        """Return ``X_test . coef + intercept`` for every row of ``X_test``.

        Objects that provide ``.dot`` (ndarray, DataFrame) are used as-is, so
        the result type matches what ``X_test.dot`` yields; anything else
        (e.g. a nested list) is converted to a float ndarray first.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if self._coef is None or self._intercept is None:
            raise ValueError("Model is not fitted yet; call fit() before predict().")
        features = X_test if hasattr(X_test, "dot") else np.asarray(X_test, dtype=float)
        return features.dot(self._coef) + self._intercept

**Result on dataset 1: _Boston Housing Dataset_**

In [6]:
# Load the Boston Housing CSV and discard its first column.
# NOTE(review): the first column is dropped unconditionally -- confirm it is an
# index/ID column and not a real feature.
data = pd.read_csv('BostonHousing.csv').iloc[:, 1:]

# Features are every remaining column but the last; the last column is the target.
X, Y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for testing (fixed seed so the split is repeatable).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [7]:
# Standardize the features: the scaler is fitted on the training split only,
# and the statistics it learned there are then applied to the test split.
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [8]:
# Fit the hand-written SGD regressor on the scaled training split, then report
# its error on the held-out test split.
sgd_custom1 = SGDRegresserCustom()
sgd_custom1.fit(X=X_train_transformed, y=Y_train)
pred_custom = sgd_custom1.predict(X_test=X_test_transformed)

print("MSE: ", mean_squared_error(y_true=Y_test, y_pred=pred_custom))
print("R2 Score:", r2_score(y_true=Y_test, y_pred=pred_custom))

MSE:  23.948665803648723
R2 Score: 0.6734292876818868


In [9]:
# Reference run: scikit-learn's SGDRegressor with default settings on the same
# scaled split, scored with the same two metrics.
sgd_sklearn1 = SGDRegressor().fit(X=X_train_transformed, y=Y_train)  # fit() returns the estimator
sklearn_pred = sgd_sklearn1.predict(X=X_test_transformed)

print("MSE: ", mean_squared_error(y_true=Y_test, y_pred=sklearn_pred))
print("R2 Score:", r2_score(y_true=Y_test, y_pred=sklearn_pred))

MSE:  24.52464370408855
R2 Score: 0.6655750917626524


**Result on dataset 2: _Advertising Dataset_**

In [10]:
# Load the Advertising CSV and discard its first column.
# NOTE(review): the first column is dropped unconditionally -- confirm it is an
# index/ID column and not a real feature.
data = pd.read_csv('advertising.csv')
data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # All columns except the last one
Y = data.iloc[:, -1]   # Only the last column
# Split data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize the features: fit a fresh scaler on this dataset's training split
# only, then apply the same statistics to the test split.
scaler = StandardScaler()
X_train_transformed = scaler.fit_transform(X_train)
X_test_transformed = scaler.transform(X_test)

In [11]:
# Fit the hand-written SGD regressor on the scaled training split, then report
# its error on the held-out test split.
sgd_custom2 = SGDRegresserCustom()
sgd_custom2.fit(X=X_train_transformed, y=Y_train)
pred_custom = sgd_custom2.predict(X_test=X_test_transformed)

print("MSE: ", mean_squared_error(y_true=Y_test, y_pred=pred_custom))
print("R2 Score:", r2_score(y_true=Y_test, y_pred=pred_custom))

MSE:  26.738296425520854
R2 Score: 0.13471376664517254


In [12]:
# Reference run: scikit-learn's SGDRegressor with default settings on the same
# scaled split, scored with the same two metrics.
sgd_sklearn2 = SGDRegressor().fit(X=X_train_transformed, y=Y_train)  # fit() returns the estimator
sklearn_pred = sgd_sklearn2.predict(X=X_test_transformed)

print("MSE: ", mean_squared_error(y_true=Y_test, y_pred=sklearn_pred))
print("R2 Score:", r2_score(y_true=Y_test, y_pred=sklearn_pred))

MSE:  27.459854273854894
R2 Score: 0.11136321121724158
