# SGD Regressor

In [1]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [136]:
class SGDRegressor:
    """Linear regression trained with stochastic gradient descent.

    Each update uses the squared-error gradient of a single, randomly chosen
    training row (sampling is with replacement, so one epoch is
    ``n_samples`` updates, not necessarily one pass over every row).

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=100
        Number of epochs; each epoch performs ``n_samples`` updates.
    random_state : int or None, default=None
        Seed for the row sampling. When ``None`` the global ``np.random``
        stream is used, so ``np.random.seed(...)`` still controls the run.

    Attributes
    ----------
    intercept_ : float or None
        Fitted bias term (``None`` until ``fit`` has been called).
    coef_ : numpy.ndarray or None
        Fitted weights, one per feature (``None`` until ``fit`` is called).
    """

    def __init__(self, lr=0.01, epochs=100, random_state=None):
        self.intercept_ = None
        self.coef_ = None
        self.lr = lr
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Fit intercept and coefficients, then print them.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or its row count does not
            match the length of ``y_train``.
        """
        # Coerce to plain float arrays so that positional indexing is used
        # even for lists or pandas objects with a non-default index.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float).ravel()

        if X_train.ndim != 2:
            raise ValueError(
                f"X_train must be 2-dimensional, got {X_train.ndim} dimension(s)"
            )
        n_samples, n_features = X_train.shape
        if n_samples == 0:
            raise ValueError("X_train must contain at least one row")
        if y_train.shape[0] != n_samples:
            raise ValueError(
                f"X_train has {n_samples} rows but y_train has {y_train.shape[0]} values"
            )

        # Fall back to the module-level generator when no seed is given so
        # existing code that relies on np.random.seed behaves as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(n_samples):                      # n_samples updates per epoch
                idx = rng.randint(0, n_samples)             # pick one random row
                row = X_train[idx]
                # Residual is computed once, before either parameter moves,
                # so both gradients refer to the same prediction.
                residual = y_train[idx] - (self.intercept_ + row @ self.coef_)

                # d/d(intercept) of (y - y_hat)^2 for this row
                gradient_intercept = -2 * residual
                # d/d(coef) of (y - y_hat)^2 for this row
                gradient_coef = -2 * residual * row

                self.intercept_ = self.intercept_ - (self.lr * gradient_intercept)
                self.coef_ = self.coef_ - (self.lr * gradient_coef)

        print(f'Intercept : {self.intercept_}\nCoefficients : {self.coef_}')

    def predict(self, X_test):
        """Return ``intercept_ + X_test @ coef_`` for array-like ``X_test``."""
        return self.intercept_ + (np.asarray(X_test, dtype=float) @ self.coef_)

In [137]:
# Load the diabetes data as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [138]:
# Training picks rows at random through NumPy's global generator, so seed it
# first; otherwise the printed parameters and the score change on every run.
np.random.seed(42)

sgd = SGDRegressor(lr=0.1, epochs=10)
sgd.fit(X_train, y_train)

Intercept : 143.89465767438458
Coefficients : [  60.66572215 -164.92753835  450.55026936  312.80810936  -45.27571299
  -96.72965086 -217.48622469  146.73410548  364.94730473  144.01991631]


In [104]:
# Score the fitted model on the held-out rows (R^2, higher is better).
y_pred = sgd.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.4351038036236289

In [130]:
# The hand-written class defined earlier in this notebook reuses the name
# SGDRegressor, so the scikit-learn estimator has to be imported again here.
from sklearn.linear_model import SGDRegressor

# random_state pins scikit-learn's per-epoch shuffling so the score below is
# reproducible across kernel restarts.
sgd = SGDRegressor(
    max_iter=150,
    learning_rate='adaptive',
    eta0=0.01,
    tol=0.001,
    random_state=42,
)
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)

r2_score(y_test, y_pred)



0.4554691921093781

In [131]:
# Display the fitted bias and weights of the most recently trained model.
(sgd.intercept_, sgd.coef_)

(array([151.39109982]),
 array([  52.67709274, -128.62757288,  414.03339134,  278.59144203,
         -24.50972145,  -67.56457854, -196.78437463,  147.45563276,
         316.73896821,  142.93779211]))