In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

In [2]:
X,y = load_diabetes(return_X_y = True)

In [3]:
X.shape

(442, 10)

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3,random_state = 1)

# SGDRegression

In [5]:
class SGDRegression:
    def __init__(self,learning_rate=0.01,epoch=20,intercept=-120):
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.intercept = intercept

    def fit(self,X,y):
        self.coef_ = np.ones(X.shape[1])

        for i in range(self.epoch):
            for j in range(X.shape[0]):
                
                index = np.random.randint(0,X.shape[0])    # To randomly select the rows

                # Applying the Stochastic Gradient Descent
                y_hat = self.intercept + np.dot(X[index],self.coef_)
                intercept_slope = -2*(y[index]-y_hat)
                self.intercept = self.intercept - (self.learning_rate*intercept_slope)

                coef_slope = -2 * np.dot((y[index]-y_hat),X[index])
                self.coef_ = self.coef_ - (self.learning_rate*coef_slope)
        return self.intercept,self.coef_
        
    def predict(self,X):
        return self.intercept+np.dot(X,self.coef_)

In [51]:
reg = SGDRegression(0.1,100,500)

In [52]:
reg.fit(X_train,y_train)

(150.05825991038702,
 array([ -30.14232716, -238.21793301,  602.10897141,  331.94385429,
         -93.29804737,  -83.28591465, -154.48331152,  156.40522517,
         480.50329212,   40.25836314]))

In [57]:
y_pred_SGD = reg.predict(X_test)

# Sklearn Multiple Linear Regression

In [53]:
from sklearn.linear_model import LinearRegression
reg_sk = LinearRegression()
reg_sk.fit(X_train,y_train)

In [56]:
y_pred = reg_sk.predict(X_test)

# Comparing Coefficients and Intrecept

In [54]:
print("Coef of SGDRegression:\n",reg.coef_)
print("\nCoef of Sklearn Multiple Linear Regression:\n",reg_sk.coef_)

Coef of SGDRegression:
 [ -30.14232716 -238.21793301  602.10897141  331.94385429  -93.29804737
  -83.28591465 -154.48331152  156.40522517  480.50329212   40.25836314]

Coef of Sklearn Multiple Linear Regression:
 [ -13.30451123 -238.04081712  582.21703736  345.71742256 -647.57486397
  340.86808364   90.17766358  217.61268208  664.5599925    46.12369044]


In [55]:
print("Intercept of SGDRegression:",reg.intercept)
print("\nIntercept of Sklearn Multiple Linear Regression:",reg_sk.intercept_)

Intercept of SGDRegression: 150.05825991038702

Intercept of Sklearn Multiple Linear Regression: 151.43052256540983


# Comparing r2_score

In [59]:
from sklearn.metrics import r2_score
print("r2_score of SGDRegression:",r2_score(y_pred_SGD,y_test))
print("r2_score of Sklearn Multiple Linear Regression:",r2_score(y_pred,y_test))

r2_score of SGDRegression: -0.048171925067625576
r2_score of Sklearn Multiple Linear Regression: -0.06315004711311589
