Stochastic Gradient Descent usually converges in fewer epochs than Batch Gradient Descent, because it updates the parameters after every single sample instead of once per pass over the whole dataset. Because the samples are picked at random, the result will be slightly different every time you run the code unless the random seed is fixed. That does not mean the result will be far from the optimum: with a suitable learning rate it typically ends up close to the optimal solution.

Using the sklearn SGDRegressor.

In [135]:
import numpy as np
import random

from sklearn.datasets import load_diabetes
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [136]:
# Load the diabetes data as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [137]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

In [138]:
# Reference model: scikit-learn's SGD regressor with a constant step size.
reg = SGDRegressor(
    max_iter=100,
    learning_rate='constant',
    eta0=0.01,
    random_state=13,
)
reg.fit(X_train, y_train)

# Predictions on the held-out split, scored in a later cell.
y_pred = reg.predict(X_test)

In [139]:
# Show the fitted intercept and coefficients, one per line.
print(reg.intercept_, reg.coef_, sep="\n")

[156.83598183]
[  58.42655606  -66.39515464  299.27488581  223.30520732   25.13964966
   -6.37350383 -163.93842378  132.45833025  261.9732631   148.70191095]


In [140]:
# R^2 of the scikit-learn model on the held-out split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
r2

0.40735173596154983

SGDRegressor from scratch

In [141]:
class StochasticGDRegressor():
    """Linear regression trained with plain stochastic gradient descent.

    Each update uses one training sample drawn uniformly at random (with
    replacement) and takes a step against the gradient of that sample's
    squared error. One epoch consists of ``n_samples`` such updates.

    Parameters
    ----------
    epochs : int, default 1000
        Number of passes (each of ``n_samples`` single-sample updates).
    learning_rate : float, default 0.01
        Constant step size applied to every update.
    random_state : int or None, default None
        Seed for the sample selection. With ``None`` the global
        ``np.random`` stream is used, so results differ between runs unless
        the caller seeds it with ``np.random.seed``.

    Attributes
    ----------
    intercept_ : float or None
        Fitted bias term; ``None`` until ``fit`` has been called.
    coef_ : numpy.ndarray or None
        Fitted weights, one per feature; ``None`` until ``fit`` has been called.
    """

    def __init__(self,epochs = 1000,learning_rate = 0.01,random_state = None):
        self.intercept_ = None
        self.coef_ = None
        self.epochs = epochs
        self.lr = learning_rate
        self.random_state = random_state

    def fit(self,X_train,y_train):
        """Estimate ``intercept_`` and ``coef_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X_train`` is not two-dimensional or the number of targets
            does not match the number of samples.

        Notes
        -----
        Prints the fitted intercept and coefficients when training finishes.
        """
        # Coerce to plain arrays so positional indexing below is well defined
        # for lists and for pandas objects with a non-default index.
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float).ravel()

        if features.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = features.shape
        if targets.shape[0] != n_samples:
            raise ValueError("X_train and y_train must contain the same number of samples")

        # A private generator keeps seeded runs reproducible without touching
        # the global stream; without a seed the global stream is used.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.intercept_ = 0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                x_i = features[idx]

                # Both gradients use the residual computed before either
                # parameter is updated.
                residual = targets[idx] - (np.dot(x_i, self.coef_) + self.intercept_)

                # d/d(intercept) of (y - y_hat)^2 is -2 * residual.
                self.intercept_ = self.intercept_ - self.lr * (-2 * residual)
                # d/d(coef) of (y - y_hat)^2 is -2 * residual * x.
                self.coef_ = self.coef_ - self.lr * (-2 * residual * x_i)

        # Kept so existing notebook output stays the same.
        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        """Return the linear prediction ``X_test @ coef_ + intercept_``."""
        return np.dot(np.asarray(X_test, dtype=float),self.coef_) + self.intercept_

In [155]:
# Train the from-scratch regressor: 40 epochs with a step size of 0.01.
sgd = StochasticGDRegressor(learning_rate=0.01, epochs=40)
sgd.fit(X_train, y_train)

154.73776016382254 [  61.178051    -76.72102894  313.1280732   243.9079181    16.43913503
  -16.68542235 -168.21183456  131.85417436  271.21971047  154.0469652 ]


In [156]:
# Predictions of the from-scratch model on the held-out split.
y_pred = sgd.predict(X_test)

In [157]:
# R^2 of the from-scratch model on the held-out split.
r2s = r2_score(y_true=y_test, y_pred=y_pred)
r2s

0.41919378099370885