# Sklearn gradient Descent

In [189]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [190]:
from sklearn.datasets import load_diabetes
X, y = load_diabetes(return_X_y=True)

In [191]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [192]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
from sklearn.metrics import r2_score
print('R2score of the scikit learn linear regressor is: ', r2_score(y_pred, y_test))

R2score of the scikit learn linear regressor is:  0.0069132949709930225


# Multidimensional Gradient Descent from scratch

# Batch gradient Descent

In [193]:
class GD_multi_Dim:
    
    def __init__(self, learning_rate=0.01, epochs=100):
        self.coef = None
        self.intercept = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        
    def fit(self, X_train, y_train):
        self.intercept=0
        self.coef = np.ones(X_train.shape[1])
        for i in range(self.epochs):
            y_hat = np.dot(X_train, self.coef) + self.intercept
            intercept_der = -2 * (np.mean(y_train - y_hat))
            self.intercept = self.intercept - (self.learning_rate * intercept_der)
            
            
            coef_der = (-2 * np.dot((y_train - y_hat), X_train))/(X_train.shape[0])
            self.coef = self.coef - (self.learning_rate * coef_der)
        
    def predict(self, X_test):
        return ((np.dot(X_test, self.coef)) + self.intercept)

In [194]:
gd = GD_multi_Dim(learning_rate=0.5 ,epochs=1000)

In [195]:
start = time.time()
gd.fit(X_train, y_train)
print('time taken is', time.time() - start)

time taken is 0.13599729537963867


In [196]:
y_pred2 = gd.predict(X_test)

In [197]:
print('r2 score of our gradient score is: ', r2_score(y_pred2, y_test))

r2 score of our gradient score is:  -0.09580183597920344


In [198]:
gd.intercept

151.37261723517858

In [199]:
gd.coef

array([  41.82825603, -203.237332  ,  509.65379034,  325.07884454,
        -71.07206584, -119.33265177, -215.85367971,  144.71006788,
        376.52628608,  111.97625999])

In [200]:
lr.coef_

array([  37.90031426, -241.96624835,  542.42575342,  347.70830529,
       -931.46126093,  518.04405547,  163.40353476,  275.31003837,
        736.18909839,   48.67112488])

In [201]:
lr.intercept_

151.3456553477407

# Stochastic Gradient Descent

In [202]:
class Stochastic_GD_multi_Dim:
    
    def __init__(self, learning_rate=0.01, epochs=100):
        self.coef = None
        self.intercept = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        
    def fit(self, X_train, y_train):
        self.intercept=0
        self.coef = np.ones(X_train.shape[1])
        for i in range(self.epochs):
            for j in range(X_train.shape[0]):
                idx = np.random.randint(0, X_test.shape[0])
                
                y_hat = np.dot(X_train[idx], self.coef) + self.intercept
                
                intercept_der = -2 * (y_train[idx] - y_hat)
                self.intercept = self.intercept - (self.learning_rate * intercept_der)
                
                coef_der = (-2 * np.dot((y_train[idx] - y_hat), X_train[idx]))
                self.coef = self.coef - (self.learning_rate * coef_der)
        
    def predict(self, X_test):
        return ((np.dot(X_test, self.coef)) + self.intercept)

In [203]:
sgd = Stochastic_GD_multi_Dim(learning_rate=0.05, epochs=100)

In [204]:
start = time.time()
sgd.fit(X_train, y_train)
print('time taken is:', time.time()- start)

time taken is: 1.3061416149139404


In [205]:
y_pred_sgd = sgd.predict(X_test)

In [206]:
print('R2 Score of Stochastic gradient descent is: ', r2_score(y_pred_sgd, y_test))

R2 Score of Stochastic gradient descent is:  0.07527412533924316


In [207]:
sgd.intercept

146.24662872059847

In [208]:
sgd.coef

array([ 121.94283661, -277.81795819,  410.16392678,  320.32876885,
       -134.66310813, -195.71826276, -129.3169879 ,  339.92996084,
        612.43326973,   60.07422873])

# Sklearn Stochastic Gradient Descent

In [209]:
#https://github.com/scikit-learn/scikit-learn/blob/dc580a8ef/sklearn/linear_model/_stochastic_gradient.py#L1721
from sklearn.linear_model import SGDRegressor

In [210]:
sgd_ = SGDRegressor()

In [211]:
sgd_.fit(X_train, y_train)



SGDRegressor()

In [212]:
y_sgd_sklearn = sgd.predict(X_test)

In [213]:
print('R2 score of SGD Regressor is: ', r2_score(y_sgd_sklearn, y_test))

R2 score of SGD Regressor is:  0.07527412533924316


In [214]:
sgd_.intercept_

array([152.21944078])

In [215]:
sgd_.coef_

array([  56.1226273 ,  -41.48923961,  262.26065227,  187.48687819,
         28.77369237,    4.12402476, -144.16775892,  134.15542947,
        218.40832926,  132.28957736])

# Mini batch Gradient Descent

In [216]:
import random

In [217]:
class mini_batch_GD:
    
    def __init__(self,batch_size, learning_rate=0.01, epochs=100):
        self.coef = None
        self.intercept = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        
    def fit(self, X_train, y_train):
        self.intercept=0
        self.coef = np.ones(X_train.shape[1])
        
        for i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):
                
                idx = random.sample(range(X_train.shape[0]), self.batch_size)
                
                y_hat = np.dot(X_train[idx], self.coef) + self.intercept
                
                intercept_der = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept = self.intercept - (self.learning_rate * intercept_der)
                
                coef_der = -2 * np.dot((y_train[idx] - y_hat),X_train[idx])
                self.coef = self.coef - (self.learning_rate * coef_der)
        
    def predict(self, X_test):
        return ((np.dot(X_test, self.coef)) + self.intercept)

In [218]:
mbgd = mini_batch_GD(batch_size= int(X_train.shape[0]/10), learning_rate=0.1, epochs=50)

In [219]:
mbgd.fit(X_train, y_train)

In [220]:
y_pred_mbgd = mbgd.predict(X_test)

In [221]:
print('The r2 score of mini batch gradient descent is:', r2_score(y_pred_mbgd, y_test))

The r2 score of mini batch gradient descent is: -0.023351770391657034
