In [34]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
import random

In [170]:
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
X = pd.DataFrame(diabetes.data,columns=diabetes.feature_names)
y = pd.DataFrame(diabetes.target,columns=['target'])
print(X.shape)
print(y.shape)
print(y['target'].shape)

(442, 10)
(442, 1)
(442,)


In [4]:
from sklearn.datasets import load_diabetes
# diabetes = load_diabetes()
# diabetes.keys()
X,y = load_diabetes(return_X_y=True)
print(X.shape)
print(y.shape)

(442, 10)
(442,)


In Python, (10,) is a one-tuple (the , being necessary to distinguish it from the use of parentheses for grouping: (10) just means 10), whereas (10,1) is a pair (a 2-tuple). So np.ones((10,)) creates a one-dimensional array of size 10, whereas np.ones((10,1)) creates a two-dimensional array of dimension 10×1. This is directly analogous to, say, the difference between a single number and a one-dimensional array of length 1.


In [5]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

(353, 10) (89, 10) (353,) (89,)


In [6]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression(fit_intercept=True)
lr.fit(X_train,y_train)

print(lr.intercept_)
print(lr.coef_)
y_pred = lr.predict(X_test)
score= r2_score(y_test,y_pred)
print(score)

151.34560453985995
[  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
0.4526027629719195


In [7]:
class BatchGDRegressor:
    def __init__(self,learning_rate=0.01,epochs=100):
        self.intercept=None
        self.coef=None
        self.learning_rate = learning_rate
        self.epochs=epochs
        
    def fit(self,X_train,y_train):
        self.coef = np.ones(X_train.shape[1])
        self.intercept = 0
        for _ in range(self.epochs):
            y_hat = self.intercept + np.dot(X_train,self.coef)
            loss = y_train-y_hat
            df_intercept = -2*np.mean(loss)
            self.intercept = self.intercept - (self.learning_rate * df_intercept)
            df_coef = -2*np.dot(loss,X_train)/X_train.shape[0]
            self.coef = self.coef - (self.learning_rate * df_coef)
        return self.intercept,self.coef
        
    def predict(self,X_test):
        y_pred = np.dot(X_test,self.coef) + self.intercept
        return y_pred

In [9]:
bgd = BatchGDRegressor(epochs=1000,learning_rate=0.5)
intercept,coef = bgd.fit(X_train,y_train)
print(intercept)
print(coef)
y_pred = bgd.predict(X_test)
score = r2_score(y_test,y_pred)
print(score)

151.372591059285
[  41.82977756 -203.23644652  509.6557063   325.07401153  -71.07194191
 -119.33187737 -215.85264692  144.71021659  376.52729984  111.97619094]
0.4588776166235029


In [11]:
from sklearn.linear_model import SGDRegressor

sgd = SGDRegressor(learning_rate='constant',eta0=0.01,warm_start=True)
sgd.fit(X_train,y_train)
print(sgd.coef_)
print(sgd.intercept_)
y_pred = sgd.predict(X_test)
score = r2_score(y_test,y_pred)
print(score)

[  55.46264769 -106.32985493  384.06442669  262.17455583  -10.69020551
  -50.15035438 -188.76630042  148.23826818  298.24881338  146.87104672]
[154.1067503]
0.45183977944990117


In [12]:
class StocasticGDRegressor:
    def __init__(self,learning_rate=0.01,epochs=100):
        self.intercept=None
        self.coef=None
        self.learning_rate = learning_rate
        self.epochs=epochs
        
    def fit(self,X_train,y_train):
        self.coef = np.ones(X_train.shape[1])
        self.intercept = 0
        for _ in range(self.epochs):
            for _ in range(X_train.shape[0]):
                idx = np.random.randint(0,X_train.shape[0])
                y_hat = self.intercept + np.dot(X_train[idx],self.coef)
                loss = y_train[idx]-y_hat
                df_intercept = -2*loss
                self.intercept = self.intercept - (self.learning_rate * df_intercept)
                df_coef = -2*np.dot(loss,X_train[idx])
                self.coef = self.coef - (self.learning_rate * df_coef)
        return self.intercept,self.coef
        
    def predict(self,X_test):
        y_pred = np.dot(X_test,self.coef) + self.intercept
        return y_pred

In [23]:
# SGD needs less epochs - here # of gradient update = epochs * n
sgd = StocasticGDRegressor(epochs=100,learning_rate=0.1)
intercept,coef = sgd.fit(X_train,y_train)
print(intercept)
print(coef)
y_pred = sgd.predict(X_test)
score = r2_score(y_test,y_pred)
print(score)

167.9925039579993
[  12.563505   -256.39027677  579.82838362  332.60177854 -186.04136159
  -65.07458623 -195.76948479  151.42450797  461.00413586   44.70253727]
0.43055146784853027


In [54]:
batch_size = 64
epochs = 100
sgd = SGDRegressor(learning_rate='constant',eta0=0.01,warm_start=True)
for _ in range(epochs):
    idx = random.sample(range(X_train.shape[0]),batch_size)
    sgd.partial_fit(X_train[idx],y_train[idx])

print(sgd.coef_)
print(sgd.intercept_)
y_pred = sgd.predict(X_test)
score = r2_score(y_test,y_pred)
print(score)

[ 32.23477084  -8.87458169 117.67526137  83.89565739  26.62794724
  15.42120581 -71.90362873  74.87909418 107.00275493  69.29421118]
[149.01781488]
0.2710105748004237


In [43]:
class MiniBatchGDRegressor:
    def __init__(self,batch_size=1,learning_rate=0.01,epochs=100):
        self.intercept=None
        self.coef=None
        self.learning_rate = learning_rate
        self.epochs=epochs
        self.batch_size = batch_size
        
    def fit(self,X_train,y_train):
        self.coef = np.ones(X_train.shape[1])
        self.intercept = 0
        for _ in range(self.epochs):
            for _ in range(int(X_train.shape[0]/self.batch_size)):
                idx = random.sample(range(X_train.shape[0]),self.batch_size)
                y_hat = self.intercept + np.dot(X_train[idx],self.coef)
                loss = y_train[idx]-y_hat
                df_intercept = -2*np.mean(loss)
                self.intercept = self.intercept - (self.learning_rate * df_intercept)
                df_coef = -2*np.dot(loss,X_train[idx])
                self.coef = self.coef - (self.learning_rate * df_coef)
        return self.intercept,self.coef
        
    def predict(self,X_test):
        y_pred = np.dot(X_test,self.coef) + self.intercept
        return y_pred

In [44]:
# batch_size = int(X_train.shape[0]/50)
mbgd = MiniBatchGDRegressor(epochs=100,learning_rate=0.1,batch_size=8)
intercept,coef = mbgd.fit(X_train,y_train)
print(intercept)
print(coef)
y_pred = mbgd.predict(X_test)
score = r2_score(y_test,y_pred)
print(score)

147.34777745632272
[  60.51125733 -234.68484735  556.16769404  325.05564641 -178.46339557
  -58.80419932 -196.24675531  177.9268573   461.18325203   51.89122214]
0.4432935080955057


In [59]:
#