In [44]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [45]:
X,Y = load_diabetes(return_X_y=True)

In [46]:
print(X.shape)

(442, 10)


In [47]:
print(Y.shape)

(442,)


In [48]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=2)

In [49]:
reg = LinearRegression()
reg.fit(X_train,Y_train)

In [50]:
print(reg.coef_)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]


In [51]:
print(reg.intercept_)

151.88331005254167


In [52]:
Y_pred = reg.predict(X_test)

In [53]:
r2_score(Y_test,Y_pred)

0.4399338661568968

In [54]:
X_train.shape[1]

10

In [55]:
import random
class MBGD_Regessor:

    def __init__(self,batch_size,learning_rate=0.01,epochs=100):
        self.coef_ = None
        self.intercept_ =  None
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self,X_train,Y_train):
        # init your coeffs, X_train will give size of coefficients
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])
        for  i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)): # number of batches.
                # (-2/n)* summation(Y(i)-Y_pred(i))
                idx = random.sample(range(X_train.shape[0]),self.batch_size)
                Y_hat = np.dot(X_train[idx],self.coef_) + self.intercept_ # vectorization
                #print(Y_hat.shape)
                intercept_derivative = -2*np.mean(Y_train[idx]-Y_hat)
                self.intercept_ = self.intercept_ - (self.lr * intercept_derivative)

                coef_derivative = -2* np.dot((Y_train[idx] - Y_hat),X_train[idx])
                self.coef_ =  self.coef_ - (self.lr* coef_derivative)
        print(self.intercept_)
        print(self.coef_)

    def predict(self,X_test):
        return (np.dot(X_test,self.coef_) + self.intercept_)
        


In [56]:
mbr = MBGD_Regessor(batch_size=int(X_train.shape[0]/10),learning_rate=0.01,epochs=50)

In [57]:
mbr.fit(X_train,Y_train)

151.372607432409
[  54.77630266  -66.26462313  343.80951378  239.18775624   19.49978266
  -27.08067467 -167.71589174  126.96009521  321.82503411  136.0699729 ]


In [58]:
Y_pred = mbr.predict(X_test)
r2_score(Y_test,Y_pred)

0.43225095479665965

In [59]:
mbr = MBGD_Regessor(batch_size=int(X_train.shape[0]/20),learning_rate=0.01,epochs=50)
mbr.fit(X_train,Y_train)
Y_pred = mbr.predict(X_test)
r2_score(Y_test,Y_pred)

150.6170528732549
[  49.41729562  -56.49106935  349.95608075  247.82286711   25.14376235
  -18.39372108 -169.12002138  130.26025827  314.6464792   127.02287762]


0.4298820604686745

In [60]:
mbr = MBGD_Regessor(batch_size=int(X_train.shape[0]/50),learning_rate=0.01,epochs=50)
mbr.fit(X_train,Y_train)
Y_pred = mbr.predict(X_test)
r2_score(Y_test,Y_pred)

151.67268669069782
[  52.22919311  -61.44651241  345.84031518  255.09462828   17.46272377
  -28.13103518 -167.39273329  128.66028223  313.25727549  126.22711084]


0.4315307825628921

In [61]:
mbr = MBGD_Regessor(batch_size=int(X_train.shape[0]/50),learning_rate=0.01,epochs=75)
mbr.fit(X_train,Y_train)
Y_pred = mbr.predict(X_test)
r2_score(Y_test,Y_pred)

150.89197605875313
[  33.89255279 -119.85982182  416.42788607  283.383141      0.90089519
  -59.30471058 -187.82293224  122.67746807  376.68317852  122.66592389]


0.4499294073086503

### Note: No direct implementation of Mini Batch in Scikit Learn

In [62]:
from sklearn.linear_model import SGDRegressor

In [63]:
sgd = SGDRegressor(learning_rate='constant',eta0=0.2)

In [64]:
batch_size = 35

for  i in range(100): # 100 epochs
    idx = random.sample(range(X_train.shape[0]),batch_size)
    sgd.partial_fit(X_train[idx],Y_train[idx]) # subset of X_train and Y_train only one epoch

In [65]:
sgd.coef_

array([  27.84078578, -122.60757596,  440.52434815,  314.18733374,
        -19.52229257, -100.84618755, -190.5751438 ,  110.57818135,
        421.55070578,  117.11321328])

In [66]:
sgd.intercept_

array([119.81411471])

In [67]:
y_pred = sgd.predict(X_test)

In [68]:
r2_score(Y_test,Y_pred)

0.4499294073086503

In [71]:
sgd = SGDRegressor(learning_rate='constant',eta0=0.1)
batch_size = 35

for  i in range(100): # 100 epochs
    idx = random.sample(range(X_train.shape[0]),batch_size)
    sgd.partial_fit(X_train[idx],Y_train[idx]) # subset of X_train and Y_train only one epoch
Y_pred = sgd.predict(X_test)
r2_score(Y_test,Y_pred)

0.4291112842373427