In [51]:
import numpy as np 
import pandas as pd 
import statsmodels.api as sma
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [52]:
# Import Inbuild DataSet

In [53]:
from sklearn.datasets import load_diabetes

In [54]:
x,y = load_diabetes(return_X_y=True)

In [55]:
pd.DataFrame(x)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [56]:
print(x.shape)
print(y.shape)

(442, 10)
(442,)


In [57]:
# Train Test Split

x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.8,random_state=2)

In [58]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(353, 10)
(89, 10)
(353,)
(89,)


# Batch Gradiant Descent

* It is an iteravtive algorithm that updates models parameters after processing the entire dataset.

* We take the average of gradients of all the training examples and then use that mean gradient to update our parameters .

* One step of GD in one epoch .

In [60]:
lr = LinearRegression()

In [61]:
lr.fit(x_train,y_train)

In [62]:
print(lr.coef_)
print(lr.intercept_)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


In [63]:
y_pred = lr.predict(x_test)
r2_score(y_test,y_pred)

0.4399338661568969

In [64]:
# Lets check with our own function class

In [65]:
class GDRegressor:
    
    def __init__(self,learning_rate=0.01,epochs=100):
        
        self.coef_=None
        self.intercept_=None
        self.l_rate = learning_rate
        self.epochs = epochs
        
    def fit(self,x_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(x_train.shape[1])
        
        for i in range(self.epochs):
            # Update all the coef and the intercept
            y_hat = np.dot(x_train,self.coef_) + self.intercept_
            # print('shape of y_hat',y_hat.shape)
            intercept_der = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ - ( self.l_rate * intercept_der)
            
            coef_der = -2 * np.dot((y_train - y_hat),x_train) / x_train.shape[0]
            self.coef_ = self.coef_ - (self.l_rate * coef_der)
            
        print(self.intercept_,self.coef_)
        
    def predict(self,x_test):
        return np.dot(x_test,self.coef_) + self.intercept_

In [66]:
gdr = GDRegressor(epochs=1000000,learning_rate=0.5)

In [67]:
gdr.fit(x_train,y_train)

151.8833100525583 [  -9.15865318 -205.45432162  516.69374457  340.61999904 -895.55200041
  561.22067786  153.89310887  126.73139669  861.12700097   52.42112238]


In [68]:
y_pred = gdr.predict(x_test)

In [69]:
r2_score(y_test,y_pred)

0.43993386616830465

# Stochastic Gradient Descent

**Stochastic Gradient Descent (SGD) is an optimization algorithm used to train machine learning models, particularly in scenarios with large datasets. Unlike batch gradient descent, which computes gradients for the entire dataset in each iteration, SGD updates the model's parameters using only a single randomly chosen data point at a time.**

In [70]:
from sklearn.linear_model import SGDRegressor

In [91]:
sgd = SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.2)

In [92]:
sgd.fit(x_train,y_train)

In [93]:
y_pred = sgd.predict(x_test)

In [94]:
r2_score(y_test,y_pred)

0.4351558866834837

In [95]:
# Lets check with our own function class

In [135]:
class SGDregressor:
    
    def __init__(self,learning_rate=0.01,epochs=100):
        
        self.coef_=None
        self.intercept_=None
        self.l_rate = learning_rate
        self.epochs = epochs
        
    def fit(self,x_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(x_train.shape[1])
        
        for i in range(self.epochs):
            for j in range(x_train.shape[0]):
                idx = np.random.randint(0,x_train.shape[0])
                
                # Update all the coef and the intercept
                y_hat = np.dot(x_train[idx],self.coef_) + self.intercept_
                
                intercept_der = -2 * (y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - ( self.l_rate * intercept_der)
                
                coef_der = -2 * np.dot((y_train[idx] - y_hat),x_train[idx])
                self.coef_ = self.coef_ - (self.l_rate * coef_der)
            
        print(self.intercept_,self.coef_)
            
    def predict(self,x_test):
        return np.dot(x_test,self.coef_) + self.intercept_ 

In [136]:
sgdr = SGDregressor(learning_rate=0.01,epochs=50)

In [137]:
pd.DataFrame(sgdr.fit(x_train,y_train))

149.78721874422 [  56.10564576  -64.78165961  350.554937    253.37022814   17.67730218
  -32.32881309 -170.05380257  125.52108471  329.4457835   126.62044156]


In [138]:
y_pred = sgdr.predict(x_test)

In [139]:
r2_score(y_test,y_pred)

0.4313414225885501

# Mini Batch Gradient Descent 

**It means dividing your dataset into smaller parts (mini-batches), working on these smaller parts one at a time, calculating how wrong your model's predictions are (loss), and adjusting the model's parameters (weights) to improve the predictions. This process repeats until the model gets better at making predictions.**

In [142]:
# lets do first with the inbulid function 

In [146]:
from sklearn.linear_model import SGDRegressor

import random

In [147]:
sgd = SGDRegressor(learning_rate='constant',eta0=0.1)

In [148]:
batch_size = 35

for i in range(100):
    
    idx = random.sample(range(x_train.shape[0]),batch_size)
    sgd.partial_fit(x_train[idx],y_train[idx])

In [149]:
sgd.coef_

array([  42.24255968,  -67.35305159,  341.81697617,  228.98833855,
         22.14860478,  -25.44312832, -172.28416166,  132.35880867,
        319.69069389,  107.66370809])

In [150]:
sgd.intercept_

array([159.05991239])

In [151]:
y_pred = sgd.predict(x_test)

In [152]:
r2_score(y_test,y_pred)

0.4249340284376648

In [153]:
# Lets try with our class function

In [156]:
class MBSGDregressor:
    
    def __init__(self,batch_size,learning_rate=0.01,epochs=100):
        
        self.coef_=None
        self.intercept_=None
        self.l_rate = learning_rate
        self.epochs = epochs
        self.batch_size=batch_size
        
    def fit(self,x_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(x_train.shape[1])
        
        for i in range(self.epochs):
            for j in range(int(x_train.shape[0]/self.batch_size)):
                           
                idx = random.sample(range(x_train.shape[0]),self.batch_size)
                
                # Update all the coef and the intercept
                y_hat = np.dot(x_train[idx],self.coef_) + self.intercept_
                
                intercept_der = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - ( self.l_rate * intercept_der)
                
                coef_der = -2 * np.dot((y_train[idx] - y_hat),x_train[idx])
                self.coef_ = self.coef_ - (self.l_rate * coef_der)
            
        print(self.intercept_,self.coef_)
            
    def predict(self,x_test):
        return np.dot(x_test,self.coef_) + self.intercept_ 

In [217]:
mbg = MBSGDregressor(batch_size=int(x_train.shape[0]/35),learning_rate=0.1,epochs=90)

In [218]:
mbg.fit(x_train,y_train)

141.86083250822938 [  11.37045834 -204.57315614  537.58692344  319.94901001 -111.64712585
  -41.7958552  -176.45402241   58.32614835  539.56482733   57.09491286]


In [219]:
mbg.coef_

array([  11.37045834, -204.57315614,  537.58692344,  319.94901001,
       -111.64712585,  -41.7958552 , -176.45402241,   58.32614835,
        539.56482733,   57.09491286])

In [220]:
mbg.intercept_

141.86083250822938

In [221]:
y_pred = mbg.predict(x_test)

In [222]:
r2_score(y_test,y_pred)

0.42729599044775257

End