# Batch GD + nd Data

### y = w0 + w1x1 + w2x2...
wi = wi - lr*slope_wrt_wi

w = n+1 dim vector

dJ/dw_j = 1/n*sum[ (y_i - y_i_hat)*x_i_j ]

In [14]:
from sklearn.datasets import load_diabetes
import numpy as np
X,y = load_diabetes(return_X_y=True)
print(X.shape,y.shape)

(442, 10) (442,)


In [2]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(X,y,test_size=0.3)

In [3]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(xtrain,ytrain)
lr.coef_

array([ -14.42294414, -212.22245179,  494.64228515,  394.2090426 ,
       -461.75718406,  259.39729229, -111.2606147 ,   -6.12278411,
        625.74161571,   55.64822356])

In [4]:
lr.intercept_

152.41099566843042

In [5]:
class MY_GD_Regressor:
    def __init__(self,epochs = 100,lr = 0.01):
        self.coef_ = None
        self.intercept_ = None
        self.epochs = epochs
        self.lr = lr

    def fit(self,xtrain,ytrain):
        w = np.ones(xtrain.shape[1])
        b = 10

        for i in range(self.epochs):
            # vectorization
            y_hat = np.dot(xtrain,w) + b
            b = b - self.lr * np.mean(y_hat - ytrain)
            w = w - (self.lr *np.dot((y_hat - ytrain),xtrain)/xtrain.shape[0])

        self.coef_ = w
        self.intercept_ = b

    def predict(self,xtest):
        return np.dot(xtest,self.coef_) + self.intercept_

In [6]:
My_gd = MY_GD_Regressor(epochs=1000,lr=0.5)
My_gd.fit(xtrain , ytrain)

In [7]:
ypred = My_gd.predict(xtest)
from sklearn.metrics import r2_score
r2_score(ytest,ypred)

0.511105210740566

# Stochastic GD ==> Fast Convergence => Based on randomness 
### Mostly used in DL as fast and optimized for space

1. Batch GD take too much operations/computation ==> Becomes slow on large dataset
2. Batch GD has problem with hardware also ==> on LOW RAM problem occurs

### ST GD
##### See from animation ST doesn't always get better from last iter but eventually reaches the solution
1. In batch GD we update after entire data analyzed but in ST-GD we update after each row analysation
2. Less epochs required as we reach early
3. We dont need to load entire data we need 1 row at a time hence RAM not a problem now
4. y_hat will be a scaler

#### For same number of epochs Batch Gd fast but overall ST fast as it need less epochs

1. best for non convex function Batch GD donot give global min 
2. Non convex ==> more than 1 minimas in curve/surface

In [8]:
class MY_GD_Regressor_ST:
    def __init__(self,epochs = 100,lr = 0.01):
        self.coef_ = None
        self.intercept_ = None
        self.epochs = epochs
        self.lr = lr

    def fit(self,xtrain,ytrain):
        w = np.ones(xtrain.shape[1])
        b = 10

        for i in range(self.epochs):
            for j in range(xtrain.shape[0]):
                random_idx = np.random.randint(0,xtrain.shape[0])

                y_hat = np.dot(xtrain[random_idx],w) + b
                b = b - self.lr *(y_hat - ytrain[random_idx])
                w = w - self.lr *np.dot((y_hat - ytrain[random_idx]),xtrain[random_idx])
                
        self.coef_ = w
        self.intercept_ = b

    def predict(self,xtest):
        return np.dot(xtest,self.coef_) + self.intercept_

In [9]:
My_gd1 = MY_GD_Regressor_ST(epochs=50,lr=0.1)
My_gd1.fit(xtrain , ytrain)
ypred = My_gd1.predict(xtest)
from sklearn.metrics import r2_score
r2_score(ytest,ypred)

0.45559512753542264

# Mini Batch GD
1. We make group of rows called batch 
2. we make updates after analysing a batch
3. N Batches = ST GD, 1 Batch = Batch GD

In [12]:
import random
class MY_GD_Regressor_MINI_BATCH:
    def __init__(self,batch_size,epochs = 100,lr = 0.01):
        self.coef_ = None
        self.intercept_ = None
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size

    def fit(self,xtrain,ytrain):
        w = np.ones(xtrain.shape[1])
        b = 0

        for i in range(self.epochs):
            for j in range(xtrain.shape[0]//self.batch_size):
                random_batch = random.sample(range(xtrain.shape[0]),self.batch_size)

                y_hat = np.dot(xtrain[random_batch],w) + b
                b = b - self.lr * np.mean((y_hat - ytrain[random_batch]))
                w = w - self.lr *np.dot((y_hat - ytrain[random_batch]),xtrain[random_batch])
                
        self.coef_ = w
        self.intercept_ = b

    def predict(self,xtest):
        return np.dot(xtest,self.coef_) + self.intercept_

In [11]:
My_gd = MY_GD_Regressor_MINI_BATCH(batch_size= xtrain.shape[0]//20,epochs=50,lr=0.1)
My_gd.fit(xtrain , ytrain)
ypred = My_gd.predict(xtest)
from sklearn.metrics import r2_score
r2_score(ytest,ypred)

0.5256134094549896