<a href="https://colab.research.google.com/github/Kashishkewat/machine-learning/blob/main/mini_batch_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Mini-Batch Gradient Descent**

In [None]:
from sklearn.datasets import load_diabetes
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the diabetes regression data as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)


In [None]:
# Sanity-check the dimensions of the feature matrix and the target vector.
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# scikit-learn's LinearRegression serves as the reference model that the
# gradient-descent regressors below are compared against.
reg = LinearRegression()
reg.fit(X_train, y_train)

In [None]:
# R^2 of the reference model on the held-out rows.
y_pred = reg.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)


0.4399338661568968

In [None]:
# Only the last expression of a cell is displayed, so a bare `reg.coef_` on a
# line of its own before `reg.intercept_` is evaluated and thrown away.
# Returning both as a tuple shows the coefficients and the intercept together.
reg.coef_, reg.intercept_

np.float64(151.88331005254167)

In [None]:
import random
import numpy as np # Ensure numpy is imported for array operations

class MBGDregressor:
  """Linear regression fitted with mini-batch gradient descent on squared error.

  Every epoch is one full pass over the training rows: the row order is
  shuffled and then cut into consecutive batches, so each row contributes
  exactly once per epoch and a trailing batch smaller than ``batch_size`` is
  still used rather than dropped.

  Parameters
  ----------
  batch_size : int
      Number of rows per gradient step. Must be at least 1. Values larger than
      the training set are clamped to the number of rows, which degenerates to
      plain (full) batch gradient descent.
  learning_rate : float, default=0.01
      Step size applied to every gradient update.
  epochs : int, default=100
      Number of passes over the training data.
  random_state : None, int or numpy.random.Generator, default=None
      Seed forwarded to ``numpy.random.default_rng`` for batch shuffling.
      ``None`` keeps the run non-deterministic; an int makes ``fit``
      repeatable.

  Attributes
  ----------
  m : numpy.ndarray or None
      Learned coefficients, one per feature (``None`` before ``fit``).
  b : float or None
      Learned intercept (``None`` before ``fit``).
  """

  def __init__(self,batch_size,learning_rate=0.01,epochs=100,random_state=None):
    self.lr=learning_rate
    self.epochs=epochs
    self.m=None
    self.b=None
    self.batch_size=batch_size
    self.random_state=random_state

  def fit(self,X_train,y_train):
    """Learn ``m`` and ``b`` from the training data and print them.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
    y_train : array-like of shape (n_samples,) or (n_samples, 1)

    Raises
    ------
    ValueError
        If ``X_train`` is not 2-D, is empty, disagrees with ``y_train`` on the
        number of rows, or if ``batch_size`` is smaller than 1.
    """
    # Plain ndarrays give positional row indexing even when the caller passes
    # pandas objects or lists; ravel() keeps a column-vector target from
    # broadcasting against the feature matrix.
    X=np.asarray(X_train,dtype=float)
    y=np.asarray(y_train,dtype=float).ravel()

    if X.ndim!=2:
      raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
    n_samples,n_features=X.shape
    if n_samples==0:
      raise ValueError("X_train must contain at least one row")
    if y.shape[0]!=n_samples:
      raise ValueError("X_train and y_train must have the same number of rows")

    batch_size=int(self.batch_size)
    if batch_size<1:
      raise ValueError("batch_size must be at least 1")
    # An oversized batch used to yield zero iterations per epoch and therefore
    # a silently untrained model; clamp it to the data size instead.
    batch_size=min(batch_size,n_samples)

    rng=np.random.default_rng(self.random_state)
    self.m=np.zeros(n_features)
    self.b=0.0

    for _ in range(self.epochs):
      order=rng.permutation(n_samples)
      for start in range(0,n_samples,batch_size):
        idx=order[start:start+batch_size]
        X_batch=X[idx]

        # Residuals are computed once and reused for both gradients, so the
        # intercept and the coefficients are updated from the same prediction.
        error=y[idx]-(np.dot(X_batch,self.m)+self.b)

        # d(MSE)/db = -2 * mean(error)
        intercept_der=-2*error.mean()
        # d(MSE)/dm = -2 * X^T error / batch_rows
        coef_der=-2*np.dot(X_batch.T,error)/idx.size

        self.b=self.b-(self.lr*intercept_der)
        self.m=self.m-(self.lr*coef_der)

    print(self.m,self.b)

  def predict(self,X_test):
    """Return ``X_test @ m + b`` using the parameters learned by ``fit``."""
    return np.dot(np.asarray(X_test,dtype=float),self.m)+self.b


In [None]:
# The batch size is the number of training rows floor-divided by 120.
mbr = MBGDregressor(
    batch_size=X_train.shape[0] // 120,
    learning_rate=0.01,
    epochs=100,
)

In [None]:
# Train the hand-written regressor; fit() prints the learned coefficients and intercept.
mbr.fit(X_train, y_train)

[  53.0535892   -76.78877531  353.98120091  248.26256109   20.98591233
  -25.53657072 -175.6785211   130.15948295  323.16892755  126.95085683] 151.1125010359067


In [None]:
# R^2 of the hand-written mini-batch regressor on the held-out rows.
y_pred = mbr.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.43551475245607885

In [None]:
# Mini-batch gradient descent using scikit-learn's SGDRegressor
from sklearn.linear_model import SGDRegressor

In [None]:
# Constant step size of 0.2. SGDRegressor shuffles the rows it is given by
# default, so a fixed random_state is needed for that shuffling to be
# repeatable from run to run.
sgd = SGDRegressor(learning_rate='constant', eta0=0.2, random_state=2)

In [None]:
# Feed the estimator 100 randomly drawn mini-batches of 35 rows through
# partial_fit. Drawing from a dedicated seeded generator (instead of the
# unseeded global `random` state) makes the sequence of batches repeatable.
# This seed governs only which rows land in each batch; any shuffling done
# inside the estimator is controlled by the estimator's own random_state.
batch_size = 35
batch_rng = random.Random(2)
for i in range(100):
  idx = batch_rng.sample(range(X_train.shape[0]), batch_size)
  sgd.partial_fit(X_train[idx], y_train[idx])

In [None]:
# Coefficients learned by the SGDRegressor after the partial_fit loop.
sgd.coef_

array([   5.1998882 , -143.80590421,  477.48151801,  289.81464736,
        -30.06824117,  -91.21749918, -186.58583417,   94.33091668,
        392.59546951,  114.0375345 ])

In [None]:
# Intercept learned by the SGDRegressor (displayed as a one-element array).
sgd.intercept_

array([167.86477888])

In [None]:
# R^2 of the SGDRegressor on the held-out rows.
y_pred = sgd.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)


0.41565932882995194