# MINI BATCH GRADIENT DESCENT

In [None]:
# Types of gradient descent
    # Mini-batch gradient descent : 
        # Mini-Batch Gradient Descent updates the weights using small batches of data (e.g., 16, 32, 64, 128).
        # It combines the stability of batch GD and the speed of SGD, making it the most commonly used method in deep learning.

# CODE SINGLE VARIATE

In [266]:
# Importing necessary packages
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
import random 

# Dataset

# Synthetic one-feature regression problem with heavy noise, split 70/30.
X, y = make_regression(
    n_samples=100, n_features=1, n_targets=1, noise=80, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Closed-form least-squares fit: the reference score for the gradient-descent model.
lr = LinearRegression().fit(X_train, y_train)
print("(OLS) R2 Score: ", r2_score(y_test, lr.predict(X_test)))


(OLS) R2 Score:  0.19000684388722766


In [267]:
# Importing necessary packages
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
import random

# Dataset

# Same synthetic data and split as the OLS cell (identical seeds), so the
# two R2 scores are directly comparable.
X, y = make_regression(
    n_samples=100, n_features=1, n_targets=1, noise=80, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

class MBGradientDescent:
    """Linear regression trained with mini-batch gradient descent on MSE.

    Every epoch shuffles the row order with ``np.random.shuffle`` and walks
    the rows in consecutive slices of ``batch_size``; each slice produces one
    update of ``slope`` and ``intercept``.

    Parameters
    ----------
    epoch : int
        Number of passes over the training data.
    learning_rate : float
        Base step size.
    batch_size : int
        Rows per gradient step; the last batch of an epoch may be smaller.

    Attributes
    ----------
    slope : numpy.ndarray or None
        One weight per feature; ``None`` until a fit method has run.
    intercept : float
        Bias term.
    """

    def __init__(self, epoch = 200, learning_rate = 0.01, batch_size = 5):
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        # Sized from the training data inside the fit methods; the
        # constructor has no data from which to infer the feature count.
        self.slope = None
        self.intercept = 0

    def fit(self, X, y):
        """Train with a constant learning rate, starting from zero parameters.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
        y : array of shape (n_samples,)

        Returns
        -------
        tuple
            ``(slope, intercept)`` after the final epoch.
        """
        return self._train(X, y, lambda _epoch: self.learning_rate)

    def fit_with_learning_schedule(self, X, y, weight_decay = 0.1):
        """Train with an inverse-time decaying learning rate.

        The step size used in epoch ``e`` is
        ``learning_rate / (1 + e * weight_decay)``.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
        y : array of shape (n_samples,)
        weight_decay : float
            Decay factor of the schedule; also stored on the instance.

        Returns
        -------
        tuple
            ``(slope, intercept)`` after the final epoch.
        """
        self.weight_decay = weight_decay
        return self._train(
            X, y, lambda epoch: self.learning_rate / (1 + (epoch * self.weight_decay))
        )

    def _train(self, X, y, learning_rate_for_epoch):
        """Run the epoch/batch loop; ``learning_rate_for_epoch(e)`` gives the step size."""
        if self.batch_size < 1:
            # A zero step makes range() fail and a negative one silently
            # skips training, so reject both up front.
            raise ValueError("batch_size must be a positive integer")

        self.X = X
        self.y = y
        self.slope = np.zeros(X.shape[1])
        self.intercept = 0
        n_samples = X.shape[0]

        for epoch in range(self.epoch):
            current_learning_rate = learning_rate_for_epoch(epoch)
            index = np.arange(n_samples)
            np.random.shuffle(index)

            for start in range(0, n_samples, self.batch_size):
                batch_rows = index[start:start + self.batch_size]
                X_batch, y_batch = X[batch_rows], y[batch_rows]
                error = y_batch - (np.dot(X_batch, self.slope) + self.intercept)

                # MSE gradients for a batch of m rows:
                #   d/d(slope)     = -(2/m) * X^T . error
                #   d/d(intercept) = -(2/m) * sum(error)
                scale = -2 / len(X_batch)
                dm = scale * np.dot(X_batch.T, error)
                db = scale * np.sum(error)

                self.slope = self.slope - current_learning_rate * dm
                self.intercept = self.intercept - current_learning_rate * db

        return self.slope, self.intercept

    def predict(self, X):
        """Return ``X . slope + intercept``; also remembers ``X`` as ``X_test``.

        Raises
        ------
        RuntimeError
            If no fit method has been called yet.
        """
        if self.slope is None:
            raise RuntimeError("call fit() or fit_with_learning_schedule() before predict()")
        self.X_test = X

        return np.dot(X, self.slope) + self.intercept
        
# Train with the decaying step size and score on the held-out split.
# For a constant step size, call objecta.fit(X_train, y_train) instead.
objecta = MBGradientDescent(epoch=300, learning_rate=0.001, batch_size=5)
objecta.fit_with_learning_schedule(X_train, y_train, weight_decay=0.001)
print(
    "(MBGD) R2 Score: ",
    r2_score(y_test, objecta.predict(X_test)),
)

(MBGD) R2 Score:  0.1891618676608645


# CODE MULTI VARIATE

In [263]:
# Using OLS
# Importing necessary packages

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np
import pandas

# Importing dataset
# Load features and target in a single call rather than building the
# dataset object twice.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

# Applying LR model (closed-form least squares, used as the reference score)
lr = LinearRegression()
lr.fit(X_train, y_train)
y_predict = lr.predict(X_test)

# Computing metrics
print("(OLS) R2 Score: ", r2_score(y_test, y_predict))

(OLS) R2 Score:  0.4772897164322617


In [262]:
# Importing necessary packages
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import random
import numpy as np
import pandas

# Importing dataset
# Load features and target in a single call rather than building the
# dataset object twice; same split seed as the OLS cell for comparability.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)


class MBGradientDescent:
    """Multi-feature linear regression fitted by mini-batch gradient descent.

    The loss is mean squared error. Each epoch draws a fresh row order with
    ``np.random.shuffle`` and consumes it in non-overlapping slices of
    ``batch_size`` rows, updating the weights once per slice.
    """

    def __init__(self, epoch = 100, learning_rate = 0.01, batch_size = 10):
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.batch_size = batch_size

    def _iter_batches(self):
        """Yield ``(X_batch, y_batch)`` pairs covering one shuffled epoch."""
        n_rows = self.X.shape[0]
        order = np.arange(n_rows)
        np.random.shuffle(order)
        for start in range(0, n_rows, self.batch_size):
            rows = order[start:start + self.batch_size]
            yield self.X[rows], self.y[rows]

    def _step(self, features, targets, rate):
        """Apply one gradient step computed from a single batch."""
        residual = targets - (np.dot(features, self.slope) + self.intercept)

        # Both gradients use the residual measured before either update.
        grad_slope = (-2 / len(features)) * np.dot(features.T, residual)
        grad_intercept = (-2 / len(features)) * np.sum(residual)

        self.slope = self.slope - rate * grad_slope
        self.intercept = self.intercept - rate * grad_intercept

    def fit(self, X, y):
        """Train from zero parameters with a constant learning rate.

        Returns the tuple ``(slope, intercept)``.
        """
        self.X = X
        self.y = y
        self.slope = np.zeros(X.shape[1])
        self.intercept = 0

        for _ in range(self.epoch):
            for features, targets in self._iter_batches():
                self._step(features, targets, self.learning_rate)

        return self.slope, self.intercept

    def fit_with_learning_schedule(self, X, y, weight_decay = 0.01):
        """Train from zero parameters with an inverse-time decaying step size.

        Epoch ``e`` uses ``learning_rate / (1 + e * weight_decay)``.
        Returns the tuple ``(slope, intercept)``.
        """
        self.X = X
        self.y = y
        self.weight_decay = weight_decay

        self.slope = np.zeros(X.shape[1])
        self.intercept = 0

        for epoch in range(self.epoch):
            current_learning_rate = self.learning_rate/(1+(epoch*weight_decay))
            for features, targets in self._iter_batches():
                self._step(features, targets, current_learning_rate)

        return self.slope, self.intercept

    def predict(self, X):
        """Return the linear prediction ``X . slope + intercept``."""
        return np.dot(X, self.slope) + self.intercept

# Train with the decaying step size and print the learned (slope, intercept).
# For a constant step size, print test.fit(X_train, y_train) instead.
test = MBGradientDescent(epoch = 250, learning_rate = 0.1)
print(test.fit_with_learning_schedule(X_train, y_train, weight_decay = 0.01))

# Score on the held-out split; predictions are computed once, here.
print("(MBGD) R2 Score: ", r2_score(y_test, test.predict(X_test)))

(array([  45.7179456 , -188.40863816,  492.67812894,  331.54825332,
        -66.6271437 ,  -92.18924711, -230.47371538,  151.76789628,
        315.97368669,  121.4318726 ]), np.float64(151.8178446836638))
(MBGD) R2 Score:  0.4809104489257331
