<a href="https://colab.research.google.com/github/Ehtisham1053/Optimization-ML-Algorithms/blob/main/mini_batch_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

In [2]:
# Load the listings and separate the regression target (price) from the features.
df = pd.read_csv('audi.csv')
y = df['price']
x = df.drop(columns='price')

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Columns routed to each transformer; any other column is passed through unchanged.
categorical_cols = ['transmission', 'fuelType', 'model']
numeric_cols = ['mileage', 'tax', 'mpg', 'engineSize', 'year']

one_hot = OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False)
standardize = StandardScaler()

c = ColumnTransformer(
    transformers=[
        ('encoder', one_hot, categorical_cols),
        ('scaler', standardize, numeric_cols),
    ],
    remainder='passthrough',
)

# Fit the transformers on the training split only, then apply them to the test split.
x_train = c.fit_transform(x_train)
x_test = c.transform(x_test)



In [5]:
import numpy as np

class MiniBatchGradientDescent:
    """Linear regression trained with mini-batch gradient descent.

    The model is ``y_hat = X @ m + b``. Every epoch reshuffles the training
    rows, walks over them in consecutive slices of ``batch_size`` rows and
    takes one gradient step per slice.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=1000
        Number of full passes over the training data.
    batch_size : int, default=10
        Rows per mini-batch; the last batch of an epoch may be smaller.
    random_state : int or None, default=None
        Seed for the per-epoch shuffling. ``None`` draws from NumPy's global
        random state, so ``np.random.seed`` still controls the result.

    Attributes
    ----------
    m : ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    b : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, epochs=1000, batch_size=10, random_state=None):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.random_state = random_state
        self.m = None
        self.b = None

    def fit(self, X, y):
        """Estimate ``m`` and ``b`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Targets. NumPy arrays, lists and pandas Series are all accepted.

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1, ``X`` is not two-dimensional,
            or ``X`` and ``y`` disagree on the number of samples.
        """
        if self.batch_size < 1:
            # A negative step would make range() yield nothing and silently skip training.
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size!r}")

        # Coerce to plain arrays so positional fancy-indexing works for any array-like.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: {n_samples} != {y.shape[0]}"
            )

        self.m = np.zeros(n_features)
        self.b = 0.0

        # Both the np.random module and a Generator expose permutation(); falling
        # back to the module keeps np.random.seed(...) effective when unseeded.
        rng = np.random if self.random_state is None else np.random.default_rng(self.random_state)

        for _ in range(self.epochs):
            indices = rng.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            for start in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[start:start + self.batch_size]
                y_batch = y_shuffled[start:start + self.batch_size]

                error = np.dot(X_batch, self.m) + self.b - y_batch

                # Gradient of half the mean squared error over this batch.
                dm = (1 / len(y_batch)) * np.dot(X_batch.T, error)
                db = (1 / len(y_batch)) * np.sum(error)

                self.m -= self.learning_rate * dm
                self.b -= self.learning_rate * db

    def predict(self, X):
        """Return ``X @ m + b`` for each row of ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.m is None:
            raise RuntimeError("This model is not fitted yet; call fit(X, y) before predict(X).")
        return np.dot(X, self.m) + self.b

    def _residuals(self, X, y):
        """Return ``predict(X) - y`` with ``y`` flattened to one dimension."""
        # Flattening stops an (n, 1) target from broadcasting into an (n, n) matrix.
        return self.predict(X) - np.asarray(y, dtype=float).ravel()

    def mse(self, X, y):
        """Mean squared error of the predictions for ``X`` against ``y``."""
        return np.mean(self._residuals(X, y) ** 2)

    def mae(self, X, y):
        """Mean absolute error of the predictions for ``X`` against ``y``."""
        return np.mean(np.abs(self._residuals(X, y)))

    def rmse(self, X, y):
        """Root mean squared error (square root of ``mse``)."""
        return np.sqrt(self.mse(X, y))

    def r2_score(self, X, y):
        """Coefficient of determination, ``1 - SS_residual / SS_total``.

        The ratio is undefined (NaN or -inf) when ``y`` is constant, because
        ``SS_total`` is then zero.
        """
        y = np.asarray(y, dtype=float).ravel()
        ss_total = np.sum((y - np.mean(y)) ** 2)
        ss_residual = np.sum((y - self.predict(X)) ** 2)
        return 1 - (ss_residual / ss_total)

    def adjusted_r2(self, X, y):
        """R² adjusted for the number of predictors ``k`` and samples ``n``.

        Computed as ``1 - (1 - R²) * (n - 1) / (n - k - 1)``.

        Raises
        ------
        ValueError
            If ``n <= k + 1``, where the adjustment is undefined.
        """
        n, k = np.shape(X)
        if n - k - 1 <= 0:
            raise ValueError(
                f"adjusted R² needs more than k + 1 samples (n={n}, k={k})"
            )
        r2 = self.r2_score(X, y)
        return 1 - ((1 - r2) * (n - 1) / (n - k - 1))


In [6]:
# Seed NumPy's global RNG so the per-epoch shuffling inside fit() -- and therefore
# the fitted weights and the metrics computed from them -- is repeatable across runs.
np.random.seed(42)

model = MiniBatchGradientDescent(learning_rate=0.1, epochs=1000, batch_size=10)
model.fit(x_train, y_train)


In [8]:
# Predictions for the held-out split.
y_pred = model.predict(x_test)

# Report every metric on the test split, in a fixed order.
metric_table = [
    ("MSE:", model.mse),
    ("MAE:", model.mae),
    ("RMSE:", model.rmse),
    ("R² Score:", model.r2_score),
    ("Adjusted R² Score:", model.adjusted_r2),
]
for label, metric_fn in metric_table:
    print(label, metric_fn(x_test, y_test))

MSE: 20666018.694263145
MAE: 3102.1328874537426
RMSE: 4545.98929764063
R² Score: 0.8632562217728721
Adjusted R² Score: 0.86110739097216


# Mini-Batch Gradient Descent (MBGD)
Mini-Batch Gradient Descent is a middle ground between Batch Gradient Descent (BGD) and Stochastic Gradient Descent (SGD). Instead of computing gradients using the entire dataset (like BGD) or a single sample (like SGD), it updates the parameters using a small subset (mini-batch) of the dataset at each iteration.

## When to Use
MBGD is suitable when the dataset is too large for BGD but still needs a more stable update process than SGD. It is widely used in deep learning and large-scale machine learning applications where a balance between speed and accuracy is needed.

## Advantages
- Faster than BGD and more stable than SGD.
- Can leverage GPU acceleration efficiently by processing the samples of each mini-batch in parallel.
- Reduces memory requirements compared to BGD while still achieving smooth convergence.

## Disadvantages
- Requires tuning the mini-batch size, which affects performance.
- Can still experience some noise in convergence, though less than SGD.
- Might not be as fast as SGD in real-time scenarios where data points are processed one at a time.