# Mini-Batch Gradient Descent

In [1]:
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np

In [2]:
%matplotlib notebook

In [3]:
# Synthetic linear data: y = 4 + 3x + Gaussian noise, with x uniform on [0, 2).
# The global NumPy RNG is seeded so the draws are the same on every run.
N_SAMPLES = 100
np.random.seed(42)
X = 2 * np.random.rand(N_SAMPLES, 1)
noise = np.random.randn(N_SAMPLES, 1)
y = 4 + 3 * X + noise

In [4]:
# Cost of a linear model: half the mean squared error
def compute_cost(theta, X, y):
    """Return J(theta) = 1/(2m) * sum((X.dot(theta) - y) ** 2).

    ``m`` is ``len(y)``. Note the 1/(2m) factor: the result is half the
    mean squared error, not the MSE itself.

    Parameters
    ----------
    theta : parameter array multiplied on the right of ``X``.
    X : design matrix, one row per sample.
    y : targets, same shape as ``X.dot(theta)``.

    Returns
    -------
    The scalar cost.
    """
    residuals = X.dot(theta) - y
    scale = 1 / (2 * len(y))
    return scale * np.square(residuals).sum()


In [5]:
# Function to perform mini-batch gradient descent
def mini_batch_gradient_descent(X, y, theta, learning_rate, batch_size, epochs):
    """Fit linear-model parameters with mini-batch gradient descent.

    Every epoch the rows of ``X``/``y`` are reshuffled (using the global
    NumPy RNG via ``np.random.permutation``) and then consumed in
    consecutive slices of at most ``batch_size`` rows, with one gradient
    step taken per slice.

    Parameters
    ----------
    X : ndarray
        Design matrix, one row per sample (expected shape ``(m, n)``).
    y : ndarray
        Targets, one row per sample (expected shape ``(m, 1)``).
    theta : ndarray
        Starting parameters (expected shape ``(n, 1)``). The array passed
        in is not modified; updated copies are produced instead.
    learning_rate : float
        Step size applied to each mini-batch gradient.
    batch_size : int
        Maximum number of rows per mini-batch; must be at least 1.
    epochs : int
        Number of full passes over the data.

    Returns
    -------
    theta : ndarray
        Parameters after the final epoch.
    cost_history : ndarray, shape ``(epochs,)``
        ``compute_cost`` evaluated on the full data set after each epoch.
    theta_history : ndarray, shape ``(epochs, theta.size)``
        Flattened parameters after each epoch.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would give an empty range and silently skip training.
        raise ValueError("batch_size must be a positive integer")

    m = len(y)
    cost_history = np.zeros(epochs)
    # Size the history from theta itself so any number of parameters works.
    theta_history = np.zeros((epochs, theta.size))

    for epoch in range(epochs):
        # Shuffle the dataset at the beginning of each epoch
        indices = np.random.permutation(m)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        for start in range(0, m, batch_size):
            X_batch = X_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]

            errors = X_batch.dot(theta) - y_batch
            # Average over the rows actually in this slice: the last slice is
            # shorter than batch_size whenever m is not a multiple of it.
            gradient = (1 / len(X_batch)) * X_batch.T.dot(errors)
            theta = theta - learning_rate * gradient

        # Store cost and theta history
        cost_history[epoch] = compute_cost(theta, X, y)
        theta_history[epoch] = theta.ravel()

    return theta, cost_history, theta_history

In [6]:
# Design matrix: prepend a bias column (x0 = 1) to every instance
bias_column = np.ones((X.shape[0], 1))
X_b = np.hstack([bias_column, X])

# Random initialization of theta, one row per column of X_b
theta = np.random.randn(2, 1)

# Hyperparameters
learning_rate = 0.01
batch_size = 20
epochs = 50

In [7]:
# Run mini-batch gradient descent. `theta` is both the starting point passed in
# and the name the fitted result is bound to, so re-running this cell resumes
# from the previously fitted parameters.
theta, cost_history, theta_history = mini_batch_gradient_descent(
    X_b, y, theta, learning_rate, batch_size, epochs
)

# Cost recorded after each epoch
cost_fig, cost_ax = plt.subplots(figsize=(8, 6))
cost_ax.plot(np.arange(len(cost_history)), cost_history, 'b.')
cost_ax.set_xlabel("Number of Epochs")
cost_ax.set_ylabel("Cost (J)")
cost_ax.set_title("Cost Function History (MBGD)")
plt.show()


<IPython.core.display.Javascript object>

In [8]:
# Scatter of the samples with the line predicted by the current theta on top
fit_fig, fit_ax = plt.subplots(figsize=(8, 6))
fit_ax.scatter(X, y, color='blue', label='Data Points')
fitted_values = X_b.dot(theta)
fit_ax.plot(X, fitted_values, color='red', label='Fitted Line')
fit_ax.set_xlabel("X")
fit_ax.set_ylabel("y")
fit_ax.set_title("Linear Regression Fit with Mini-Batch Gradient Descent")
fit_ax.legend()
plt.show()

<IPython.core.display.Javascript object>

In [9]:
# Animation of the MBGD steps: one frame per epoch, redrawing the fitted line
# with the parameters stored for that epoch in theta_history.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X, y, color='blue')
line, = ax.plot(X, X_b.dot(theta_history[0]), color='red')
ax.set_xlabel("X")
ax.set_ylabel("y")
ax.set_title("Mini-Batch Gradient Descent Animation")


def update(num, theta_history, line):
    """Move `line` to the fit given by row `num` of `theta_history`.

    Returns a one-element tuple holding the modified artist, which is the
    form FuncAnimation expects from its callback when blit=True.
    """
    line.set_ydata(X_b.dot(theta_history[num]))
    return line,


# Keep the animation bound to a name: it stops running if the object is
# garbage collected.
ani = animation.FuncAnimation(
    fig,
    update,
    frames=len(theta_history),
    fargs=(theta_history, line),
    blit=True,
)
plt.show()

<IPython.core.display.Javascript object>