In [2]:
import numpy as np

# Synthetic regression data: y = 4 + 3 * x plus standard-normal noise.
np.random.seed(42)  # pin the global RNG so reruns reproduce the same data
n_samples = 100
X = 2 * np.random.rand(n_samples, 1)  # one feature column, uniform on [0, 2)
noise = np.random.randn(n_samples, 1)  # drawn after X, same order as before
y = 4 + 3 * X + noise

In [15]:
def mini_batch_gradient_descent(X, y, learning_rate=0.01, n_epochs=100, batch_size=20):
    """Fit linear-regression weights by mini-batch gradient descent on the MSE.

    The model is ``X.dot(theta)``; no intercept column is added here, so the
    caller must include a column of ones in ``X`` if a bias term is wanted.

    Parameters
    ----------
    X : ndarray of shape (m, n_features)
        Design matrix.
    y : ndarray of shape (m, 1) or (m,)
        Targets; coerced to a column vector internally.
    learning_rate : float
        Step size applied to each mini-batch gradient.
    n_epochs : int
        Number of full passes over the (reshuffled) data.
    batch_size : int
        Maximum number of rows per mini-batch. The last batch of an epoch may
        be smaller when ``batch_size`` does not divide ``m``.

    Returns
    -------
    ndarray of shape (n_features, 1)
        The learned parameter vector.

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.

    Notes
    -----
    Initialisation and shuffling draw from NumPy's global RNG, so call
    ``np.random.seed`` beforehand for reproducible results.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    m = len(X)
    n_features = X.shape[1]
    # A 1-D y would broadcast (b, 1) - (b,) into a (b, b) matrix; force a column.
    y = np.asarray(y).reshape(m, 1)
    theta = np.random.randn(n_features, 1)  # random initial parameters

    for epoch in range(n_epochs):
        # Reshuffle every epoch so batches differ between passes.
        shuffled_indices = np.random.permutation(m)
        X_shuffled = X[shuffled_indices]
        y_shuffled = y[shuffled_indices]

        # Stepping by batch_size (instead of m // batch_size full batches)
        # keeps the trailing partial batch and still trains when batch_size > m.
        for start_idx in range(0, m, batch_size):
            end_idx = start_idx + batch_size
            X_batch = X_shuffled[start_idx:end_idx]
            y_batch = y_shuffled[start_idx:end_idx]

            # Normalise by the actual batch length so a short final batch
            # still yields the mean-squared-error gradient.
            gradients = 2 / len(X_batch) * X_batch.T.dot(X_batch.dot(theta) - y_batch)
            theta -= learning_rate * gradients

    return theta


# Train once per mini-batch size (20, then 10, then 50) and print each result
# right after its run, so RNG consumption and output order are unchanged.
# NOTE(review): X is passed without a column of ones, so theta has a single
# coefficient (a through-origin fit) and cannot recover the intercept used to
# generate y — confirm whether a bias column was intended.
runs = (
    ("Learned parameters when mini-batch size is 20: ", 20),
    ("Learned parameters when mini-batch size is 10: ", 10),
    ("Learned parameters when mini-batch size is 50: ", 50),
)
fitted = {}
for label, size in runs:
    fitted[size] = mini_batch_gradient_descent(X, y, batch_size=size)
    print(label, fitted[size].ravel())

# Keep the original per-size names available to later cells.
theta_20, theta_10, theta_50 = fitted[20], fitted[10], fitted[50]

Learned parameters when mini-batch size is 20:  [5.98152624]
Learned parameters when mini-batch size is 10:  [5.97949965]
Learned parameters when mini-batch size is 50:  [5.93563929]
