In [3]:
import numpy as np

def mini_batch_gradient_descent(X, y, learning_rate=0.01, epochs=100, batch_size=20):
    """
    Mini-Batch Gradient Descent for Linear Regression.

    Fits ``y ~ X @ w + b`` by minimising the Mean Squared Error. Every epoch
    the sample order is reshuffled (using NumPy's global random state, so
    call ``np.random.seed`` beforehand for reproducible runs) and the
    parameters are updated once per mini-batch.

    Parameters:
    -----------
    X : numpy.ndarray
        Training features, shape (n_samples, n_features).
    y : numpy.ndarray
        Target values, shape (n_samples,). Any array with n_samples
        elements (e.g. a column vector) is accepted and flattened.
    learning_rate : float
        Step size for gradient updates (default is 0.01).
    epochs : int
        Number of epochs (full passes through the dataset) (default is 100).
    batch_size : int
        Number of samples per mini-batch (default is 20). The last batch of
        an epoch is smaller when n_samples is not a multiple of batch_size.

    Returns:
    --------
    w : numpy.ndarray
        Estimated weight vector of shape (n_features,).
    b : float
        Estimated intercept for the linear regression model.
    cost_history : list
        List of Mean Squared Error (MSE) after each epoch.

    Raises:
    -------
    ValueError
        If X is not 2-D, if y does not contain n_samples values, or if
        batch_size is smaller than 1.
    """
    # Accept array-likes (lists, tuples) as well as ndarrays
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be 2-D with shape (n_samples, n_features)")
    n_samples, n_features = X.shape

    # Make sure y is 1D
    y = np.asarray(y).reshape(-1)
    if y.shape[0] != n_samples:
        raise ValueError("y must contain exactly one target value per row of X")

    # A non-positive batch size would otherwise fail obscurely (0) or
    # silently skip every update (negative)
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Initial parameters
    w = np.zeros(n_features)
    b = 0.0

    cost_history = []

    for _ in range(epochs):
        # Shuffle training data every epoch
        indices = np.arange(n_samples)
        np.random.shuffle(indices)

        # Generate mini-batches
        for start_idx in range(0, n_samples, batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]

            # Create mini-batch
            X_batch = X[batch_indices]
            y_batch = y[batch_indices]

            # Residuals of the current model on this batch
            errors = (X_batch @ w + b) - y_batch

            # Scale by the ACTUAL number of rows in the batch: the final
            # batch of an epoch can be shorter than batch_size, and dividing
            # by the nominal size would under-weight its gradient.
            n_batch = len(batch_indices)
            dw = (2 / n_batch) * (X_batch.T @ errors)
            db = (2 / n_batch) * np.sum(errors)

            # Update parameters
            w -= learning_rate * dw
            b -= learning_rate * db

        # Calculate MSE for the entire dataset after each epoch
        y_pred_full = X @ w + b
        mse = np.mean((y_pred_full - y) ** 2)
        cost_history.append(mse)

    return w, b, cost_history

> ## Example usage:

In [4]:
# Synthetic regression problem: y = 5 + 3 * x + Gaussian noise (sigma = 2)
np.random.seed(42)
# Features are drawn as a (100, 1) column, i.e. already (n_samples, n_features),
# with values in [0, 10)
X_demo = np.random.rand(100, 1) * 10
noise = np.random.normal(loc=0, scale=2, size=(100,))
y_demo = 3 * X_demo[:, 0] + 5 + noise

# Fit the model using mini-batches of 10 samples
w_final, b_final, cost_hist = mini_batch_gradient_descent(
    X_demo,
    y_demo,
    learning_rate=0.01,
    epochs=100,
    batch_size=10,
)

# Report the fitted parameters and the MSE after the last epoch
print("Final Weight (w):", w_final)
print("Final Bias (b):", b_final)
print("Final MSE:", cost_hist[-1])

Final Weight (w): [2.85553206]
Final Bias (b): 5.401175062967701
Final MSE: 3.3266337627769236
