In [3]:
import numpy as np

def stochastic_gradient_descent(X, y, learning_rate=0.01, iterations=1000):
    """
    Stochastic Gradient Descent for Linear Regression

    Each epoch visits every training sample exactly once, in a freshly
    shuffled order, and updates the parameters after every single sample
    using the gradient of that sample's squared error
    ``(x_i . w + b - y_i) ** 2``.

    Parameters:
    -----------
    X : array-like
        Training features, shape (n_samples, n_features). A 1-D input of
        shape (n_samples,) is treated as a single feature column.
    y : array-like
        Target values; flattened to shape (n_samples,).
    learning_rate : float, optional
        Step size for gradient descent (default is 0.01).
    iterations : int, optional
        Number of epochs (full passes through the training data) (default is 1000).

    Returns:
    --------
    w : numpy.ndarray
        Estimated weight vector of shape (n_features,).
    b : float
        Estimated intercept for the linear regression model.
    cost_history : list
        List of Mean Squared Error (MSE) values over the full training set,
        one entry appended after each epoch.

    Raises:
    -------
    ValueError
        If X is neither 1-D nor 2-D, or if X and y do not contain the same
        number of samples.

    Notes:
    ------
    The per-epoch shuffle uses NumPy's global random state
    (``np.random.shuffle``); call ``np.random.seed`` beforehand to make a
    run reproducible.
    """
    # Coerce to ndarrays so lists and pandas objects are indexed by position
    # rather than by label (or failing outright on .reshape / .shape).
    X = np.asarray(X)
    y = np.asarray(y).reshape(-1)

    if X.ndim == 1:
        # A flat vector is interpreted as n_samples observations of one feature.
        X = X.reshape(-1, 1)
    elif X.ndim != 2:
        raise ValueError(
            f"X must be 1-D or 2-D, got an array with {X.ndim} dimensions"
        )

    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        # Without this check a mismatch shows up as an IndexError mid-epoch,
        # or is silently broadcast in the MSE when n_samples == 1.
        raise ValueError(
            f"X and y have inconsistent numbers of samples: "
            f"{n_samples} != {y.shape[0]}"
        )

    # Initialize weights and bias
    w = np.zeros(n_features)
    b = 0.0

    cost_history = []

    for _ in range(iterations):
        # Shuffle the data at each epoch. A fresh, sorted index array is
        # shuffled every time so the visiting order depends only on the
        # global random stream, not on the previous epoch's permutation.
        indices = np.arange(n_samples)
        np.random.shuffle(indices)

        for i in indices:
            # Pick a single sample
            xi = X[i, :]
            yi = y[i]

            # Residual of the current model on this sample
            error = np.dot(xi, w) + b - yi

            # Gradient of error**2 w.r.t. w is 2 * error * xi; w.r.t. b it is 2 * error
            w_grad = 2 * xi * error
            b_grad = 2 * error

            # Parameter update
            w -= learning_rate * w_grad
            b -= learning_rate * b_grad

        # Compute MSE over the whole training set after the full pass
        y_pred_full = X @ w + b
        mse = np.mean((y_pred_full - y) ** 2)
        cost_history.append(mse)

    return w, b, cost_history

> ## Example usage

In [4]:
np.random.seed(42)

# Synthetic single-feature dataset: y = 4 + 3x + standard-normal noise.
# X_demo is drawn directly with shape (n_samples, n_features) = (50, 1),
# so no further reshaping is needed before fitting.
X_demo = 2 * np.random.rand(50, 1)
y_demo = 4 + 3 * X_demo.ravel() + np.random.randn(50)

# Fit the linear model with per-sample SGD for 50 epochs
w_final, b_final, cost_hist = stochastic_gradient_descent(
    X_demo, y_demo, learning_rate=0.01, iterations=50
)

print("Final Weight (w):", w_final)
print("Final Bias (b):", b_final)
print("Final MSE:", cost_hist[-1])

Final Weight (w): [2.91051419]
Final Bias (b): 4.121492162666368
Final MSE: 0.8252246356479739
