In [1]:
import numpy as np

def sigmoid(z):
    """Compute the sigmoid function."""
    return 1 / (1 + np.exp(-z))

def compute_loss(X, y, w):
    """Compute the logistic loss function."""
    z = np.dot(X, w)
    y_pred = sigmoid(z)
    epsilon = 1e-5  # small number to avoid log(0)
    return -np.sum(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))

def compute_gradient(X, y, w):
    """Compute the gradient of the logistic loss function."""
    z = np.dot(X, w)
    y_pred = sigmoid(z)
    return np.dot(X.T, (y_pred - y))

def mini_batch_sgd(X, y, batch_size, learning_rate, max_iterations):
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    loss_history = []

    for iteration in range(max_iterations):
        # Randomly select a batch of samples
        indices = np.random.choice(n_samples, batch_size, replace=False)
        X_batch = X[indices]
        y_batch = y[indices]

        # Compute the gradient on the batch
        gradient = compute_gradient(X_batch, y_batch, w)
        
        # Update the weights
        w -= learning_rate * gradient
        
        # Compute and store the loss
        loss = compute_loss(X, y, w)
        loss_history.append(loss)
        
        print(f"Iteration {iteration + 1}, Loss: {loss}")

    return w, loss_history

# Example usage
if __name__ == "__main__":
    # Generate some synthetic data (for demonstration purposes)
    from sklearn.datasets import make_classification
    X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, random_state=42)

    # Hyperparameters
    batch_size = 50
    learning_rate = 0.01
    max_iterations = 100

    # Train the model
    w, loss_history = mini_batch_sgd(X, y, batch_size, learning_rate, max_iterations)
    print("Final weights:", w)

Iteration 1, Loss: 497.9211426654398
Iteration 2, Loss: 540.7898916166055
Iteration 3, Loss: 1028.5611816142382
Iteration 4, Loss: 1550.1783798857768
Iteration 5, Loss: 647.5547336556713
Iteration 6, Loss: 477.58827761322294
Iteration 7, Loss: 691.2441642275571
Iteration 8, Loss: 833.135364511259
Iteration 9, Loss: 588.5208469146696
Iteration 10, Loss: 603.5703227659482
Iteration 11, Loss: 503.8168270696686
Iteration 12, Loss: 543.1175734550575
Iteration 13, Loss: 1096.9677264178504
Iteration 14, Loss: 608.4125539486982
Iteration 15, Loss: 507.169598381028
Iteration 16, Loss: 552.9126682787572
Iteration 17, Loss: 544.2837890217443
Iteration 18, Loss: 1034.6082851403764
Iteration 19, Loss: 558.0777261909301
Iteration 20, Loss: 520.7115398236647
Iteration 21, Loss: 967.0539294847135
Iteration 22, Loss: 729.6271737688521
Iteration 23, Loss: 659.1554030363766
Iteration 24, Loss: 659.2135307915846
Iteration 25, Loss: 550.2468156379222
Iteration 26, Loss: 488.8200132502768
Iteration 27, Loss