In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [6]:
def sigmoid(z):
    """
    Compute the sigmoid activation.
        
    Parameters:
        z (np.array): Linear combination input.
            
    Returns:
        np.array: Sigmoid activation of z.
    """
    return 1 / (1 + np.exp(-z))

class LogisticRegressionImplement:
    def __init__(self, learning_rate=0.01, epochs=100, batch_size=32, tol=1e-3, patience=10):
        """
        Initialize the Logistic Regression model using Mini-batch SGD.

        Parameters:
            learning_rate (float): Learning rate for gradient descent.
            epochs (int): Number of training iterations.
            batch_size (int): Number of samples per mini-batch.
            tol (float): Tolerance for early stopping (minimum loss improvement).
            patience (int): Number of epochs to wait before early stopping.
        """
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.tol = tol
        self.patience = patience
        self.weights = None
        self.loss_history = []

    def compute_loss(self, y, y_pred):
        """
        Compute binary cross-entropy loss.

        Parameters:
            y (np.array): True binary labels.
            y_pred (np.array): Predicted probabilities.

        Returns:
            float: The BCE loss.
        """
        # Add epsilon to avoid log(0)
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

    def fit(self, X, y, verbose=False):
        """
        Train the logistic regression model using Mini-batch Stochastic Gradient Descent.

        Parameters:
            X (np.array): Feature matrix.
            y (np.array): Target vector.
            verbose (bool): If True, print loss progress.

        Returns:
            self: The fitted model.
        """
        # Add bias term (a column of ones)
        X_b = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
        n_samples, n_features = X_b.shape

        # Initialize weights randomly
        self.weights = np.random.randn(n_features)

        # Early stopping variables
        best_loss = float("inf")
        no_improve_count = 0

        # Training loop over epochs
        for epoch in range(self.epochs):
            # Shuffle data at the start of each epoch
            indices = np.random.permutation(n_samples)
            X_shuffled = X_b[indices]
            y_shuffled = y[indices]

            # Process mini-batches
            for i in range(0, n_samples, self.batch_size):
                X_batch = X_shuffled[i:i + self.batch_size]
                y_batch = y_shuffled[i:i + self.batch_size]

                # Compute linear predictions and then probabilities using sigmoid
                z = np.dot(X_batch, self.weights)
                y_pred = sigmoid(z)

                # Compute the error (predicted probabilities minus true labels)
                error = y_pred - y_batch

                # Compute gradient
                grad = np.dot(X_batch.T, error) / len(y_batch)

                # Update weights with SGD
                self.weights -= self.lr * grad

            # Compute loss over the full dataset for monitoring
            z_all = np.dot(X_b, self.weights)
            y_pred_all = sigmoid(z_all)
            loss = self.compute_loss(y, y_pred_all)
            self.loss_history.append(loss)

            if verbose and epoch % 5 == 0:
                print(f"Epoch {epoch}: Loss = {loss:.6f}")

            # Early stopping check
            if loss < best_loss - self.tol:
                best_loss = loss
                no_improve_count = 0
            else:
                no_improve_count += 1
                if no_improve_count >= self.patience:
                    print(f"Early stopping at epoch {epoch}. Best loss: {best_loss:.6f}")
                    break

        return self

    def predict_proba(self, X):
        """
        Predict probabilities for the input features.

        Parameters:
            X (np.array): Feature matrix.

        Returns:
            np.array: Predicted probabilities.
        """
        X_b = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
        z = np.dot(X_b, self.weights)
        return sigmoid(z)

    def predict(self, X):
        """
        Predict binary labels for the input features.

        Parameters:
            X (np.array): Feature matrix.

        Returns:
            np.array: Binary predictions (0 or 1).
        """
        proba = self.predict_proba(X)
        return (proba >= 0.5).astype(int)

In [7]:
X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=10, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the logistic regression model
model = LogisticRegressionImplement(learning_rate=0.1, epochs=100, batch_size=32, tol=1e-4, patience=10)
model.fit(X_train, y_train, verbose=True)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate and print the test accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Epoch 0: Loss = 0.557525
Epoch 5: Loss = 0.378728
Epoch 10: Loss = 0.352244
Epoch 15: Loss = 0.350487
Epoch 20: Loss = 0.350374
Epoch 25: Loss = 0.350365
Early stopping at epoch 27. Best loss: 0.350406
Test Accuracy: 0.87
