In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [2]:
import numpy as np

class MLP:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01, lambda_reg=0.01, epochs=1000, tol=1e-3, patience=10, batch_size=32):
        """
        Initialize the 2-layer neural network model.

        Parameters:
            input_size (int): Number of features in the input.
            hidden_size (int): Number of neurons in the hidden layer.
            output_size (int): Number of output neurons (classes).
            learning_rate (float): Step size for gradient updates.
            lambda_reg (float): L2 regularization strength.
            epochs (int): Number of training iterations.
            tol (float): Tolerance for early stopping.
            patience (int): Number of epochs to wait for improvement before stopping.
            batch_size (int): Size of each mini-batch for gradient descent.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.lambda_reg = lambda_reg
        self.epochs = epochs
        self.tol = tol
        self.patience = patience
        self.batch_size = batch_size
        
        # Initialize weights and biases
        self.W_1 = np.random.randn(hidden_size, input_size) * np.sqrt(2. / input_size)  # He initialization
        self.W_2 = np.random.randn(output_size, hidden_size) * np.sqrt(2. / hidden_size)

        self.b_1 = np.zeros(hidden_size)
        self.b_2 = np.zeros(output_size)
        
        self.loss_history = []

    def relu(self, x):
        """
        ReLU activation function.
        """
        return np.maximum(0, x)

    def compute_loss(self, X, y):
        """
        Compute the loss function with L2 regularization.

        Parameters:
            X (np.array): Feature matrix.
            y (np.array): Target labels (one-hot encoded).
        
        Returns:
            float: The loss value.
        """
        m = X.shape[0]
        
        # Forward pass
        hidden_input = np.dot(X, self.W_1.T) + self.b_1
        hidden_output = self.relu(hidden_input)
        output_input = np.dot(hidden_output, self.W_2.T) + self.b_2
        output = output_input  # Linear output (for regression)

        # Compute loss
        loss = np.mean((output - y) ** 2)  # Mean Squared Error loss
        loss += (self.lambda_reg / 2) * (np.sum(self.W_1 ** 2) + np.sum(self.W_2 ** 2))  # Regularization term
        return loss

    def fit(self, X, y, verbose=False):
        """
        Train the 2-layer neural network using mini-batch gradient descent.

        Parameters:
            X (np.array): Feature matrix (n_samples, n_features).
            y (np.array): Target labels (n_samples, n_classes).
            verbose (bool): If True, print loss progress.
        """
        m = X.shape[0]
        best_loss = float('inf')
        no_improve_count = 0

        for epoch in range(self.epochs):
            # Shuffle the data at the beginning of each epoch
            indices = np.random.permutation(m)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            # Mini-batch gradient descent
            for i in range(0, m, self.batch_size):
                X_batch = X_shuffled[i:i + self.batch_size]
                y_batch = y_shuffled[i:i + self.batch_size]
                
                # Forward pass
                hidden_input = np.dot(X_batch, self.W_1.T) + self.b_1
                hidden_output = self.relu(hidden_input)
                output_input = np.dot(hidden_output, self.W_2.T) + self.b_2
                output = output_input  # Linear output (for regression)

                # Compute loss
                loss = self.compute_loss(X_batch, y_batch)
                self.loss_history.append(loss)

                # Backpropagation
                # Output layer gradients
                d_output = 2 * (output - y_batch) / X_batch.shape[0]
                d_W_2 = np.dot(d_output.T, hidden_output) + self.lambda_reg * self.W_2
                d_b_2 = np.sum(d_output, axis=0)

                # Hidden layer gradients
                d_hidden = np.dot(d_output, self.W_2) * (hidden_output > 0)  # Derivative of ReLU
                d_W_1 = np.dot(d_hidden.T, X_batch) + self.lambda_reg * self.W_1
                d_b_1 = np.sum(d_hidden, axis=0)

                # Update weights and biases
                self.W_1 -= self.learning_rate * d_W_1
                self.b_1 -= self.learning_rate * d_b_1
                self.W_2 -= self.learning_rate * d_W_2
                self.b_2 -= self.learning_rate * d_b_2

            if verbose and epoch % 5 == 0:
                print(f"Epoch {epoch}: Loss = {loss:.6f}")

            # Early stopping
            if loss < best_loss - self.tol:
                best_loss = loss
                no_improve_count = 0
            else:
                no_improve_count += 1
                if no_improve_count >= self.patience:
                    print(f"Early stopping at epoch {epoch}. Best loss: {best_loss:.6f}")
                    break

    def predict(self, X):
        """
        Predict the class labels for the given input data.

        Parameters:
            X (np.array): Feature matrix.

        Returns:
            np.array: Predicted class labels (0 or 1).
        """
        hidden_input = np.dot(X, self.W_1.T) + self.b_1
        hidden_output = self.relu(hidden_input)
        output_input = np.dot(hidden_output, self.W_2.T) + self.b_2
        
        # For classification, return the class with the highest score (argmax)
        return np.argmax(output_input, axis=1)  # Use argmax to get the class label


In [3]:
cancer = load_breast_cancer()
X = cancer.data[:, :2]
y = cancer.target
y = np.eye(2)[y.astype(int)]
    
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)
    
# Split data
X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=282025
)
    
# Train model
model = MLP(input_size=2, 
            output_size=2, 
            hidden_size=10, 
            learning_rate=0.001, 
            epochs=1000,
            batch_size=32)
model.fit(X_train, y_train)
    
# Evaluate
y_pred = model.predict(X_test)
y_test_labels = np.argmax(y_test, axis=1)
accuracy = np.mean(y_pred == y_test_labels)
print(f"\nTest accuracy: {accuracy:.4f}")

Early stopping at epoch 47. Best loss: 0.201345

Test accuracy: 0.8596
