In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Build a synthetic 3-class classification problem (fixed seed).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=3,
    n_informative=5,
    random_state=42,
)

# Turn the integer labels into a single column, then one-hot encode them
# as a dense array.
encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y[:, np.newaxis])

# Hold out 20% of the samples for evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_onehot,
    test_size=0.2,
    random_state=42,
)

# Activation functions
def relu(Z):
    """Apply the rectified linear unit element-wise.

    Every entry ``z`` of ``Z`` is replaced by ``max(0, z)``.
    """
    rectified = np.maximum(0, Z)
    return rectified

def relu_derivative(Z):
    """Return the element-wise mask of strictly positive entries of ``Z``.

    Multiplying a gradient by this mask keeps it where ``Z > 0`` and zeroes
    it elsewhere, which is how the ReLU derivative is applied.
    """
    is_active = 0 < Z
    return is_active

def softmax(Z, axis=1):
    """Compute a numerically stable softmax along ``axis``.

    Args:
        Z: Array-like of scores (logits).
        axis: Axis along which the outputs are normalised. Defaults to 1,
            i.e. row-wise for a 2-D batch, which was previously the only
            supported layout. Pass ``axis=0`` for a single 1-D score vector.

    Returns:
        An array shaped like ``Z`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    Z = np.asarray(Z)
    # Subtracting the per-slice maximum does not change the result but keeps
    # np.exp from overflowing on large scores.
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=axis, keepdims=True)

# Neural Network class
class NeuralNetwork:
    """Classifier with one ReLU hidden layer and a softmax output layer.

    Parameters are fitted with mini-batch gradient descent on the
    cross-entropy loss plus an L2 penalty on both weight matrices.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01,
                 reg_lambda=0.001, batch_size=32, seed=None):
        """Create the network with small random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            learning_rate: Step size of each gradient-descent update.
            reg_lambda: L2 regularisation strength.
            batch_size: Number of samples per mini-batch during training.
            seed: Optional seed for weight initialisation and epoch
                shuffling. When ``None`` the global NumPy RNG is used.
        """
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda
        self.batch_size = batch_size
        # Without a seed keep drawing from NumPy's global RNG, so code that
        # relies on np.random.seed() behaves as it did before.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.W1 = self._rng.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = self._rng.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and return the softmax outputs for ``X``.

        The intermediate values ``Z1``, ``A1``, ``Z2`` and ``A2`` are stored
        on the instance because ``backward`` reads them.
        """
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def compute_loss(self, Y, Y_hat):
        """Return mean cross-entropy of ``Y_hat`` against ``Y`` plus L2 penalty.

        Args:
            Y: One-hot targets, one row per sample.
            Y_hat: Predicted probabilities with the same shape as ``Y``.
        """
        m = Y.shape[0]
        # The small constant keeps log() finite when a probability is 0.
        loss = -np.sum(Y * np.log(Y_hat + 1e-8)) / m
        reg_loss = (self.reg_lambda / (2 * m)) * (np.sum(self.W1**2) + np.sum(self.W2**2))
        return loss + reg_loss

    def backward(self, X, Y):
        """Backpropagate one batch and update the parameters in place.

        Reads the activations cached by ``forward``, so ``forward`` must have
        been called on this same ``X`` immediately beforehand.

        Args:
            X: Input batch, one row per sample.
            Y: One-hot targets for the batch.
        """
        m = X.shape[0]
        dZ2 = self.A2 - Y
        dW2 = np.dot(self.A1.T, dZ2) / m + (self.reg_lambda / m) * self.W2
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        dZ1 = np.dot(dZ2, self.W2.T) * relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / m + (self.reg_lambda / m) * self.W1
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # All gradients are computed before any parameter changes, so dZ1
        # above is based on the pre-update W2.
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2

    def train(self, X, Y, epochs=1000):
        """Fit the network with shuffled mini-batch gradient descent.

        Every 100th epoch (starting with epoch 0) the loss on the full
        ``X``/``Y`` is printed and recorded.

        Args:
            X: Training inputs, one row per sample.
            Y: One-hot training targets, one row per sample.
            epochs: Number of passes over the data.

        Returns:
            A list of ``(epoch, loss)`` tuples for the logged epochs.

        Raises:
            ValueError: If ``X`` is empty, ``X`` and ``Y`` differ in their
                number of rows, or ``batch_size`` is smaller than 1.
        """
        m = X.shape[0]
        if m == 0:
            raise ValueError("X must contain at least one sample")
        if Y.shape[0] != m:
            raise ValueError(
                f"X and Y must have the same number of rows, got {m} and {Y.shape[0]}"
            )
        if self.batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {self.batch_size}")

        history = []
        for epoch in range(epochs):
            # Reshuffle every epoch so the batches differ between passes.
            indices = self._rng.permutation(m)
            X_shuffled, Y_shuffled = X[indices], Y[indices]

            for i in range(0, m, self.batch_size):
                X_batch = X_shuffled[i:i + self.batch_size]
                Y_batch = Y_shuffled[i:i + self.batch_size]

                # forward() caches the activations that backward() consumes.
                self.forward(X_batch)
                self.backward(X_batch, Y_batch)

            if epoch % 100 == 0:
                Y_hat_full = self.forward(X)
                loss = self.compute_loss(Y, Y_hat_full)
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
                history.append((epoch, loss))

        return history

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        Y_hat = self.forward(X)
        return np.argmax(Y_hat, axis=1)

# Build the network and fit it on the training split
nn = NeuralNetwork(
    input_size=10,
    hidden_size=100,
    output_size=3,
    learning_rate=0.01,
    reg_lambda=0.001,
    batch_size=32,
)
nn.train(X_train, y_train, epochs=1000)

# Score the held-out split: compare predicted class indices with the
# indices recovered from the one-hot test labels.
y_pred = nn.predict(X_test)
y_true = y_test.argmax(axis=1)
accuracy = (y_pred == y_true).mean()
print(f"Test Accuracy: {accuracy:.4f}")

Epoch 0, Loss: 1.0967
Epoch 100, Loss: 0.4155
Epoch 200, Loss: 0.3250
Epoch 300, Loss: 0.2880
Epoch 400, Loss: 0.2614
Epoch 500, Loss: 0.2411
Epoch 600, Loss: 0.2216
Epoch 700, Loss: 0.2041
Epoch 800, Loss: 0.1892
Epoch 900, Loss: 0.1772
Test Accuracy: 0.8800
