In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

class NeuralNetwork:
    """Fully connected classifier with one ReLU hidden layer and a softmax output.

    Parameters are stored as ``W1``/``b1`` (input -> hidden) and ``W2``/``b2``
    (hidden -> output). ``forward`` caches the intermediate activations on the
    instance (``z1``, ``a1``, ``z2``, ``a2``) so that ``backward`` can reuse
    them; ``backward`` must therefore be called right after ``forward`` on the
    same batch.
    """

    # Lower bound applied to probabilities before taking the log, so that a
    # softmax output that underflowed to exactly 0 yields a large finite loss
    # instead of ``inf``.
    _EPS = 1e-12

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters for a ``input_size -> hidden_size -> output_size`` net.

        Weights are drawn from a standard normal scaled by 0.01; biases start
        at zero. The draws use NumPy's global random state.
        """
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def _sigmoid(self, z):
        """Return the logistic sigmoid of ``z`` element-wise.

        Computed as ``exp(-log(1 + exp(-z)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def _sigmoid_derivative(self, z):
        """Return the derivative of the sigmoid evaluated at ``z``."""
        s = self._sigmoid(z)  # evaluate once and reuse
        return s * (1 - s)

    def _relu(self, z):
        """Return ``max(0, z)`` element-wise."""
        return np.maximum(0, z)

    def _relu_derivative(self, z):
        """Return 1.0 where ``z > 0`` and 0.0 elsewhere."""
        return (z > 0).astype(float)

    def _softmax(self, z):
        """Return the row-wise softmax of a 2-D array ``z``.

        The row maximum is subtracted before exponentiating to avoid overflow.
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def _cross_entropy_loss(self, y, y_hat):
        """Return the mean cross-entropy between targets ``y`` and predictions ``y_hat``.

        Args:
            y: 2-D array of per-class targets; the true class of each row is
                taken to be its ``argmax`` (e.g. one-hot rows).
            y_hat: 2-D array of predicted class probabilities, same shape as ``y``.

        Returns:
            The average negative log-probability assigned to the true class.
        """
        m = y.shape[0]
        true_class = y.argmax(axis=1)
        # Clip so that a probability of exactly 0 cannot produce log(0) = -inf.
        p_true = np.clip(y_hat[np.arange(m), true_class], self._EPS, 1.0)
        return -np.sum(np.log(p_true)) / m

    def forward(self, X):
        """Run a forward pass and return the class probabilities.

        Args:
            X: 2-D array with one sample per row and ``input_size`` columns.

        Returns:
            The softmax output ``a2``, one row per sample and one column per
            class. Intermediate values are cached on the instance.
        """
        # Hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self._relu(self.z1)

        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self._softmax(self.z2)

        return self.a2

    def backward(self, X, y, learning_rate):
        """Back-propagate the error of the last forward pass and update parameters.

        Uses the activations cached by ``forward``, so ``forward(X)`` must have
        been called on this same batch immediately beforehand.

        Args:
            X: The batch that was passed to ``forward``.
            y: Targets for the batch, same shape as the network output.
            learning_rate: Step size of the gradient-descent update.
        """
        m = X.shape[0]

        # Output layer: for softmax followed by cross-entropy the gradient
        # with respect to the pre-activation z2 reduces to (a2 - y).
        dz2 = self.a2 - y
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer: propagate through W2, then gate by the ReLU derivative.
        dz1 = np.dot(dz2, self.W2.T) * self._relu_derivative(self.z1)
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Gradient-descent step (in place). dz1 above was computed from the
        # pre-update W2, so the update order here does not matter.
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, epochs, learning_rate, batch_size=32):
        """Train with mini-batch gradient descent and return the per-epoch losses.

        Batches are taken as consecutive slices of ``X``/``y`` in the order
        given; the data is not shuffled between epochs.

        Args:
            X: 2-D array of training samples, one per row.
            y: Targets aligned with ``X`` (one row per sample).
            epochs: Number of passes over the data.
            learning_rate: Step size passed to ``backward``.
            batch_size: Number of rows per mini-batch; the final batch of an
                epoch may be smaller.

        Returns:
            A list with the full-dataset loss measured after each epoch.
        """
        losses = []
        n_samples = X.shape[0]
        for epoch in range(epochs):
            for start in range(0, n_samples, batch_size):
                X_batch = X[start:start + batch_size]
                y_batch = y[start:start + batch_size]

                # forward() is called for its side effect of caching the
                # activations that backward() consumes.
                self.forward(X_batch)
                self.backward(X_batch, y_batch, learning_rate)

            # Loss over the whole set, for monitoring only.
            loss = self._cross_entropy_loss(y, self.forward(X))
            losses.append(loss)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

        return losses

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        output = self.forward(X)
        return np.argmax(output, axis=1)

# Example usage
if __name__ == "__main__":
    # End-to-end demo: build a synthetic multi-class problem, train the
    # network on it and report train/test accuracy.

    # Single source of truth for the number of classes; it drives the data
    # generator, the one-hot encoding and the size of the output layer.
    n_classes = 3

    # Generate synthetic data
    X, y = make_classification(
        n_samples=1000,
        n_features=20,
        n_classes=n_classes,
        n_informative=15,
        random_state=42,
    )

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize data: statistics are fitted on the training split only and
    # then applied unchanged to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert integer labels to one-hot rows by indexing an identity matrix.
    y_train_onehot = np.eye(n_classes)[y_train]
    y_test_onehot = np.eye(n_classes)[y_test]

    # Create and train neural network
    input_size = X_train.shape[1]
    hidden_size = 64
    output_size = n_classes

    nn = NeuralNetwork(input_size, hidden_size, output_size)
    losses = nn.train(X_train, y_train_onehot, epochs=1000, learning_rate=0.01, batch_size=32)

    # Make predictions
    train_preds = nn.predict(X_train)
    test_preds = nn.predict(X_test)

    # Calculate accuracy against the original integer labels.
    train_acc = accuracy_score(y_train, train_preds)
    test_acc = accuracy_score(y_test, test_preds)

    print(f"Training Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")

Epoch 0, Loss: 1.0981
Epoch 100, Loss: 0.6212
Epoch 200, Loss: 0.3938
Epoch 300, Loss: 0.2591
Epoch 400, Loss: 0.1835
Epoch 500, Loss: 0.1362
Epoch 600, Loss: 0.1048
Epoch 700, Loss: 0.0825
Epoch 800, Loss: 0.0655
Epoch 900, Loss: 0.0526
Training Accuracy: 0.9975
Test Accuracy: 0.7750
