In [4]:
import numpy as np

class NeuralNetwork:
    """Two-layer fully connected classifier trained by batch gradient descent.

    Architecture: input -> sigmoid hidden layer -> softmax output layer.
    ``backward`` uses ``probs - y_true`` as the output-layer error, which is
    the gradient of softmax combined with cross-entropy loss, so ``y_true``
    should hold one target row per sample with ``output_size`` columns.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        """Store the layer sizes and step size, and initialise the parameters.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output classes.
            learning_rate: Step size applied to every gradient update.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Weights are drawn from a standard normal distribution; biases start
        # at zero and are kept as row vectors so they broadcast over a batch.
        self.W1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        Only ``exp(-|x|)`` is evaluated, so inputs of large magnitude cannot
        overflow (the naive form overflows for very negative ``x``).
        """
        x = np.asarray(x)
        decay = np.exp(-np.abs(x))  # never exceeds 1, so it cannot overflow
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value.
        return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    def softmax(self, x):
        """Return row-wise softmax probabilities for a 2-D array of scores.

        Each row is shifted by its maximum before exponentiation. The shift
        cancels in the ratio, so the result is mathematically unchanged, but
        it keeps ``exp`` from overflowing to ``inf`` (and the ratio from
        becoming ``NaN``) when scores are large.
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and return the class probabilities.

        The intermediate values ``z1``, ``a1``, ``z2`` and ``probs`` are
        cached on the instance because ``backward`` reads ``a1`` and
        ``probs``.

        Args:
            X: Array with one sample per row and ``input_size`` columns.

        Returns:
            Array with one row per sample and ``output_size`` columns; each
            row sums to 1.
        """
        # Input to hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        # Hidden layer to output
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.probs = self.softmax(self.z2)
        return self.probs

    def backward(self, X, y_true):
        """Backpropagate the error and apply one gradient-descent update.

        Must be called after ``forward(X)`` on the same batch, since it uses
        the activations cached by that call. Updates ``W1``, ``b1``, ``W2``
        and ``b2`` in place and returns nothing.

        Args:
            X: The batch that was passed to ``forward``.
            y_true: Target rows with the same shape as ``self.probs``.
        """
        m = X.shape[0]
        # Output-layer error (gradient of cross-entropy w.r.t. z2).
        delta2 = self.probs - y_true
        # Gradients for the output layer, averaged over the batch.
        dW2 = np.dot(self.a1.T, delta2) / m
        db2 = np.sum(delta2, axis=0, keepdims=True) / m
        # Hidden-layer error: propagate through W2 (before it is updated)
        # and multiply by the sigmoid derivative a1 * (1 - a1).
        delta1 = np.dot(delta2, self.W2.T) * (self.a1 * (1 - self.a1))
        # Gradients for the hidden layer; keepdims keeps db1 a row vector
        # with the same shape as b1, matching db2.
        dW1 = np.dot(X.T, delta1) / m
        db1 = np.sum(delta1, axis=0, keepdims=True) / m
        # Update parameters using gradient descent
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2

# Define cross-entropy loss function
def cross_entropy_loss(y_true, y_pred):
    """Return the cross-entropy between targets and predictions, averaged over rows.

    Args:
        y_true: Target array; its first dimension is the number of samples.
        y_pred: Predicted probabilities, broadcast-compatible with ``y_true``.

    Returns:
        ``-sum(y_true * log(y_pred + 1e-9))`` divided by the number of rows
        in ``y_true``.
    """
    # The small offset keeps log() finite when a predicted probability is 0.
    log_probs = np.log(y_pred + 1e-9)
    weighted_total = np.sum(y_true * log_probs)
    return -weighted_total / y_true.shape[0]

# Example usage:
# Network dimensions and step size for the demo run.
input_size = 2
hidden_size = 3
output_size = 2
learning_rate = 0.01

# Initialize neural network
model = NeuralNetwork(input_size, hidden_size, output_size, learning_rate)

# Generate synthetic dataset
observations = 1000
X = np.random.uniform(low=-10, high=10, size=(observations, input_size))
# Draw one class index per observation and expand it into a one-hot row.
# Sampling every column independently would also yield rows such as [0, 0]
# and [1, 1], which are not valid targets for a softmax/cross-entropy model.
class_ids = np.random.randint(0, output_size, size=observations)
y = np.eye(output_size, dtype=int)[class_ids]

# One-hot encode labels
def one_hot_encode(y, num_classes=None):
    """Return one-hot rows for 1-D integer class labels.

    Input that is not a 1-D integer array (for example a 2-D array that is
    already one-hot encoded) is returned unchanged.

    Args:
        y: Class labels, or an already-encoded array.
        num_classes: Width of the encoding. Defaults to ``max(y) + 1``.

    Returns:
        A float array of shape ``(len(y), num_classes)`` for 1-D integer
        input; otherwise ``y`` itself.

    Raises:
        ValueError: If a label is negative or not below ``num_classes``.
    """
    labels = np.asarray(y)
    if labels.ndim != 1 or not np.issubdtype(labels.dtype, np.integer):
        return y
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    # Reject out-of-range labels explicitly: negative indices would otherwise
    # wrap around silently when indexing the identity matrix.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError("labels must lie in the range [0, num_classes)")
    return np.eye(num_classes)[labels]

# Train the neural network with full-batch gradient descent: every epoch
# runs one forward and one backward pass over the whole dataset.
num_epochs = 1000
for epoch in range(num_epochs):
    # forward() must come first: it caches the activations backward() reads.
    probs = model.forward(X)
    model.backward(X, one_hot_encode(y))

    # Only report on every 100th epoch.
    if (epoch + 1) % 100 != 0:
        continue

    # probs was computed before this epoch's weight update, so the printed
    # value is the loss of the parameters as they were entering the epoch.
    loss = cross_entropy_loss(one_hot_encode(y), probs)
    print(f"Epoch {epoch + 1}, Loss: {loss:.4f}")


Epoch 100, Loss: 0.7321
Epoch 200, Loss: 0.6945
Epoch 300, Loss: 0.6892
Epoch 400, Loss: 0.6882
Epoch 500, Loss: 0.6880
Epoch 600, Loss: 0.6879
Epoch 700, Loss: 0.6879
Epoch 800, Loss: 0.6879
Epoch 900, Loss: 0.6878
Epoch 1000, Loss: 0.6878
