In [1]:
import numpy as np


In [2]:
# Activation function and its derivatives
def sigmoid(x):
    """Compute the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The textbook formula overflows inside ``np.exp`` for large negative
    inputs (raising a RuntimeWarning). This version only ever exponentiates
    a non-positive number, so it is safe for any finite input.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x`` (a NumPy scalar
        when ``x`` is a scalar). Values lie in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1]: it may underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x)  -- algebraically equal.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple unwraps a 0-d result into a scalar and
    # leaves n-d arrays as they are.
    return out[()]

In [3]:
def sigmoid_derivative(x):
    """Return the derivative of the sigmoid evaluated at pre-activation ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` where ``s`` is ``sigmoid``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

In [4]:
# Loss function and its derivative
def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Predictions are clipped to ``[eps, 1 - eps]`` before taking logarithms,
    so a prediction of exactly 0 or 1 gives a large finite loss instead of
    ``inf``/``nan`` from ``log(0)``.

    Args:
        y_true: Array-like of target values (0 or 1).
        y_pred: Array-like of predicted probabilities, broadcastable
            against ``y_true``.
        eps: Clipping margin applied to ``y_pred``. Inputs are converted to
            float64, for which the default of 1e-12 is representable.

    Returns:
        The loss averaged over all elements, as a scalar.
    """
    y_true = np.asarray(y_true, dtype=float)
    # Keep probabilities strictly inside (0, 1) so both logs stay finite.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1.0 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1.0 - y_true) * np.log(1.0 - y_pred))

def binary_cross_entropy_derivative(y_true, y_pred, eps=1e-12):
    """Element-wise derivative of binary cross-entropy w.r.t. ``y_pred``.

    Returns ``-(y / p) + (1 - y) / (1 - p)`` for every element. This is the
    gradient of the per-element loss: it is NOT divided by the number of
    elements, unlike the mean taken in ``binary_cross_entropy``.

    Predictions are clipped to ``[eps, 1 - eps]`` first, so a prediction of
    exactly 0 or 1 gives a large finite gradient instead of a division by
    zero (``inf``) or ``0 / 0`` (``nan``).

    Args:
        y_true: Array-like of target values (0 or 1).
        y_pred: Array-like of predicted probabilities, broadcastable
            against ``y_true``.
        eps: Clipping margin applied to ``y_pred``. Inputs are converted to
            float64, for which the default of 1e-12 is representable.

    Returns:
        Array of gradients with the broadcast shape of the inputs.
    """
    y_true = np.asarray(y_true, dtype=float)
    # Keep both denominators (p and 1 - p) strictly positive.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1.0 - eps)
    return -(y_true / y_pred) + (1.0 - y_true) / (1.0 - y_pred)

In [5]:
# Neural Network class
class NeuralNetwork:
    """Fully connected network with one sigmoid hidden layer and a sigmoid output.

    The network is trained with full-batch gradient descent on binary
    cross-entropy. ``forward`` caches the intermediate activations on the
    instance (``z1``, ``a1``, ``z2``, ``a2``) and ``backward`` reads them,
    so ``backward`` must follow a ``forward`` call on the same batch.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the weight matrices and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            seed: Optional integer seed for reproducible weight initialisation.
                When ``None`` (default) the weights are drawn from NumPy's
                global random state, so ``np.random.seed(...)`` still applies.
        """
        # Both `np.random` and `RandomState` expose `randn`, so the unseeded
        # path behaves exactly like a direct `np.random.randn` call.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights1 = rng.randn(input_size, hidden_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = rng.randn(hidden_size, output_size)
        self.bias2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and cache intermediates for ``backward``.

        Args:
            X: Input batch with one row per sample and ``input_size`` columns.

        Returns:
            The output-layer activations (also stored as ``self.a2``).
        """
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, y_pred, learning_rate):
        """Back-propagate the loss and apply one gradient-descent step.

        Gradients are summed over the batch (not averaged), so the effective
        step size grows with the number of samples.

        Args:
            X: The input batch that was passed to the preceding ``forward``.
            y: Target labels; reshaped to ``y_pred``'s shape when only the
                layout differs (e.g. ``(N,)`` vs ``(N, 1)``).
            y_pred: Output of the preceding ``forward`` call.
            learning_rate: Step size for the parameter update.
        """
        X = np.asarray(X, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        y = np.asarray(y, dtype=float)
        # Without this, (N,) labels would broadcast against (N, 1)
        # predictions into an (N, N) matrix and corrupt the gradients.
        if y.shape != y_pred.shape and y.size == y_pred.size:
            y = y.reshape(y_pred.shape)

        # dL/dz2 for sigmoid + binary cross-entropy. The chain-rule product
        #   (-(y/p) + (1-y)/(1-p)) * p * (1-p)
        # simplifies to p - y. Using the simplified form avoids inf * 0 = nan
        # when a prediction saturates at exactly 0 or 1.
        dz2 = y_pred - y
        dw2 = np.dot(self.a1.T, dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)

        # Hidden layer: sigmoid'(z1) == a1 * (1 - a1), and a1 is already
        # cached by forward(), so there is no need to recompute the sigmoid.
        dz1 = np.dot(dz2, self.weights2.T) * (self.a1 * (1.0 - self.a1))
        dw1 = np.dot(X.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)

        # All gradients above were computed from the pre-update parameters.
        self.weights1 -= learning_rate * dw1
        self.bias1 -= learning_rate * db1
        self.weights2 -= learning_rate * dw2
        self.bias2 -= learning_rate * db2

    def train(self, X, y, epochs, learning_rate, log_every=100):
        """Train with full-batch gradient descent.

        Args:
            X: Input batch with one row per sample.
            y: Target labels. A 1-D vector is treated as a column when the
                network has a single output unit.
            epochs: Number of passes over the batch.
            learning_rate: Step size passed to ``backward``.
            log_every: Print the loss every this many epochs (starting at
                epoch 0). Pass 0 or ``None`` to silence logging.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if y.ndim == 1 and self.bias2.shape[1] == 1:
            y = y.reshape(-1, 1)

        for epoch in range(epochs):
            y_pred = self.forward(X)
            self.backward(X, y, y_pred, learning_rate)

            # backward() does not modify y_pred, so the loss reported here is
            # the loss of the parameters *before* this epoch's update.
            if log_every and epoch % log_every == 0:
                loss = binary_cross_entropy(y, y_pred)
                print(f"Epoch {epoch}, Loss: {loss}")


In [6]:
# Example usage
if __name__ == "__main__":
    # Seed NumPy's global generator so the random weight initialisation, and
    # therefore the printed losses and predictions, are identical on every
    # Restart & Run All.
    np.random.seed(42)

    # Dummy dataset (XOR problem): four 2-bit inputs and their XOR labels.
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])

    # Initialize and train the neural network
    nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
    nn.train(X, y, epochs=1000, learning_rate=0.1)

    # Evaluate on the training inputs; values near 0/1 should match y.
    print("Predictions:")
    print(nn.forward(X))

Epoch 0, Loss: 0.8303997013474003
Epoch 100, Loss: 0.6154736467900566
Epoch 200, Loss: 0.45048805384888074
Epoch 300, Loss: 0.2661484826807904
Epoch 400, Loss: 0.16078889613718494
Epoch 500, Loss: 0.10774458085787805
Epoch 600, Loss: 0.07867478797790405
Epoch 700, Loss: 0.06105111463048071
Epoch 800, Loss: 0.04946273402102626
Epoch 900, Loss: 0.041355987463375446
Predictions:
[[0.04194818]
 [0.96756114]
 [0.96613921]
 [0.03087176]]
