In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt


In [3]:
# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalize input data (scale pixel values to range 0-1)
X_train = X_train.reshape(X_train.shape[0], 784) / 255.0  # Flatten images
X_test = X_test.reshape(X_test.shape[0], 784) / 255.0

# Convert labels to one-hot encoding
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [39]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)


In [40]:
class MLP:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        np.random.seed(42)  # For reproducibility

        # Initialize weights & biases
        self.weights_input_hidden1 = np.random.randn(input_size, hidden_size1) * 0.01
        self.weights_hidden1_hidden2 = np.random.randn(hidden_size1, hidden_size2) * 0.01
        self.weights_hidden2_output = np.random.randn(hidden_size2, output_size) * 0.01
        self.bias_hidden1 = np.zeros((1, hidden_size1))
        self.bias_hidden2 = np.zeros((1, hidden_size2))
        self.bias_output = np.zeros((1, output_size))

    def forward(self, X):
        """ Forward pass through the network """
        self.input = X
        self.hidden1 = relu(np.dot(self.input, self.weights_input_hidden1) + self.bias_hidden1)
        self.hidden2 = relu(np.dot(self.hidden1, self.weights_hidden1_hidden2) + self.bias_hidden2)
        self.output = sigmoid(np.dot(self.hidden2, self.weights_hidden2_output) + self.bias_output)
        return self.output

    def backward(self, X, y, learning_rate):
        """ Backpropagation to update weights """
        # Compute error
        output_error = y - self.output
        output_delta = output_error * sigmoid_derivative(self.output)

        # Hidden layer 2 error
        hidden2_error = output_delta.dot(self.weights_hidden2_output.T)
        hidden2_delta = hidden2_error * relu_derivative(self.hidden2)

        # Hidden layer 1 error
        hidden1_error = hidden2_delta.dot(self.weights_hidden1_hidden2.T)
        hidden1_delta = hidden1_error * relu_derivative(self.hidden1)

        # Update weights & biases
        self.weights_hidden2_output += self.hidden2.T.dot(output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
        self.weights_hidden1_hidden2 += self.hidden1.T.dot(hidden2_delta) * learning_rate
        self.bias_hidden2 += np.sum(hidden2_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden1 += self.input.T.dot(hidden1_delta) * learning_rate
        self.bias_hidden1 += np.sum(hidden1_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate):
        """ Train the model """
        loss_history = []
        for epoch in range(epochs):
            self.forward(X)
            self.backward(X, y, learning_rate)
            loss = np.mean(np.square(y - self.output))  # MSE loss
            loss_history.append(loss)
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')
        return loss_history


In [41]:
# Define network structure
input_size = 784       # 28x28 pixels
hidden_size1 = 128     # First hidden layer
hidden_size2 = 64      # Second hidden layer
output_size = 10       # Digits 0-9 (10 classes)

# Initialize model
mlp = MLP(input_size, hidden_size1, hidden_size2, output_size)

# Train the model
loss_history = mlp.train(X_train, y_train, epochs=1000, learning_rate=0.01)


Epoch 0, Loss: 0.25004968086222673
Epoch 100, Loss: 0.1
Epoch 200, Loss: 0.1
Epoch 300, Loss: 0.1
Epoch 400, Loss: 0.1
Epoch 500, Loss: 0.1
Epoch 600, Loss: 0.1
Epoch 700, Loss: 0.1
Epoch 800, Loss: 0.1
Epoch 900, Loss: 0.1
