In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:

# mnist.load_data() returns two (images, labels) pairs: train first, then test.
# Flatten that nesting straight into the four names used by the later cells.
x_train, y_train, x_test, y_test = (
    array for split in mnist.load_data() for array in split
)

In [3]:

# Flatten every image to a single row and divide the pixel values by 255.
# The names are rebound to the scaled arrays, so re-running this cell without
# re-running the load cell divides by 255 a second time.
x_train, x_test = (
    images.reshape(len(images), -1) / 255.0
    for images in (x_train, x_test)
)

In [4]:

# One-hot encode the labels into 10 classes.
# The names are rebound to the encoded arrays, so run this cell once per load;
# a second run would feed the already-encoded labels back into to_categorical.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

In [5]:

# Hold out 10% of the training data for validation; random_state fixes the shuffle.
# x_train / y_train are overwritten with the remaining 90%, so re-running this
# cell on its own splits the already-reduced training set again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
)

In [6]:

class MLP:
    """Fully connected feed-forward network with sigmoid units on every layer.

    Training is plain mini-batch gradient descent. The output-layer error is
    ``prediction - target`` and gradients are summed (not averaged) over the
    batch, so the effective step size scales with ``batch_size``.

    Attributes:
        input_size: Number of input features.
        hidden_sizes: Sizes of the hidden layers, as passed by the caller.
        output_size: Number of output units.
        weights: List of weight matrices; ``weights[i]`` has shape
            ``(layer_sizes[i], layer_sizes[i + 1])``.
        biases: List of bias vectors; ``biases[i]`` has shape
            ``(layer_sizes[i + 1],)``.
    """

    def __init__(self, input_size, hidden_sizes, output_size, seed=None):
        """Create the layers with standard-normal weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_sizes: Iterable of hidden-layer widths (list, tuple, ...);
                may be empty for a single-layer network.
            output_size: Number of output units.
            seed: Optional seed for a private ``np.random.RandomState`` used
                for weight initialisation. When ``None`` (default) the weights
                are drawn from NumPy's global generator, so a preceding
                ``np.random.seed(...)`` call still controls them.
        """
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

        self.weights = []
        self.biases = []

        # The np.random module and a RandomState instance both expose randn(),
        # so the unseeded path behaves exactly like a direct np.random.randn call.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Unpacking accepts any iterable, not only a list.
        layer_sizes = [input_size, *hidden_sizes, output_size]
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(rng.randn(fan_in, fan_out))
            self.biases.append(np.zeros(fan_out))

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: Array of shape ``(n_samples, input_size)``.

        Returns:
            List of arrays: the input itself followed by the sigmoid output of
            every layer, so the last entry is the network output.
        """
        activations = [x]
        for weight, bias in zip(self.weights, self.biases):
            x = self.sigmoid(np.dot(x, weight) + bias)
            activations.append(x)
        return activations

    def backward(self, x, y, activations, learning_rate):
        """Back-propagate the error of one batch and update the parameters in place.

        Args:
            x: Batch input. Unused here (``activations[0]`` already holds it);
                kept so existing callers keep working.
            y: Targets with the same shape as the network output.
            activations: The list returned by ``forward`` for this batch.
            learning_rate: Step size applied to the summed batch gradient.
        """
        n_layers = len(self.weights)

        # deltas[i] is the error signal at the pre-activation of layer i.
        deltas = [None] * n_layers
        deltas[-1] = activations[-1] - y

        # Walk from the last layer towards the first. All deltas are computed
        # before any weight is modified, so they use the pre-update weights.
        for i in range(n_layers - 1, 0, -1):
            propagated = np.dot(deltas[i], self.weights[i].T)
            deltas[i - 1] = propagated * self.sigmoid_derivative(activations[i])

        for i, delta in enumerate(deltas):
            self.weights[i] -= learning_rate * np.dot(activations[i].T, delta)
            self.biases[i] -= learning_rate * np.sum(delta, axis=0)

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        Evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        Note that ``x`` is an activation (already passed through ``sigmoid``),
        not a pre-activation.
        """
        return x * (1 - x)

    def train(self, x, y, epochs, learning_rate, batch_size):
        """Train with mini-batch gradient descent, visiting samples in the given order.

        Args:
            x: Inputs of shape ``(n_samples, input_size)``.
            y: Targets of shape ``(n_samples, output_size)``.
            epochs: Number of full passes over the data.
            learning_rate: Step size forwarded to ``backward``.
            batch_size: Number of samples per update; the final batch of an
                epoch may be smaller.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1 or ``x`` and ``y``
                have different lengths.
        """
        if batch_size < 1:
            # A negative step would make range() empty and silently skip training.
            raise ValueError("batch_size must be a positive integer")
        if len(x) != len(y):
            raise ValueError("x and y must contain the same number of samples")

        for _ in range(epochs):
            for start in range(0, len(x), batch_size):
                stop = start + batch_size
                x_batch = x[start:stop]
                y_batch = y[start:stop]

                activations = self.forward(x_batch)
                self.backward(x_batch, y_batch, activations, learning_rate)

    def predict(self, x):
        """Return, for each row of ``x``, the index of the largest output unit."""
        activations = self.forward(x)
        return np.argmax(activations[-1], axis=1)

In [7]:
# Architecture: 784 features per flattened image in, 10 outputs
# (one per class of the one-hot labels), two hidden layers in between.
input_size, output_size = 784, 10
hidden_sizes = [128, 64]

# Optimisation settings for mini-batch gradient descent.
epochs, batch_size = 10, 64
learning_rate = 0.01

In [8]:
# Seed NumPy's global generator before building the model: the initial weights
# are drawn from it, and without a fixed seed every Restart & Run All would
# train a different network and report different accuracies.
np.random.seed(42)

mlp = MLP(input_size, hidden_sizes, output_size)
mlp.train(x_train, y_train, epochs, learning_rate, batch_size)

In [9]:

# Predicted class index for every sample of the three splits, in the order
# train, validation, test.
train_predictions, val_predictions, test_predictions = (
    mlp.predict(features) for features in (x_train, x_val, x_test)
)

In [10]:

# The targets are one-hot encoded, so argmax along axis 1 recovers the class
# index before comparing it with the predicted index.
train_accuracy, val_accuracy, test_accuracy = (
    accuracy_score(np.argmax(one_hot, axis=1), predicted)
    for one_hot, predicted in (
        (y_train, train_predictions),
        (y_val, val_predictions),
        (y_test, test_predictions),
    )
)

In [11]:
# Report the accuracy of each split, one line per split.
for _caption, _score in (
    ("Training Accuracy:", train_accuracy),
    ("Validation Accuracy:", val_accuracy),
    ("Test Accuracy:", test_accuracy),
):
    print(_caption, _score)

Training Accuracy: 0.9691481481481482
Validation Accuracy: 0.9475
Test Accuracy: 0.9468
