In [24]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml

def load_mnist():
    mnist = fetch_openml('mnist_784', version=1)
    X = mnist.data / 255.0
    y = mnist.target.astype(int) 
    return X.values, y

class Linear:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.bias = np.zeros((1, output_size))

    def forward(self, x):
        self.input = x
        return np.dot(x, self.weights) + self.bias

    def backward(self, d_output):
        d_input = np.dot(d_output, self.weights.T)
        self.d_weights = np.dot(self.input.T, d_output)
        self.d_bias = np.sum(d_output, axis=0, keepdims=True)
        return d_input

class ReLU:
    def forward(self, x):
        self.input = x
        return np.maximum(0, x)

    def backward(self, d_output):
        return d_output * (self.input > 0)

class Softmax:
    def forward(self, x):
        exp_x = np.exp(x - np.max(x))  
        self.output = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        return self.output

    def backward(self, y_true, loss):
        batch_size = y_true.shape[0]
        d_output = loss.backward(y_true, self.output)
        return d_output / batch_size

class MSE_Loss:
    def forward(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)

    def backward(self, y_true, y_pred):
        return 2 * (y_pred - y_true) / y_true.size


class NeuralNetwork:
    def __init__(self, loss):
        self.linear1 = Linear(784, 128)
        self.relu = ReLU()
        self.linear2 = Linear(128, 10)
        self.softmax = Softmax()
        self.loss = loss

    def forward(self, x):
        out1 = self.linear1.forward(x)
        out2 = self.relu.forward(out1)
        out3 = self.linear2.forward(out2)
        return self.softmax.forward(out3)

    def backward(self, x, y):
        one_hot_y = np.zeros((y.size, 10))
        one_hot_y[np.arange(y.size), y] = 1
        
        d_output = self.softmax.backward(one_hot_y, self.loss)
        d_output2 = self.linear2.backward(d_output)
        d_output1 = self.relu.backward(d_output2)
        self.linear1.backward(d_output1)

    def update_parameters(self, learning_rate):
        self.linear1.weights -= learning_rate * self.linear1.d_weights
        self.linear1.bias -= learning_rate * self.linear1.d_bias
        self.linear2.weights -= learning_rate * self.linear2.d_weights
        self.linear2.bias -= learning_rate * self.linear2.d_bias

def evaluate_model(nn, X_test, y_test):
    predictions = nn.forward(X_test)
    predicted_classes = np.argmax(predictions, axis=1)
    accuracy = np.mean(predicted_classes == y_test) * 100
    print(f"Accuracy: {accuracy:.2f}%")
    

In [25]:
X_train, y_train = load_mnist()

X_test_subset = X_train[10000:11000]
y_test_subset = y_train[10000:11000]
y_test = np.eye(10)[y_test_subset]

epochs = 3
learning_rate = 0.01

nn = NeuralNetwork(loss = MSE_Loss())
    
for epoch in range(epochs):
    for i in range(X_train.shape[0]):
        x_i = X_train[i:i+1]
        y_i = y_train[i:i+1]
        nn.forward(x_i)
        nn.backward(x_i, y_i)
        nn.update_parameters(learning_rate)
    print(epoch)

evaluate_model(nn, X_test_subset, y_test_subset)

0
1
2
Accuracy: 96.20%
