In [26]:
import numpy as np
import torchvision.datasets as datasets
from sklearn.preprocessing import OneHotEncoder

In [32]:
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=None)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=None)

In [33]:
train_data = train_dataset.data
train_labels = train_dataset.targets
test_data = test_dataset.data
test_labels = test_dataset.targets

In [34]:
encoder = OneHotEncoder(sparse=False)
train_data = train_data.reshape(len(train_data), -1)/255.0
test_data = test_data.reshape(len(test_data), -1)/255.0
train_labels = train_labels.numpy().reshape(-1, 1)
train_labels = encoder.fit_transform(train_labels)
test_labels = test_labels.numpy().reshape(-1, 1)
test_labels = encoder.fit_transform(test_labels)



In [35]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def relu(self, x):
        return np.maximum(0, x)

    def softmax(self, x):
        exp_values = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y, learning_rate):
        m = X.shape[0]

        dz2 = self.a2 - y # d(Loss)/d(a2)
        dW2 = (1 / m) * np.dot(self.a1.T, dz2) # d(Loss)/d(W2)
        db2 = (1 / m) * np.sum(dz2, axis=0, keepdims=True) # d(Loss)/d(b2)

        dz1 = np.dot(dz2, self.W2.T) * (self.z1 > 0) # d(Loss)/d(z1)
        dW1 = (1 / m) * np.dot(X.T, dz1) # d(Loss)/d(w1)
        db1 = (1 / m) * np.sum(dz1, axis=0, keepdims=True) # d(Loss)/d(b1)

        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            predictions = self.forward(X)
            loss = np.mean((predictions - y) ** 2) # MSE
            self.backward(X, y, learning_rate)

    def predict(self, X):
        hidden_output = self.relu(np.dot(X, self.W1) + self.b1)
        output = self.softmax(np.dot(hidden_output, self.W2) + self.b2)
        return np.argmax(output, axis=1)


In [36]:
input_size = train_data.shape[1]
hidden_size = 512
output_size = 10

nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(train_data, train_labels, epochs=100, learning_rate=0.01)

predictions = nn.forward(test_data)
accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(test_labels, axis=1))
print(f"Accuracy: {accuracy * 100}%")

Accuracy: 63.85999999999999%
