In [6]:
import numpy as np
import torch
import torchvision

In [4]:

train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True
)

val_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True
)


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.09MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 132kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.26MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.96MB/s]


In [7]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader   = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False)




In [8]:
def one_hot(labels, num_classes=10):
    m = labels.shape[0]
    encoded = np.zeros((num_classes, m))
    encoded[labels, np.arange(m)] = 1
    return encoded


In [26]:
class NeuralNetwork:
    def __init__(self, lr=0.01):
        self.lr = lr

        self.W1 = np.random.randn(10, 784) * 0.01
        self.b1 = np.zeros((10, 1))

        self.W2 = np.random.randn(10, 10) * 0.01
        self.b2 = np.zeros((10, 1))

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return (Z > 0).astype(float)

    def softmax(self, Z):
        exp = np.exp(Z - np.max(Z, axis=0, keepdims=True))
        return exp / np.sum(exp, axis=0, keepdims=True)

    def forward(self, X):
        self.Z1 = self.W1 @ X + self.b1
        self.A1 = self.relu(self.Z1)

        self.Z2 = self.W2 @ self.A1 + self.b2
        self.A2 = self.softmax(self.Z2)

        return self.A2

    def compute_loss(self, Y, A2):
        m = Y.shape[1]
        return -np.sum(Y * np.log(A2 + 1e-8)) / m

    def backward(self, X, Y):
        m = X.shape[1]

        dZ2 = self.A2 - Y
        dW2 = (1 / m) * dZ2 @ self.A1.T
        db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

        dZ1 = (self.W2.T @ dZ2) * self.relu_derivative(self.Z1)
        dW1 = (1 / m) * dZ1 @ X.T
        db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

        self.dW1, self.db1 = dW1, db1
        self.dW2, self.db2 = dW2, db2

    def update_parameters(self):
        self.W1 -= self.lr * self.dW1
        self.b1 -= self.lr * self.db1
        self.W2 -= self.lr * self.dW2
        self.b2 -= self.lr * self.db2

    def predict(self, X):
        A2 = self.forward(X)
        return np.argmax(A2, axis=0)

    def evaluate(self, X, labels):
        preds = self.predict(X)
        return np.mean(preds == labels)



In [27]:
nn = NeuralNetwork(lr=0.05)
epochs = 5


In [28]:
for epoch in range(epochs):
    losses = []
    accs = []

    for images, labels in train_loader:
        images = images.cpu().numpy()
        labels = labels.cpu().numpy()

        X = images.reshape(images.shape[0], -1).T
        Y = one_hot(labels)

        A2 = nn.forward(X)

        loss = nn.compute_loss(Y, A2)
        losses.append(loss)

        nn.backward(X, Y)
        nn.update_parameters()

        acc = nn.evaluate(X, labels)
        accs.append(acc)

    print(f"Epoch {epoch+1}")
    print(f"Train Loss: {np.mean(losses):.4f}")
    print(f"Train Acc : {np.mean(accs)*100:.2f}%\n")


Epoch 1
Train Loss: 0.9815
Train Acc : 71.94%

Epoch 2
Train Loss: 0.3621
Train Acc : 91.01%

Epoch 3
Train Loss: 0.3178
Train Acc : 92.13%

Epoch 4
Train Loss: 0.2951
Train Acc : 92.75%

Epoch 5
Train Loss: 0.2813
Train Acc : 93.15%



In [29]:
val_acc = []
val_loss = []

for images, labels in val_loader:
    images = images.cpu().numpy()
    labels = labels.cpu().numpy()

    X = images.reshape(images.shape[0], -1).T
    Y = one_hot(labels)

    A2 = nn.forward(X)
    val_loss.append(nn.compute_loss(Y, A2))
    val_acc.append(nn.evaluate(X, labels))

print(f"Validation Loss: {np.mean(val_loss):.4f}")
print(f"Validation Acc : {np.mean(val_acc)*100:.2f}%")


Validation Loss: 0.2642
Validation Acc : 92.49%
