In [2]:
import numpy as np
from torchvision.datasets import MNIST


def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
                    transform=lambda x: np.array(x).flatten(),
                    download=True,
                    train=is_train)

    mnist_data = []
    mnist_labels = []
    for image, labels in dataset:
        mnist_data.append(image)
        mnist_labels.append(labels)
    return np.array(mnist_data), np.array(mnist_labels)


X_train, y_train = download_mnist(True)
X_test, y_test = download_mnist(False)

In [3]:
def normalizeData(data: np.array) -> np.array:
    return (data - np.min(data)) / (np.max(data) - np.min(data))


X_train_norm = normalizeData(X_train)
X_test_norm = normalizeData(X_test)

In [4]:
def createEntryArray(value: int) -> np.array:
    entry = [0] * 10
    entry[value] = 1
    return entry


def oneHotEncoding(data: np.array) -> np.array:
    # Create new numpy array.
    oneHotData = []
    # Iterate through each element, and build one hot version.
    for entry in data:
        oneHotData.append(createEntryArray(entry))
    return np.array(oneHotData)


y_train_oneHot = oneHotEncoding(y_train)
y_test_oneHot = oneHotEncoding(y_test)

In [5]:
def splitBatches(df: np.array, batch_size: int) -> np.array:
    splitStart = 0
    splitEnd = batch_size
    batches = df.shape[0] // batch_size
    batchdf = []
    for i in range(batches):
        batchdf.append(list(df[splitStart : splitEnd]))
        splitStart = splitEnd
        splitEnd += batch_size
    if (splitEnd < df.shape[0]):
        batchdf.append(list(df[splitEnd : df.shape[0]]))
    return np.array(batchdf, dtype=np.float128)


batch_size = 100

X_train_split = splitBatches(X_train_norm, batch_size)
y_train_split = splitBatches(y_train_oneHot, batch_size)
X_test_split = splitBatches(X_test_norm, batch_size)
y_test_split = splitBatches(y_test_oneHot, batch_size)

In [6]:
def forwardPropagation(X_train: np.array,
                       weights: np.array,
                       biases: np.array) -> np.array:
    z = weights @ X_train.T
    linEq = z.T + biases
    return softmax(linEq)


def softmax(scores: np.array) -> np.array:
    s = np.max(scores, axis=1)
    s = s[:, np.newaxis]
    e_x = np.exp(scores - s)
    div = np.sum(e_x, axis=1)
    div = div[:, np.newaxis]
    return e_x / div


def gradientDescentUpdate(X_train: np.array,
                          weights: np.array,
                          biases: np.array,
                          l_rate: float,
                          error: np.array) -> tuple[np.array, np.array]:
    biases = biases + l_rate * error.sum(axis=0)
    weights = weights + l_rate * error.T @ X_train
    return (weights, biases)

In [7]:
def trainNeuralNetwork(X_train: np.array,
                       y_train: np.array,
                       batch_size: int,
                       l_rate: float,
                       epochs: int) -> tuple[np.array, np.array]:
    # Initialize weights and biases.
    weights = np.zeros((10, 784), dtype=np.float128)
    biases  = np.zeros((10), dtype=np.float128)
    # Iterate through epochs.
    print("Start training")
    for epoch in range(epochs):
        print(f"Epoch={epoch}")
        # For each batch, perform forward propagation, gradient descent and update.
        for batch in range(batch_size):
            # Prepare data for forward propagation.
            xBatch = X_train[batch]
            yBatch = y_train[batch]

            # Make prediction using forward propagation.
            prediction = forwardPropagation(xBatch, weights, biases)

            # Compute error.
            error = yBatch - prediction

            # Update weights and biases using gradient descent.
            (weightsBatch, biasesBatch) = gradientDescentUpdate(xBatch, weights, biases, l_rate, error)
            weights = weightsBatch
            biases = biasesBatch
    print("Training ended.")
    return (weights, biases)

In [23]:
l_rate = 0.001
epochs = 60
(weights, biases) = trainNeuralNetwork(X_train_split, y_train_split, batch_size, l_rate, epochs)

Start training
Epoch=0
Epoch=1
Epoch=2
Epoch=3
Epoch=4
Epoch=5
Epoch=6
Epoch=7
Epoch=8
Epoch=9
Epoch=10
Epoch=11
Epoch=12
Epoch=13
Epoch=14
Epoch=15
Epoch=16
Epoch=17
Epoch=18
Epoch=19
Epoch=20
Epoch=21
Epoch=22
Epoch=23
Epoch=24
Epoch=25
Epoch=26
Epoch=27
Epoch=28
Epoch=29
Epoch=30
Epoch=31
Epoch=32
Epoch=33
Epoch=34
Epoch=35
Epoch=36
Epoch=37
Epoch=38
Epoch=39
Epoch=40
Epoch=41
Epoch=42
Epoch=43
Epoch=44
Epoch=45
Epoch=46
Epoch=47
Epoch=48
Epoch=49
Epoch=50
Epoch=51
Epoch=52
Epoch=53
Epoch=54
Epoch=55
Epoch=56
Epoch=57
Epoch=58
Epoch=59
Training ended.


In [24]:
batches = 100

def getAccuracy(prediction: np.array, target: np.array) -> float:
    length = prediction.shape[0]
    correct = 0

    for i in range(length):
        if (np.argmax(prediction[i]) == np.argmax(target[i])):
            correct += 1
    return correct / length

meanAccuracy = 0.0
for batch in range(batches):
    xBatch = X_test_split[batch]
    yBatch = y_test_split[batch]

    predictionProb = forwardPropagation(xBatch, weights, biases)

    predictionAcc = getAccuracy(predictionProb, yBatch)
    meanAccuracy += predictionAcc
print(f"Accuracy={meanAccuracy / batches}")

Accuracy=0.9107
