## Load MNIST dataset

In [23]:
import math
import random

import numpy as np
from torchvision.datasets import MNIST
import ssl
# SECURITY NOTE: this swaps the default HTTPS context factory for the
# unverified one, so HTTPS connections that rely on the default context are
# made without certificate verification for the rest of this process.
# Presumably a workaround for certificate errors during the MNIST download --
# confirm, and prefer fixing the local CA bundle over keeping this override.
ssl._create_default_https_context = ssl._create_unverified_context

def download_mnist(is_train: bool):
    """Fetch one MNIST split and return its samples and labels as two lists.

    Args:
        is_train: Forwarded to ``MNIST`` as its ``train`` argument.

    Returns:
        A ``(mnist_data, mnist_labels)`` tuple of parallel lists. Each data
        entry is the flattened NumPy array produced by the transform; each
        label is the value the dataset yields alongside that sample.
    """
    def to_flat_array(picture):
        # Convert whatever the dataset hands over into a 1-D NumPy array.
        return np.array(picture).flatten()

    source = MNIST(root="./data",
                   train=is_train,
                   download=True,
                   transform=to_flat_array)

    # Walk the dataset once (the transform runs per access), then unzip.
    pairs = list(source)
    mnist_data = [pixels for pixels, _ in pairs]
    mnist_labels = [digit for _, digit in pairs]
    return mnist_data, mnist_labels

# Load both splits and turn the returned Python lists into NumPy arrays so
# the later cells can use array arithmetic on them.
train_data, train_labels = map(np.array, download_mnist(True))
test_data, test_labels = map(np.array, download_mnist(False))

## Normalize data and convert labels

In [24]:
def normalize(data, min_value, max_value):
    """Linearly rescale ``data`` so ``min_value`` maps to 0 and ``max_value`` to 1.

    Args:
        data: Scalar or array to rescale.
        min_value: Value that should map to 0.
        max_value: Value that should map to 1.

    Returns:
        ``(data - min_value) / (max_value - min_value)``.
    """
    value_range = max_value - min_value
    shifted = data - min_value
    return shifted / value_range

def one_hot_encode(labels, num_categories):
    """Convert integer class labels into one-hot rows.

    Args:
        labels: Sequence (list or 1-D array) of integer class indices.
        num_categories: Width of each one-hot row.

    Returns:
        An ``int32`` array of shape ``(len(labels), num_categories)`` where
        row ``i`` has a single 1 in column ``labels[i]``.

    Raises:
        IndexError: If a label is not a valid integer column index for a row
            of width ``num_categories``.
    """
    length = len(labels)
    converted_labels = np.zeros((length, num_categories), dtype=np.int32)

    # An empty list converts to a float array, which is not a valid index,
    # so only index when there is something to set.
    if length:
        # One indexed assignment sets column labels[i] of every row i,
        # replacing the per-row Python loop.
        converted_labels[np.arange(length), np.asarray(labels)] = 1

    return converted_labels

# Number of classes, derived from the largest raw training label. Stored as a
# plain int so it can be used directly for shapes and ranges.
classes = int(np.max(train_labels)) + 1

# Use a single value range across both splits so train and test samples are
# scaled identically.
min_value = min(np.min(train_data), np.min(test_data))
max_value = max(np.max(train_data), np.max(test_data))

train_data = normalize(train_data, min_value, max_value)
test_data = normalize(test_data, min_value, max_value)

# Encode with `classes` instead of a literal 10 so the one-hot width always
# matches the `classes`-sized weight and bias arrays built later.
train_labels = one_hot_encode(train_labels, classes)
test_labels = one_hot_encode(test_labels, classes)

In [None]:
# Show the class count that was derived from the training labels.
print(classes)

## Training

In [99]:
import random

# Training hyperparameters.
num_epochs, batch_size, learning_factor = 50, 100, 0.01

# One input per feature of a (flattened) training sample.
inputs = len(train_data[0])

# One weight row per class, drawn uniformly from [0.01, 0.99] in row-major
# order and stored as float32.
weights = np.array(
    [[random.uniform(0.01, 0.99) for _ in range(inputs)] for _ in range(classes)],
    dtype=np.float32,
).reshape(classes, inputs)

# One bias per class, drawn from the same range after the weights.
bias = np.array(
    [random.uniform(0.01, 0.99) for _ in range(classes)],
    dtype=np.float32,
)

def split_batch(data, labels, size=None):
    """Split ``data`` and ``labels`` into consecutive, aligned batches.

    Args:
        data: Sliceable sequence of samples.
        labels: Sliceable sequence of labels, parallel to ``data``.
        size: Number of samples per batch. Defaults to the module-level
            ``batch_size`` when omitted.

    Returns:
        A ``(data_batches, label_batches)`` tuple of lists. Every batch holds
        ``size`` items except possibly the last, which holds the remainder.
        No empty batch is produced, including when ``len(data)`` is an exact
        multiple of ``size`` or when ``data`` is empty.

    Raises:
        ValueError: If the batch size is not positive.
    """
    if size is None:
        size = batch_size
    if size <= 0:
        raise ValueError("batch size must be a positive integer")

    # Stepping by `size` yields exactly the start offsets of non-empty
    # batches, so no special handling of the tail is needed.
    starts = range(0, len(data), size)
    data_batches = [data[start:start + size] for start in starts]
    label_batches = [labels[start:start + size] for start in starts]

    return data_batches, label_batches


def forward_propagation(sample_data, sample_labels):
    """Run one sample through the softmax layer.

    Reads the module-level ``weights``, ``bias`` and ``classes``.

    Args:
        sample_data: Feature vector of a single sample.
        sample_labels: Target vector for that sample (one entry per class).

    Returns:
        A ``(predicted_class, error)`` tuple: the index of the most probable
        class, and ``sample_labels - probabilities``.
    """
    # Per-class weighted sum plus bias, stored as float32.
    logits = np.array(
        [np.dot(sample_data, weights[k]) + bias[k] for k in range(classes)],
        dtype=np.float32,
    )

    # Shift by the maximum before exponentiating so exp() cannot overflow.
    shifted = logits - np.max(logits)
    exponentials = np.exp(shifted)
    probabilities = exponentials / np.sum(exponentials)

    return np.argmax(probabilities), sample_labels - probabilities


def train(data_batch, label_batch, weights, bias):
    """Apply one parameter update per sample of a batch.

    Args:
        data_batch: Samples of the batch.
        label_batch: Target vectors, parallel to ``data_batch``.
        weights: Weight array with one row per class; updated in place.
        bias: Bias array with one entry per class; updated in place.

    Returns:
        The same ``(weights, bias)`` objects after the updates.
    """
    for sample, target in zip(data_batch, label_batch):
        _, error = forward_propagation(sample, target)

        # Row k of the outer product is (learning_factor * error[k]) * sample,
        # i.e. the per-class update applied to every row at once.
        scaled_error = learning_factor * error
        # In-place updates: forward_propagation reads the module-level
        # arrays, so the objects passed in must be mutated, not replaced.
        weights += np.outer(scaled_error, sample)
        bias += scaled_error

    return weights, bias

def get_perceptron_accuracy():
    """Return the fraction of test samples whose predicted class is correct.

    Reads the module-level ``test_data`` and ``test_labels``; a prediction
    counts as correct when the target vector holds a 1 at the predicted index.
    """
    def is_hit(sample, target):
        guess, _ = forward_propagation(sample, target)
        return target[guess] == 1

    hits = sum(
        1
        for sample, target in zip(test_data, test_labels)
        if is_hit(sample, target)
    )
    return hits / len(test_data)

data_batches, label_batches = split_batch(train_data, train_labels)
for epoch in range(num_epochs):
    # Accuracy is measured before this epoch's updates, so the first line
    # reports the randomly initialised model.
    accuracy = get_perceptron_accuracy()
    print(f"Processing: {epoch}/{num_epochs}: {accuracy}")
    for batch, batch_labels in zip(data_batches, label_batches):
        weights, bias = train(batch, batch_labels, weights, bias)

Processing: 0/50: 0.0697
Processing: 1/50: 0.9001
Processing: 2/50: 0.9063
Processing: 3/50: 0.9085
Processing: 4/50: 0.9098
Processing: 5/50: 0.9111
Processing: 6/50: 0.9118
Processing: 7/50: 0.9117
Processing: 8/50: 0.9121
Processing: 9/50: 0.9121
Processing: 10/50: 0.9122
Processing: 11/50: 0.9122
Processing: 12/50: 0.912
Processing: 13/50: 0.9119
Processing: 14/50: 0.9122
Processing: 15/50: 0.9123
Processing: 16/50: 0.912
Processing: 17/50: 0.912
Processing: 18/50: 0.9122
Processing: 19/50: 0.9131
Processing: 20/50: 0.9133
Processing: 21/50: 0.9132
Processing: 22/50: 0.913
Processing: 23/50: 0.913
Processing: 24/50: 0.913
Processing: 25/50: 0.9132
Processing: 26/50: 0.9131
Processing: 27/50: 0.9132
Processing: 28/50: 0.9134
Processing: 29/50: 0.9133
Processing: 30/50: 0.9137
Processing: 31/50: 0.9138
Processing: 32/50: 0.914
Processing: 33/50: 0.9141
Processing: 34/50: 0.914
Processing: 35/50: 0.9141
Processing: 36/50: 0.9141
Processing: 37/50: 0.9144
Processing: 38/50: 0.9145
Proc

## Testing

In [100]:
# Report test-set accuracy with the parameters as they stand after training.
print(get_perceptron_accuracy())

0.9143
