# LIbraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import kaggle
from kaggle.api.kaggle_api_extended import KaggleApi
import zipfile
import os

# Data loading

Source:
- <a href="https://www.kaggle.com/datasets/oddrationale/mnist-in-csv/data">DARIEL DATO-ON -MNIST in CSV
</a>

In [14]:
api = KaggleApi()
api.authenticate()

files = ["mnist_train.csv", "mnist_test.csv"]

os.mkdir("data")

for file in files:
    file_base = file.split(".")[0]

    api.dataset_download_file(
        dataset = "oddrationale/mnist-in-csv/",
        file_name = file,
        path = f"data/{file}"
    )

    os.rename(f"data/{file}", f"data/{file_base}")

    with zipfile.ZipFile(f"data/{file_base}/{file}.zip", 'r') as zip_ref:
        zip_ref.extractall("data")

    os.remove(f"data/{file_base}/{file}.zip")
    os.rmdir(f"data/{file_base}")

Dataset URL: https://www.kaggle.com/datasets/oddrationale/mnist-in-csv/versions/
Dataset URL: https://www.kaggle.com/datasets/oddrationale/mnist-in-csv/versions/


# Data preprocessing

In [4]:
data_train = pd.read_csv("./data/mnist_train.csv").to_numpy()
X_train = data_train[:, 1:] / 255.0
y_train = data_train[:, 0]

data_test = pd.read_csv("./data/mnist_test.csv").to_numpy()
X_test = data_test[:, 1:] / 255.0
y_test = data_test[:, 0]

# Model

Sources:
- <a href="https://www.youtube.com/playlist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi">3Blue1Brown</a>
- <a href="https://www.youtube.com/watch?v=w8yWXqWQYmU">Samson Zhang - Building a neural network FROM SCRATCH</a>

In [23]:
class NeuralNetwork:
    def __init__(self, lr:float=0.01, n_iters:int=1000):
        self.lr = lr
        self.n_iters = n_iters

        self.n_samples = None
        self.n_features = None

    def _init_params(self):
        self.W1 = np.random.rand(10, self.n_features) - 0.5
        self.b1 = np.random.rand(10, 1) - 0.5
        self.W2 = np.random.rand(10, 10) - 0.5
        self.b2 = np.random.rand(10, 1) - 0.5

    def _ReLU(self, Z):
        return np.maximum(Z, 0)
    
    def _ReLU_deriv(self, Z):
        return Z > 0
    
    def _softmax(self, Z):
        return np.exp(Z) / sum(np.exp(Z))
    
    def _one_hot(self, Y):
        one_hot_Y = np.zeros((Y.size, Y.max() + 1))
        one_hot_Y[np.arange(Y.size), Y] = 1
        one_hot_Y = one_hot_Y.T
        return one_hot_Y
    
    def _forward_prop(self, X):
        Z1 = self.W1.dot(X) + self.b1
        A1 = self._ReLU(Z1)
        Z2 = self.W2.dot(A1) + self.b2
        A2 = self._softmax(Z2)
        return Z1, A1, Z2, A2

    def _backward_prop(self, Z1, A1, Z2, A2, X, Y):
        one_hot_Y = self._one_hot(Y)
        dZ2 = A2 - one_hot_Y
        dW2 = 1 / self.n_samples * dZ2.dot(A1.T)
        db2 = 1 / self.n_samples * np.sum(dZ2)
        dZ1 = self.W2.T.dot(dZ2) * self._ReLU_deriv(Z1)
        dW1 = 1 / self.n_samples * dZ1.dot(X.T)
        db1 = 1 / self.n_samples * np.sum(dZ1)
        return dW1, db1, dW2, db2
    
    def _update_params(self, dW1, db1, dW2, db2):
        self.W1 = self.W1 - self.lr * dW1
        self.b1 = self.b1 - self.lr * db1
        self.W2 = self.W2 - self.lr * dW2
        self.b2 = self.b2 - self.lr * db2

    def _get_predictions(self, A2):
        return np.argmax(A2, 0)

    def _get_accuracy(self, y_real, y_pred):
        return np.sum(y_real == y_pred) / y_real.size

    def fit(self, X, Y):
        self.n_samples, self.n_features = X.shape
        self._init_params()

        X = X.T

        for i in range(self.n_iters):
            Z1, A1, Z2, A2 = self._forward_prop(X)
            dW1, db1, dW2, db2 = self._backward_prop(Z1, A1, Z2, A2, X, Y)
            self._update_params(dW1, db1, dW2, db2)

            if (i+1) % 10 == 0:
                print("Iteration: {}  |  Accuracy: {:.2f}%".format(
                    i+1,
                    self._get_accuracy(self._get_predictions(A2), Y) * 100
                ), end="\r")

    def predict(self, X):
        _, _, _, A2 = self._forward_prop(X.T)
        predictions = self._get_predictions(A2)
        return predictions

# Train

In [30]:
nn = NeuralNetwork(0.1, 1000)
nn.fit(X_train, y_train)

Iteration: 1000  |  Accuracy: 87.99%

# Evaluation

In [31]:
from sklearn.metrics import accuracy_score

y_pred = nn.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}".format(acc*100))

Accuracy: 88.37
