In [4]:
import kagglehub


# Fetch the "hojjatk/mnist-dataset" dataset through kagglehub. The result is kept in
# the module-level name `path`, which the training code later joins with the IDX file
# names via os.path.join (presumably a local directory path -- confirm against the
# kagglehub documentation).
path = kagglehub.dataset_download("hojjatk/mnist-dataset")


In [5]:
import numpy as np # linear algebra
import struct
from array import array
from os.path  import join

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    """Reads MNIST-style image and label sets from IDX (ubyte) files.

    The constructor only stores the four file paths; files are opened when
    `load_data` or `read_images_labels` is called.
    """

    def __init__(self, training_images_filepath,training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        """Remember the paths of the training and test image/label files."""
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one IDX image file together with its IDX label file.

        Args:
            images_filepath: path of the image file (magic number 2051).
            labels_filepath: path of the label file (magic number 2049).

        Returns:
            A tuple ``(images, labels)``. ``images`` is a list with one entry
            per image; each entry is a list of ``rows`` one-dimensional uint8
            NumPy arrays of length ``cols`` (so ``np.asarray(images)`` has
            shape ``(size, rows, cols)``). ``labels`` is an ``array('B')``
            holding every byte that follows the label header.

        Raises:
            ValueError: if either magic number is wrong, or if the image file
                contains fewer pixel bytes than its header announces.
        """
        with open(labels_filepath, 'rb') as file:
            # Header: big-endian magic number and item count (count is not used).
            magic, _label_count = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            # Header: big-endian magic number, image count, rows and columns.
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        expected_bytes = size * rows * cols
        if len(image_data) < expected_bytes:
            raise ValueError('Image data truncated, expected {} bytes, got {}'.format(
                expected_bytes, len(image_data)))

        # Use the dimensions from the header instead of a hard-coded 28x28 so
        # that IDX files with other image sizes load correctly. Any trailing
        # bytes beyond the announced images are ignored.
        pixels = np.array(image_data, dtype=np.uint8)[:expected_bytes]
        pixels = pixels.reshape(size, rows, cols)

        # Each image is exposed as a list of its row arrays.
        images = [list(image) for image in pixels]

        return images, labels

    def load_data(self):
        """Load both splits.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` where each pair is the
            result of `read_images_labels` on the corresponding stored paths.
        """
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (x_train, y_train),(x_test, y_test)

In [6]:
import pandas as pd
import os
from sklearn.utils import shuffle



def ReLU(x):
    """Rectified linear unit: element-wise maximum of 0 and `x`."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def dReLU(x):
    """Derivative of ReLU: 1 where `x` is strictly positive, 0 elsewhere.

    Args:
        x: array-like or scalar pre-activation values. The input is coerced
            with ``np.asarray`` so that lists and Python scalars are accepted,
            matching what `ReLU` accepts.

    Returns:
        The 0/1 mask cast to the dtype of the (coerced) input, with the same shape.
    """
    x = np.asarray(x)
    return (x > 0).astype(x.dtype)

def SoftMax(x: np.ndarray, axis: int = -1) -> np.ndarray:
    """Softmax of `x` along `axis`.

    The maximum along `axis` is subtracted before exponentiating, so the
    largest exponent is exp(0); the result sums to 1 along `axis`.
    """
    peak = np.max(x, axis=axis, keepdims=True)
    unnormalized = np.exp(x - peak)
    normalizer = np.sum(unnormalized, axis=axis, keepdims=True)
    return unnormalized / normalizer

def dSoftMax(s: np.ndarray, upstream: np.ndarray, axis: int = -1) -> np.ndarray:
    """Back-propagate `upstream` through a softmax whose output was `s`.

    Computes ``s * (upstream - sum(upstream * s))`` with the sum taken along
    `axis` and kept as a broadcastable dimension.
    """
    weighted = upstream * s
    inner = np.sum(weighted, axis=axis, keepdims=True)
    centered = upstream - inner
    return s * centered

class Network:
    """Fully connected network with one ReLU hidden layer and a softmax output.

    `backward` trains the parameters with plain gradient descent on the mean
    cross-entropy between the softmax output and the given targets.
    """

    def __init__(self, w, h, s, classes):
        """Create the parameters.

        Args:
            w, h: input width and height; each input row must have ``w * h`` features.
            s: number of hidden units.
            classes: number of output classes.

        Weights are drawn from ``np.random.randn`` scaled by 0.01; biases start at zero.
        """
        self._input  = np.random.randn(w * h, s) * 0.01   # input -> hidden weights
        self._b1     = np.zeros(s)                        # hidden bias
        self._output = np.random.randn(s, classes) * 0.01 # hidden -> output weights
        self._b2     = np.zeros(classes)                  # output bias
        self._size   = s
        self._classes = classes

    def _forward_pass(self, X):
        """Run the network on `X` and return ``(z1, a1, yhat)`` for reuse in back-propagation."""
        z1 = X @ self._input + self._b1
        a1 = ReLU(z1)
        z2 = a1 @ self._output + self._b2
        yhat = SoftMax(z2)
        return z1, a1, yhat

    def forward(self, X):
        """Return the softmax class probabilities for each row of `X`."""
        _, _, yhat = self._forward_pass(X)
        return yhat

    def loss(self, prop, y_true):
        """Element-wise squared error between `prop` and `y_true`.

        Note: this is not the objective optimised by `backward`, which uses
        cross-entropy; the result is not reduced to a scalar.
        """
        return (prop - y_true) ** 2

    def backward(self, X, y, learning_rate=0.1):
        """Perform one gradient-descent step on the batch ``(X, y)``.

        Args:
            X: batch of inputs, one row per sample.
            y: targets with the same shape as the network output
                (assumed to be one-hot / rows summing to 1 -- the gradient
                used below relies on that).
            learning_rate: step size applied to every parameter.

        Returns:
            ``(yhat, loss)``: the predictions and the mean cross-entropy, both
            computed with the parameters as they were *before* this update.
        """
        z1, a1, yhat = self._forward_pass(X)

        B = X.shape[0]

        # Small constant keeps log() finite when a probability is exactly 0.
        loss = -np.sum(y * np.log(yhat + 1e-8)) / B

        # Gradient of the mean cross-entropy w.r.t. the pre-softmax scores.
        dz2 = (yhat - y) / B

        dW2 = a1.T @ dz2
        db2 = np.sum(dz2, axis=0)

        da1 = dz2 @ self._output.T
        dz1 = da1 * dReLU(z1)

        dW1 = X.T @ dz1
        db1 = np.sum(dz1, axis=0)

        # All gradients are computed before any parameter is modified.
        self._input -= learning_rate * dW1
        self._b1 -= learning_rate * db1
        self._output -= learning_rate * dW2
        self._b2 -= learning_rate * db2
        return yhat, loss
    


def accuracy(y_pred, y_true_labels):
    """Fraction of rows of `y_pred` whose arg-max (along axis 1) equals the true label."""
    predicted_labels = np.argmax(y_pred, axis=1)
    hits = predicted_labels == y_true_labels
    return np.mean(hits)

def one_hot(y, num_classes):
    """One-hot encode integer labels `y` as float32 rows of length `num_classes`.

    Works by selecting rows of a ``num_classes`` x ``num_classes`` identity matrix.
    """
    identity = np.eye(num_classes, dtype=np.float32)
    encoded = identity[y]
    return encoded

def __main__():
    """Train the two-layer network on MNIST and print per-epoch metrics.

    Reads the four IDX files located under the module-level `path`, flattens
    and scales the images by 1/255, one-hot encodes the labels, then runs
    mini-batch gradient descent. After every epoch it prints the mean training
    loss, the accuracy on the first 5000 training samples and the accuracy on
    the full test set. Finally it prints predictions for the first 10 test images.
    """
    (num_classes, w, h) = (10, 28, 28)
    test_df = os.path.join(path, 't10k-images-idx3-ubyte/t10k-images-idx3-ubyte')
    test_lb = os.path.join(path, 't10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte')
    train_lb = os.path.join(path, 'train-labels-idx1-ubyte/train-labels-idx1-ubyte')
    train_df = os.path.join(path, 'train-images-idx3-ubyte/train-images-idx3-ubyte')

    mnist_dataloader = MnistDataloader(train_df, train_lb, test_df, test_lb)
    (x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

    x_train = np.asarray(x_train)
    x_test  = np.asarray(x_test)
    y_train = np.asarray(y_train)
    y_test  = np.asarray(y_test)

    # Scale byte pixels to floats and flatten every image into one row.
    x_train = (x_train.astype(np.float32) / 255.0).reshape(-1, w * h)
    x_test  = (x_test.astype(np.float32)  / 255.0).reshape(-1, w * h)
    y_train_oh = one_hot(y_train, num_classes)
    y_test_oh  = one_hot(y_test,  num_classes)

    # Seed before building the model so weight init and shuffling are repeatable.
    np.random.seed(42)
    model = Network(w=w, h=h, s=300, classes=num_classes)

    print("Data verification:")
    print(f"x_train type: {type(x_train)}")
    print(f"x_train shape: {(x_train.shape)}")
    print(f"y_train_oh type: {type(y_train_oh)}")
    print(f"y_train_oh shape: {y_train_oh.shape}")
    epochs = 20
    batch_size = 32
    lr = 0.1

    n = x_train.shape[0]

    for epoch in range(1, epochs + 1):
        # Fresh shuffle of the training set for every epoch.
        idx = np.random.permutation(n)
        x_train_shuf = x_train[idx]
        y_train_shuf = y_train_oh[idx]
        epoch_loss = 0.0
        for i in range(0, n, batch_size):
            xb = x_train_shuf[i:i+batch_size]
            yb = y_train_shuf[i:i+batch_size]

            yhat, loss_b = model.backward(xb, yb, learning_rate=lr)
            # Weight by the real batch length: the final batch is shorter
            # than batch_size whenever n is not a multiple of it.
            epoch_loss += loss_b * xb.shape[0]

        epoch_loss /= n

        # Training accuracy is only estimated on the first 5000 samples.
        yhat_train = model.forward(x_train[:5000])
        train_acc = accuracy(yhat_train, y_train[:5000])

        yhat_test = model.forward(x_test)
        test_acc = accuracy(yhat_test, y_test)

        print(f"Epoch {epoch:02d}/{epochs} | loss={epoch_loss:.4f} | "
              f"train_acc≈{train_acc:.4f} | test_acc={test_acc:.4f}")

    preds = np.argmax(model.forward(x_test[:10]), axis=1)
    print("Preds:", preds)
    print("True :", y_test[:10])

# Start training only when this code runs as the main module, not when it is imported.
if __name__ == "__main__":
  __main__()

Data verification:
x_train type: <class 'numpy.ndarray'>
x_train shape: (60000, 784)
y_train_oh type: <class 'numpy.ndarray'>
y_train_oh shape: (60000, 10)
Epoch 01/20 | loss=0.3884 | train_acc≈0.9518 | test_acc=0.9399
Epoch 02/20 | loss=0.1642 | train_acc≈0.9694 | test_acc=0.9621
Epoch 03/20 | loss=0.1123 | train_acc≈0.9768 | test_acc=0.9703
Epoch 04/20 | loss=0.0854 | train_acc≈0.9840 | test_acc=0.9749
Epoch 05/20 | loss=0.0683 | train_acc≈0.9840 | test_acc=0.9735
Epoch 06/20 | loss=0.0566 | train_acc≈0.9888 | test_acc=0.9772
Epoch 07/20 | loss=0.0478 | train_acc≈0.9898 | test_acc=0.9801
Epoch 08/20 | loss=0.0401 | train_acc≈0.9918 | test_acc=0.9789
Epoch 09/20 | loss=0.0346 | train_acc≈0.9940 | test_acc=0.9806
Epoch 10/20 | loss=0.0294 | train_acc≈0.9936 | test_acc=0.9800
Epoch 11/20 | loss=0.0247 | train_acc≈0.9968 | test_acc=0.9801
Epoch 12/20 | loss=0.0214 | train_acc≈0.9964 | test_acc=0.9799
Epoch 13/20 | loss=0.0179 | train_acc≈0.9978 | test_acc=0.9816
Epoch 14/20 | loss=0.0157