In [1]:
import numpy as np
from PIL import Image

In [2]:
def encode_label(label: int) -> list:
    code = [0] * 10
    code[label] = 1
    return code

In [3]:
for i in range(10):
    print(encode_label(i))

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]


In [4]:
import os

def load_data(path: str):
    labels = os.listdir(path)
    
    X, y = [], []
    for label in labels:
        label_encoded = encode_label(int(label))

        for sample in os.listdir(f"{path}/{label}"):
            png = Image.open(f"{path}/{label}/{sample}")
            arr = np.array(png)
            X.append(arr.flatten())
            y.append(label_encoded)

    return np.array(X), np.array(y)

In [5]:
X_train, y_train = load_data("data/training")
X_test, y_test = load_data("data/testing")

In [6]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((60000, 784), (60000, 10), (10000, 784), (10000, 10))

In [7]:
batch_size = 32
input_size = 28 * 28
output_size = 10

h1 = input_size // 2
h2 = h1 // 4

In [8]:
# Interface
class Module:
    def forward(self, x):
        pass

    def backward(self):
        pass

    def __call__(self, x):
        return self.forward(x)

In [9]:
class Dense(Module):
    def __init__(self, in_dim, out_dim, weight_scale=1e-3):
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.wdim = (in_dim, out_dim)
        self.W = np.random.normal(scale=weight_scale, size=self.wdim)
        self.bdim = (out_dim)
        self.b = np.zeros(out_dim)

    def forward(self, x):
        return np.dot(x, self.W) + self.b

In [10]:
def relu(x):
    return np.maximum(0, x)

In [11]:
def is_binary(arr):
    return np.all(np.logical_or(arr == 0, arr == 1))

In [12]:
def is_stochastic(arr):
    assert len(arr[arr < 0]) == 0
    assert len(arr[arr > 1]) == 0
    
    N, _ = arr.shape
    return np.allclose(np.sum(arr, axis=1), np.ones(N))

In [13]:
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

In [14]:
class FeedForwardNet(Module):
    def __init__(self):
        self.l1 = Dense(in_dim=input_size, out_dim=h1)
        self.l2 = Dense(in_dim=h1, out_dim=h2)
        self.l3 = Dense(in_dim=h2, out_dim=output_size)

    def forward(self, x):
        l1_out = relu(self.l1(x))
        l2_out = self.l2(l1_out)
        l3_out = self.l3(l2_out)
        return softmax(l3_out)

In [15]:
net = FeedForwardNet()
out = net(X_train[:100])

In [16]:
def cross_entropy_loss(pred, target):
    assert is_binary(target), "Target must contain all binary values"
    assert is_stochastic(pred), "Array must sum to 1"
    
    return -np.sum(np.dot(target.T, np.log(pred)))

In [17]:
cross_entropy_loss(out, y_train[:100])

2302.585166500708