In [89]:
import numpy as np
import cv2 as cv
import os

In [90]:
def load_data(path, split_data=1):
    data = []
    labels = []
    listpath = os.listdir(path)
    total = len(listpath)
    for subpath in listpath:
        complete_subpath = os.path.join(path, subpath)
        images = os.listdir(complete_subpath)
        total_images = len(images)
        labels.extend([int(subpath) for _ in range(int(total_images*split_data))])
        for idx, image_name in enumerate(images[:int(total_images*split_data)]):
            image_path = os.path.join(complete_subpath, image_name)
            image = cv.cvtColor(cv.imread(image_path), cv.COLOR_BGR2GRAY).reshape(-1)
            data.append(image)
            print(f'Images: [{idx+1}/{int(total_images*split_data)}]> in label [{subpath}/{total-1}]', end='\r')
    return np.array(data), np.array(labels)

In [91]:
def one_hot(data, num_classes):
    new_data = np.zeros((data.shape[0], num_classes))
    new_data[np.arange(data.shape[0]), data] = 1
    return new_data

In [92]:
path = r'\datasets\fashion_mnist_images' # Complete the path
path_train = path + r'\train'
path_test = path + r'\test'

In [93]:
X_train, y_train = load_data(path_train)
X_test, y_test = load_data(path_test)

Images: [1000/1000]> in label [9/9]

In [94]:
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0
y_train = one_hot(y_train, 10)
y_test = one_hot(y_test, 10)

In [95]:
def ReLU(x, derv=False):
    if derv: return np.where(x>0, 1, 0)
    return np.maximum(x, 0)

In [96]:
def softmax(x, derv=False):
    x_exp = np.exp(x - np.max(x, axis=1, keepdims=True))
    s = x_exp / x_exp.sum(axis=1, keepdims=True)
    if derv: return s * (1 - s)
    return s

In [97]:
def CCE(y, y_pred, epsilon=1e-8, derv=False):
    if derv: return y_pred-y # In this case
    return -np.mean(y*np.log(y_pred+epsilon))

In [98]:
def accuracy(y, y_pred):
    return np.mean(np.argmax(y, axis=1)==np.argmax(y_pred, axis=1))

In [99]:
n_INPUTS = X_train.shape[1]
n_hidden = 64
n_OUTPUTS = y_train.shape[1]

In [100]:
# Layer 1
W0 = np.random.randn(n_INPUTS, n_hidden) * np.sqrt(2/n_INPUTS)
b0 = np.zeros((1, n_hidden))

# Layer 2
W1 = np.random.randn(n_hidden, n_OUTPUTS) * np.sqrt(2/n_hidden)
b1 = np.zeros((1, n_OUTPUTS))

In [101]:
def forward(x):
    global z0, a0, z1, a1
    
    z0 = x @ W0 + b0
    a0 = ReLU(z0)
    
    z1 = a0 @ W1 + b1
    a1 = softmax(z1)

    return a1

In [102]:
def update_params(dW0, db0, dW1, db1, lr):
    global W0, W1, b0, b1

    W1 -= lr * dW1
    b1 -= lr * db1

    W0 -= lr * dW0
    b0 -= lr * db0

In [103]:
def backward(x=None, y=None, outp=None, deltas=None, learn=False, lr=0.01):
    if y is not None:
        dL = CCE(y, outp, derv=True)
    elif deltas is not None:
        dL = deltas # We are not gonna use this.
    else: raise ValueError('There are no (y) or (deltas).')
    # dL *= softmax(z1, derv=True)
    m = dL.shape[0]

    dW1 = a0.T @ dL / m
    db1 = dL.sum(axis=0, keepdims=True) / m

    da0 = dL @ W1.T
    dz0 = da0 * ReLU(z0, derv=True)

    dW0 = x.T @ dz0 / m
    db0 = dz0.sum(axis=0, keepdims=True) / m

    deltas = dz0 @ W0.T

    if learn:
        update_params(dW0, db0, dW1, db1, lr)

    return deltas, dW0, db0, dW1, db1

In [104]:
def train(x, y, epochs=100, batch_size=32, lr=0.01, print_every=0.1):
    for epoch in range(1, epochs+1):
        for batch in range(0, x.shape[0], batch_size):
            x_batch = x[batch:batch+batch_size]
            y_batch = y[batch:batch+batch_size]

            predictions = forward(x_batch)

            backward(x_batch, y_batch, predictions, learn=True, lr=lr)

        if epoch % max(1, int(epochs*print_every)) == 0:
            print(f'Epochs: [{epoch}/{epochs}]> Loss: {CCE(y, forward(x)):.4f}')

In [105]:
train(
    x=X_train,
    y=y_train,
    epochs=100,
    batch_size=128,
    lr=0.001,
    print_every=0.1
)

Epochs: [10/100]> Loss: 0.1348
Epochs: [20/100]> Loss: 0.1259
Epochs: [30/100]> Loss: 0.1216
Epochs: [40/100]> Loss: 0.1178
Epochs: [50/100]> Loss: 0.1142
Epochs: [60/100]> Loss: 0.1109
Epochs: [70/100]> Loss: 0.1069
Epochs: [80/100]> Loss: 0.1028
Epochs: [90/100]> Loss: 0.1007
Epochs: [100/100]> Loss: 0.0987


In [106]:
accuracy(y_test, forward(X_test))

np.float64(0.6465)