In [1]:
import numpy as np
from model import Model
import func
from layers import Linear, ReLU, Conv2D, Flatten, LazyLinear, Attention
# import torch
# from torch import nn

In [2]:
def to_one_hot_vector(labels, low, high):
    """Convert class labels into one-hot encoded rows.

    Parameters
    ----------
    labels : array_like
        Class labels of any shape; they are flattened before encoding.
        Integer, boolean and whole-number float values (for example a
        column read with ``np.loadtxt``) are accepted.
    low, high : int
        Inclusive bounds of the label range. ``high - low + 1`` classes
        are produced and label ``low`` maps to column 0.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(labels.size, high - low + 1)`` holding a
        single 1.0 per row. Empty input yields ``(0, high - low + 1)``.

    Raises
    ------
    ValueError
        If ``high < low``, if any label lies outside ``[low, high]``, or if
        a float label is not a whole number (NaN included).
    """
    if high < low:
        raise ValueError("high must be greater than or equal to low.")

    labels = np.asarray(labels).flatten()
    num_classes = high - low + 1

    if np.any((labels < low) | (labels > high)):
        raise ValueError("Some labels are outside the specified (low, high) range.")

    if labels.dtype.kind == "f":
        # NaN slips through the range check above (all comparisons are False)
        # but is caught here because NaN != NaN.
        if np.any(labels != np.floor(labels)):
            raise ValueError("Labels must be whole numbers.")

    if labels.dtype.kind in "fb":
        # NumPy only accepts integer (or boolean-mask) index arrays. An empty
        # list becomes a float64 array, so this cast is also what makes empty
        # input work.
        labels = labels.astype(np.int64)

    # Row i of the identity matrix is the one-hot vector for class i.
    one_hot = np.eye(num_classes)[labels - low]
    return one_hot

In [3]:
# Earlier fully-connected baselines, kept for reference. Note that `Sigmoid`
# is not among the names imported in the first cell.
# model = Model([Linear(28*28, 64), Sigmoid(64, 64), Linear(64, 32), Sigmoid(32, 32), Linear(32, 16), Sigmoid(16, 16), Linear(16, 10)])
# model = Model([Linear(28*28, 64), ReLU(64, 64), Linear(64, 32), ReLU(32, 32), Linear(32, 16), ReLU(16, 16), Linear(16, 10)])

# Convolution + attention feature extractor. The shapes in the trailing
# comments are copied from the recorded `show_dims` output of this cell.
feature_layers = [
    Conv2D(5, 16, "Valid", (1, 28, 28)),  # (1, 28, 28) -> (16, 24, 24)
    ReLU(),
    Conv2D(5, 16, "Valid"),               # -> (16, 20, 20)
    ReLU(),
    # No extra activation after attention: per the author's note, softmax is
    # already applied inside the layer, and skipping another non-linearity
    # better preserves the attention results.
    Attention(32),                        # shape unchanged: (16, 20, 20)
    Conv2D(3, 32, "Valid"),               # -> (32, 18, 18)
    ReLU(),
    Attention(64),                        # shape unchanged: (32, 18, 18)
    Conv2D(3, 64, "valid"),               # -> (64, 16, 16)
    ReLU(),
    Attention(128),                       # shape unchanged: (64, 16, 16)
    Conv2D(3, 64, "valid"),               # -> (64, 14, 14)
    ReLU(),
]

# Dense classifier head producing 10 outputs.
classifier_layers = [
    Flatten(),                            # -> 12544
    LazyLinear(128),                      # -> 128
    ReLU(),
    Linear(128, 64),                      # -> 64
    ReLU(),
    Linear(64, 10),                       # -> 10
]

# Layers are instantiated in the same order as before; the two groups are
# joined into one fresh list that is handed to Model.
model = Model(feature_layers + classifier_layers, show_dims=True)
def acc(log, lab):
    """Return the fraction of rows whose predicted class matches the target.

    Parameters
    ----------
    log : array_like
        2-D array of per-class scores; the predicted class of each row is
        the arg-max along axis 1.
    lab : array_like
        2-D array of targets in the same layout; the target class of each
        row is the arg-max along axis 1.

    Returns
    -------
    numpy.floating
        Mean of the element-wise equality between predicted and target
        class indices.
    """
    predicted = np.argmax(log, axis=1)
    expected = np.argmax(lab, axis=1)
    return np.mean(predicted == expected)

(1, 28, 28)
(16, 24, 24)
(16, 24, 24)
(16, 20, 20)
(16, 20, 20)
(16, 20, 20)
(32, 18, 18)
(32, 18, 18)
(32, 18, 18)
(64, 16, 16)
(64, 16, 16)
(64, 16, 16)
(64, 14, 14)
(64, 14, 14)
12544
128
128
64
64
10


In [4]:
# Attach the training configuration to the model before fitting.
model.compile(
    0.001,                    # presumably the learning rate - confirm against Model.compile
    func.cross_entropy_loss,  # loss function from the project-local `func` module
    acc,                      # metric callable defined in the model cell
)

In [5]:
def _load_mnist_csv(path):
    """Read one MNIST CSV file and split it into labels and images.

    The first column is treated as the digit label and one-hot encoded over
    the range 0-9; the remaining columns are reshaped to ``(-1, 1, 28, 28)``.

    Returns a tuple ``(raw_table, one_hot_labels, images)``.
    """
    raw_table = np.loadtxt(path, delimiter=',')
    digit_column = raw_table[:, 0].astype(int)
    one_hot_labels = to_one_hot_vector(digit_column, low=0, high=9)
    images = raw_table[:, 1:].reshape(-1, 1, 28, 28)
    return raw_table, one_hot_labels, images


tr_fname = "MNIST_CSV/mnist_train.csv"
ts_fname = "MNIST_CSV/mnist_test.csv"

# Training split first, then test split (same order as the file reads).
train_data, train_labels, train_images = _load_mnist_csv(tr_fname)
test_data, test_labels, test_images = _load_mnist_csv(ts_fname)

In [6]:
# Train on the full training split. In the recorded output each epoch has 938
# batches and a progress line appears at batches 1, 94, 187, ... (every 93
# batches), with epochs numbered from 0.
model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=3,
    print_period=10,
)


batch_loss: 15.779229232595988, batch_metric: 0.125, batch: 1/938, epoch: 0

batch_loss: 7.026195326173831, batch_metric: 0.09375, batch: 94/938, epoch: 0

batch_loss: 4.2055048903633185, batch_metric: 0.140625, batch: 187/938, epoch: 0

batch_loss: 3.2507586216952906, batch_metric: 0.203125, batch: 280/938, epoch: 0

batch_loss: 2.747001803339646, batch_metric: 0.234375, batch: 373/938, epoch: 0

batch_loss: 2.721189493041144, batch_metric: 0.3125, batch: 466/938, epoch: 0

batch_loss: 1.9728424446551291, batch_metric: 0.3125, batch: 559/938, epoch: 0

batch_loss: 2.2107317090518235, batch_metric: 0.328125, batch: 652/938, epoch: 0

batch_loss: 2.0275123941560587, batch_metric: 0.390625, batch: 745/938, epoch: 0

batch_loss: 2.0204486861753272, batch_metric: 0.375, batch: 838/938, epoch: 0

batch_loss: 2.1614509045261032, batch_metric: 0.390625, batch: 931/938, epoch: 0





batch_loss: 2.0937244805389157, batch_metric: 0.34375, batch: 1/938, epoch: 1

batch_loss: 1.6769373834846557,