In [1]:
import os
import sys
from pathlib import Path
base_dir = Path(os.getcwd()).resolve().parent
sys.path.append(str(base_dir))

import src
from src import nn
from src import optim
from src.nn import functional as F

In [2]:
# Load dataset and prep it (kinda)
import numpy as np
from typing import Literal

# Load from dir
train_data = np.loadtxt("./data/train.csv", delimiter=",", skiprows=1)  # skip header
test_data = np.loadtxt("./data/test.csv", delimiter=",", skiprows=1)  # skip header
print(f"train: {train_data.shape} | test: {test_data.shape}")

# Normalize
X_train = train_data[:, 1:] / 255.
y_train = train_data[:, 0].astype(int)
X_test = test_data[:, 1:] / 255.
y_test = test_data[:, 0].astype(int)
print(f"normalized data-splits")

class Dataset:
    def __init__(self, split: Literal["train", "test"], batch_size: int = 1):
        self.X_split = X_train if split == "train" else X_test
        self.y_split = y_train if split == "train" else y_test
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.y_split) / self.batch_size))

    def __getitem__(self, idx: int):
        start = idx * self.batch_size
        end = min((idx + 1) * self.batch_size, len(self.y_split))

        batch_X = self.X_split[start:end]
        batch_y = self.y_split[start:end]

        return src.Tensor(batch_X), src.Tensor(batch_y)  # Targets can stay as np.ndarray for now
    
train_dataset = Dataset(split="train")
test_dataset = Dataset(split="test")
print(f"train_dataset: {len(train_dataset)} | test_dataset: {len(test_dataset)}")

X, y = train_dataset[np.random.randint(low=0, high=len(train_dataset))]
print(f"Get random samples | X: {X.shape} | y: {y}")

train: (60000, 785) | test: (10000, 785)
normalized data-splits
train_dataset: 60000 | test_dataset: 10000
Get random samples | X: (1, 784) | y: tensor([1], requires_grad=False)


In [5]:
# training a model
epochs = 5
batch_size = 32
train_dataset = Dataset(split="train", batch_size=batch_size)
test_dataset = Dataset(split="test", batch_size=batch_size)


def accuracy_fn(logits, targets):
    if isinstance(targets, src.Tensor):
        targets = targets.data
    preds = np.argmax(logits.data, axis=-1)
    return (preds == targets).astype(np.float32).mean()

model = nn.Sequential(
    nn.Linear(784, 128, bias=True),
    nn.ReLU(),
    nn.Linear(128, 64, bias=True),
    nn.ReLU(),
    nn.Linear(64, 10, bias=True),
)
optimizer = optim.SGD(params=model.parameters(), lr=1e-2)

results = {
    "train_loss": [],
    "train_acc": [],
    "test_loss": [],
    "test_acc": [],
}

for epoch in range(epochs):
    # train-step
    train_loss = 0.0
    train_acc = 0.0
    indices = np.random.permutation(len(train_dataset))
    for i in indices:
        X, y = train_dataset[i]
        logits = model(X)
        loss = F.cross_entropy_loss(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.data
        train_acc += accuracy_fn(logits, y)
    
    # test-step
    test_loss = 0.0
    test_acc = 0.0
    indices = np.random.permutation(len(test_dataset))
    for i in indices:
        X, y = train_dataset[i]
        logits = model(X)
        loss = F.cross_entropy_loss(logits, y)
        loss.backward()
        
        test_loss += loss.data
        test_acc += accuracy_fn(logits, y)
    
    train_loss /= len(train_dataset)
    train_acc /= len(train_dataset)
    test_loss /= len(test_dataset)
    test_acc /= len(test_dataset)

    results["train_loss"].append(float(train_loss))
    results["train_acc"].append(float(train_acc))
    results["test_loss"].append(float(test_loss))
    results["test_acc"].append(float(test_acc))
    
    print(f"Epoch {epoch+1}/{epochs} | train_loss: {train_loss:.4f} | train_acc: {train_acc*100:.2f}% | test_loss: {test_loss:.4f} | test_acc: {test_acc*100:.2f}%")


Epoch 1/5 | train_loss: 0.7452 | train_acc: 80.43% | test_loss: 0.3445 | test_acc: 90.53%
Epoch 2/5 | train_loss: 0.3181 | train_acc: 90.90% | test_loss: 0.2697 | test_acc: 92.64%
Epoch 3/5 | train_loss: 0.2658 | train_acc: 92.44% | test_loss: 0.2334 | test_acc: 93.49%
Epoch 4/5 | train_loss: 0.2325 | train_acc: 93.37% | test_loss: 0.2063 | test_acc: 94.43%
Epoch 5/5 | train_loss: 0.2063 | train_acc: 94.21% | test_loss: 0.1844 | test_acc: 94.93%


In [6]:
results

{'train_loss': [0.7452069667786362,
  0.31814326080513516,
  0.2658484651418133,
  0.23252920236368826,
  0.20632705522404887],
 'train_acc': [0.8043166399002075,
  0.9090499877929688,
  0.9243500232696533,
  0.9337000250816345,
  0.9421333074569702],
 'test_loss': [0.34448568712669314,
  0.26969813878526966,
  0.23342200062447518,
  0.20630891293219572,
  0.18439942301336493],
 'test_acc': [0.9052516222000122,
  0.9264177083969116,
  0.934904158115387,
  0.9442891478538513,
  0.9492811560630798]}