In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

In [13]:
import sys
from pathlib import Path
from torch.utils.data import DataLoader, random_split

# Guess the repo root: two levels above this file when `__file__` is defined
# (script execution), otherwise one level above the current working directory
# (e.g. a notebook kernel, where `__file__` is normally absent).
if "__file__" not in globals():
    REPO_ROOT = Path.cwd().parent
else:
    REPO_ROOT = Path(__file__).resolve().parent.parent

# Give the repo root top priority on the import search path.
sys.path.insert(0, str(REPO_ROOT))

print("Using repo root:", REPO_ROOT)

Using repo root: /Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer


In [14]:
from pathlib import Path
import sys

# Walk up from the working directory until we find a folder that contains 'src'.
here = Path.cwd()
REPO_ROOT = next(
    (candidate for candidate in (here, *here.parents) if (candidate / "src").is_dir()),
    None,
)

if REPO_ROOT is None:
    raise RuntimeError("Couldn't find project root (folder containing 'src').")

# The imports in this cell use the `src.kmnist...` form, which resolves only
# when the repo root itself is on sys.path. The `src` folder is kept on the
# path as well so `kmnist...`-style imports continue to work. Entries already
# present are skipped so re-running the cell does not pile up duplicates.
# Root is handled first so that `src` ends up ahead of it, as before.
for path_entry in (str(REPO_ROOT), str(REPO_ROOT / "src")):
    if path_entry not in sys.path:
        sys.path.insert(0, path_entry)
print("Using repo root:", REPO_ROOT)

# Conventional locations for input data and saved checkpoints.
DATA_DIR   = REPO_ROOT / "data"
MODELS_DIR = REPO_ROOT / "models"

import numpy as np
from src.kmnist.models.mlp import MLPWide
from src.kmnist.train import train_model, pick_device

Using repo root: /Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer


In [15]:
# Expect these four files in data/
def _load_npz_array(path, key="arr_0"):
    """Read a single array from an .npz archive, then close the archive.

    `np.load` on an .npz file returns an archive object that holds the file
    open until it is closed; the `with` block releases the handle as soon as
    the requested array has been read into memory.
    """
    with np.load(path) as archive:
        return archive[key]


x_train = _load_npz_array(DATA_DIR / "kmnist-train-imgs.npz")    # (60000, 28, 28)
y_train = _load_npz_array(DATA_DIR / "kmnist-train-labels.npz")  # (60000,)
x_test  = _load_npz_array(DATA_DIR / "kmnist-test-imgs.npz")     # (10000, 28, 28)
y_test  = _load_npz_array(DATA_DIR / "kmnist-test-labels.npz")   # (10000,)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [16]:
import torch
from torch.utils.data import Dataset  # <-- missing import

class KMNISTArrayDataset(Dataset):
    """
    Map-style dataset over paired image / label arrays.

    Item i is the i-th image converted to float32 and divided by 255 (so uint8
    input lands in [0, 1]) with a leading channel axis added
    ([H, W] -> [1, H, W]), paired with its label as a plain Python int.
    Pixel polarity is left untouched: KMNIST is already light-on-dark.
    """
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        # Sample count is the size of the leading image axis.
        return self.x.shape[0]

    def __getitem__(self, i):
        pixels = torch.from_numpy(self.x[i]).to(torch.float32).div(255.0)
        # Indexing with None prepends the channel dimension.
        return pixels[None], int(self.y[i])

In [17]:
# Wrap the raw arrays so they can be served through DataLoaders.
full_train_ds = KMNISTArrayDataset(x_train, y_train)
test_ds = KMNISTArrayDataset(x_test, y_test)

# Deterministic hold-out: 5k samples for validation, the remainder for training.
val_size = 5000
train_size = len(full_train_ds) - val_size
split_rng = torch.Generator()
split_rng.manual_seed(42)  # fixed seed so the split is the same on every run
train_ds, val_ds = random_split(full_train_ds, [train_size, val_size], generator=split_rng)

BATCH_SIZE = 256
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=BATCH_SIZE, shuffle=do_shuffle, num_workers=0)
    for split, do_shuffle in ((train_ds, True), (val_ds, False), (test_ds, False))
)

len(train_ds), len(val_ds), len(test_ds)

(55000, 5000, 10000)

In [18]:
# Ask the project helper which device to use and report it.
# NOTE(review): `device` is not passed to train_model below; presumably
# train_model selects its own device -- confirm against src/kmnist/train.py.
device = pick_device()
print("device:", device)

# Hyperparameters for this run.
EPOCHS = 40
LR     = 3e-3

model = MLPWide(p=0.35)

# Collect the training arguments in one place, then launch the run.
train_kwargs = dict(
    model=model,
    train_loader=train_loader,
    epochs=EPOCHS,
    lr=LR,
    val_loader=val_loader,  # optional; validation metrics show up in the epoch log
    out_dir=MODELS_DIR,     # checkpoints are written here
    prefix="mlp_model",     # checkpoint filenames look like mlp_model_v001.pt
)
history = train_model(**train_kwargs)

history

device: mps
Epoch 01 | train acc 0.864 loss 0.449 | val acc 0.929 loss 0.228
Epoch 02 | train acc 0.926 loss 0.241 | val acc 0.944 loss 0.185
Epoch 03 | train acc 0.944 loss 0.183 | val acc 0.954 loss 0.153
Epoch 04 | train acc 0.954 loss 0.150 | val acc 0.958 loss 0.141
Epoch 05 | train acc 0.961 loss 0.124 | val acc 0.963 loss 0.121
Epoch 06 | train acc 0.968 loss 0.105 | val acc 0.962 loss 0.127
Epoch 07 | train acc 0.971 loss 0.091 | val acc 0.964 loss 0.125
Epoch 08 | train acc 0.973 loss 0.086 | val acc 0.964 loss 0.127
Epoch 09 | train acc 0.976 loss 0.074 | val acc 0.966 loss 0.118
Epoch 10 | train acc 0.978 loss 0.068 | val acc 0.967 loss 0.118
Epoch 11 | train acc 0.980 loss 0.063 | val acc 0.967 loss 0.117
Epoch 12 | train acc 0.981 loss 0.058 | val acc 0.968 loss 0.112
Epoch 13 | train acc 0.982 loss 0.055 | val acc 0.967 loss 0.120
Epoch 14 | train acc 0.983 loss 0.051 | val acc 0.968 loss 0.112
Epoch 15 | train acc 0.985 loss 0.047 | val acc 0.971 loss 0.117
Epoch 16 | tr

{'train_loss': 0.020123697447979993, 'train_acc': 0.9932363636363636}

In [21]:
import torch.nn.functional as F

@torch.no_grad()
def evaluate(model: nn.Module, loader: DataLoader, device: torch.device):
    """
    Compute mean cross-entropy loss and top-1 accuracy of `model` over `loader`.

    Side effects: the model is switched to eval mode and moved to `device`;
    neither change is undone on return. Gradient tracking is disabled for the
    whole call by the decorator.

    Args:
        model: network mapping an input batch to per-class logits
            (class axis at dim 1).
        loader: iterable yielding `(inputs, integer class targets)` batches.
        device: device on which the forward passes are run.

    Returns:
        dict with "loss" (mean per-sample cross-entropy) and "acc"
        (fraction of samples whose argmax prediction equals the target).

    Raises:
        ValueError: if `loader` yields no samples, since the metrics would be
            undefined (previously this surfaced as a ZeroDivisionError).
    """
    model.eval().to(device)
    correct, total, loss_sum = 0, 0, 0.0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        # Sum per batch (not mean) so a smaller final batch is weighted
        # correctly when dividing by the total sample count below.
        loss_sum += F.cross_entropy(logits, yb, reduction="sum").item()
        correct += (logits.argmax(dim=1) == yb).sum().item()
        total += xb.size(0)
    if total == 0:
        raise ValueError("evaluate() received an empty loader; metrics are undefined.")
    return {"loss": loss_sum / total, "acc": correct / total}

# Score the in-memory model on the held-out test loader; the bare name on the
# last line makes the notebook display the resulting metrics dict.
test_metrics = evaluate(model=model, loader=test_loader, device=device)
test_metrics

{'loss': 0.3702347909927368, 'acc': 0.9276}

In [22]:
from glob import glob

# Gather every versioned checkpoint for this prefix, order the paths
# lexicographically, and show the three that sort last.
checkpoint_pattern = MODELS_DIR / "mlp_model_v*.pt"
saved = glob(str(checkpoint_pattern))
saved.sort()
saved[-3:]

['/Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/models/mlp_model_v001.pt',
 '/Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/models/mlp_model_v002.pt',
 '/Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/models/mlp_model_v003.pt']

In [23]:
# Load the last saved version (highest numeric vNNN suffix) and evaluate it.
def _checkpoint_version(path):
    """Return the integer NNN from a '<prefix>_vNNN.pt' path, or -1 if the
    text after the final '_v' is not a plain decimal number."""
    suffix = Path(path).stem.rpartition("_v")[2]
    return int(suffix) if suffix.isdecimal() else -1


# Order by parsed version rather than by raw string: a plain lexicographic
# sort would place 'v1000' before 'v999' once versions outgrow three digits.
# The path itself is the tie-breaker so the order is fully deterministic.
checkpoints = sorted(
    glob(str(MODELS_DIR / "mlp_model_v*.pt")),
    key=lambda p: (_checkpoint_version(p), p),
)
if not checkpoints:
    raise FileNotFoundError(
        f"No checkpoints matching 'mlp_model_v*.pt' found in {MODELS_DIR}"
    )
last_path = checkpoints[-1]
print("Loading:", last_path)

reloaded = MLPWide(p=0.35)
# NOTE(review): torch.load unpickles the file, so only load checkpoints you
# trust; consider weights_only=True on PyTorch versions that support it.
reloaded.load_state_dict(torch.load(last_path, map_location="cpu"))
reloaded_metrics = evaluate(reloaded, test_loader, device)
reloaded_metrics

Loading: /Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/models/mlp_model_v003.pt


{'loss': 0.3702347909927368, 'acc': 0.9276}