In [18]:
from pathlib import Path
import sys

# Detect project root (the folder containing src/, data/, models/, etc.)
REPO_ROOT = Path(__file__).resolve().parent.parent if "__file__" in globals() else Path.cwd()

# Ensure `src/` is importable
if str(REPO_ROOT / "src") not in sys.path:
    sys.path.insert(0, str(REPO_ROOT / "src"))

print("Using repo root:", REPO_ROOT)

# Paths
DATA_DIR = REPO_ROOT / "data"
MODELS_DIR = REPO_ROOT / "models"

Using repo root: /Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/notebooks


In [7]:
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

# project imports
from src.kmnist.models.mlp import MLPWide
from src.kmnist.train import train_model, pick_device
from src.kmnist.labels import KMNIST_CLASSES

DATA_DIR   = Path("/Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/data")
MODELS_DIR = Path("models")

device = pick_device()
device

device(type='mps')

In [19]:
#DATA_DIR = Path("/Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/data")
x_train = np.load(DATA_DIR / "kmnist-train-imgs.npz")["arr_0"]
y_train = np.load(DATA_DIR / "kmnist-train-labels.npz")["arr_0"]

x_test  = np.load(DATA_DIR / "kmnist-test-imgs.npz")["arr_0"]
y_test  = np.load(DATA_DIR / "kmnist-test-labels.npz")["arr_0"]

FileNotFoundError: [Errno 2] No such file or directory: '/Users/aaditya/Workspace/Learning/Github/projects/kmnist-recognizer/notebooks/data/kmnist-train-imgs.npz'

In [9]:
class KMNISTArrayDataset(Dataset):
    """
    Wraps numpy arrays (H,W) uint8 -> torch tensors [1,28,28] float in [0,1].
    No inversion is applied (KMNIST is already dark bg / light fg).
    """
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, i):
        img = torch.from_numpy(self.x[i]).float() / 255.0     # [28,28] -> [0,1]
        img = img.unsqueeze(0)                                # [1,28,28]
        label = int(self.y[i])
        return img, label

full_train_ds = KMNISTArrayDataset(x_train, y_train)
test_ds       = KMNISTArrayDataset(x_test,  y_test)
len(full_train_ds), len(test_ds)

(60000, 10000)

In [10]:
# 55k train / 5k val split
val_size = 5000
train_size = len(full_train_ds) - val_size
train_ds, val_ds = random_split(full_train_ds, [train_size, val_size], generator=torch.Generator().manual_seed(42))

BATCH_SIZE = 256
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,  num_workers=0)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

len(train_ds), len(val_ds), len(test_ds)

(55000, 5000, 10000)

In [11]:
model = MLPWide(p=0.35)  # same architecture you used for the app
model

MLPWide(
  (net): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): GELU(approximate='none')
    (4): Dropout(p=0.35, inplace=False)
    (5): Linear(in_features=512, out_features=256, bias=True)
    (6): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): GELU(approximate='none')
    (8): Dropout(p=0.35, inplace=False)
    (9): Linear(in_features=256, out_features=128, bias=True)
    (10): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): GELU(approximate='none')
    (12): Dropout(p=0.35, inplace=False)
    (13): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [13]:
EPOCHS = 40
LR     = 3e-3

history = train_model(
    model=model,
    train_loader=train_loader,
    epochs=EPOCHS,
    lr=LR,
    val_loader=val_loader,
    out_path=MODELS_DIR,       # <— saved here
    early_stopping=True,
    patience=8,
    min_delta=1e-4,
    save_best_only=False      # final run is always saved as a new version; best.pt updated on improvement
)

history

Epoch 01 | train acc 0.860 loss 0.457 | val acc 0.932 loss 0.225
Epoch 02 | train acc 0.928 loss 0.235 | val acc 0.948 loss 0.174
Epoch 03 | train acc 0.944 loss 0.183 | val acc 0.950 loss 0.159
Epoch 04 | train acc 0.954 loss 0.151 | val acc 0.955 loss 0.142
Epoch 05 | train acc 0.963 loss 0.119 | val acc 0.957 loss 0.140
Epoch 06 | train acc 0.966 loss 0.107 | val acc 0.961 loss 0.131
Epoch 07 | train acc 0.970 loss 0.095 | val acc 0.965 loss 0.127
Epoch 08 | train acc 0.974 loss 0.083 | val acc 0.964 loss 0.128
Epoch 09 | train acc 0.975 loss 0.077 | val acc 0.966 loss 0.124
Epoch 10 | train acc 0.978 loss 0.068 | val acc 0.964 loss 0.127
Epoch 11 | train acc 0.980 loss 0.059 | val acc 0.971 loss 0.115
Epoch 12 | train acc 0.981 loss 0.057 | val acc 0.966 loss 0.136
Epoch 13 | train acc 0.982 loss 0.053 | val acc 0.967 loss 0.120
Epoch 14 | train acc 0.985 loss 0.049 | val acc 0.969 loss 0.122
Epoch 15 | train acc 0.985 loss 0.047 | val acc 0.969 loss 0.123
Epoch 16 | train acc 0.98

{'train_loss': 0.029718816976926545,
 'train_acc': 0.9904181817228144,
 'val_loss': 0.11457726734876633,
 'model_path': 'models/mlp_model_v002_acc0.972.pt',
 'best_val_acc': 0.9715999998092651}

In [14]:
from glob import glob
sorted(glob(str(MODELS_DIR / "mlp_model_v*.pt")))

['models/mlp_model_v001.pt', 'models/mlp_model_v002_acc0.972.pt']

In [15]:
# best model (updated whenever val loss improved)
(Path(MODELS_DIR) / "best.pt").exists(), (MODELS_DIR / "best.pt")

(True, PosixPath('models/best.pt'))

In [16]:
import torch.nn.functional as F

@torch.no_grad()
def evaluate(model: nn.Module, loader: DataLoader, device: torch.device):
    model.eval()
    model.to(device)
    correct, total, loss_sum = 0, 0, 0.0
    criterion = nn.CrossEntropyLoss()
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss_sum += criterion(logits, yb).item() * xb.size(0)
        pred = logits.argmax(dim=1)
        correct += (pred == yb).sum().item()
        total   += xb.size(0)
    return {"loss": loss_sum/total, "acc": correct/total}

test_metrics = evaluate(model, test_loader, device)
test_metrics

{'loss': 0.387778296661377, 'acc': 0.9198}

In [17]:
best_model = MLPWide(p=0.35)
best_state = torch.load(MODELS_DIR / "best.pt", map_location="cpu")
best_model.load_state_dict(best_state)

best_test_metrics = evaluate(best_model, test_loader, device)
best_test_metrics

{'loss': 0.318680917596817, 'acc': 0.9149}

In [None]:
from pathlib import Path
import sys

# Detect project root (the folder containing src/, data/, models/, etc.)
REPO_ROOT = Path(__file__).resolve().parent.parent if "__file__" in globals() else Path.cwd()

# Ensure `src/` is importable
if str(REPO_ROOT / "src") not in sys.path:
    sys.path.insert(0, str(REPO_ROOT / "src"))

print("Using repo root:", REPO_ROOT)

# Paths
DATA_DIR = REPO_ROOT / "data"
MODELS_DIR = REPO_ROOT / "models"