# Hybrid CNN-BiLSTM Framework for Real-Time Handwriting Recognition 

### 1. Imports and Dependencies

In [265]:
from __future__ import annotations
from datetime import datetime
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Optional, Sequence, Union, Tuple, Dict, Any
import json
import random
import string

import numpy as np
import pandas as pd
import torch
import torch.nn as nn

import matplotlib as mpl
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report, f1_score, confusion_matrix

### 2. Reproducibility

In [266]:
def seed_everything(seed: int = 42, deterministic: bool = True):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN mode flags.

    Args:
        seed: Value handed to every random number generator.
        deterministic: When true, cuDNN is put in deterministic mode and its
            benchmark autotuner is switched off; otherwise the two flags are
            set the other way round.
    """
    for seed_fn in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # The two cuDNN flags are always exact opposites of each other.
    # torch.use_deterministic_algorithms(True) would be stricter, but it may
    # raise for ops without a deterministic implementation.
    want_deterministic = bool(deterministic)
    torch.backends.cudnn.deterministic = want_deterministic
    torch.backends.cudnn.benchmark = not want_deterministic

### 3. Load Data

In [267]:
def load_raw_splits(data_dir: Path):
    """Load the nine ``.npy`` arrays of the train/val/test splits.

    Files are read from ``data_dir`` as ``X_<split>.npy``, ``y_<split>.npy``
    and ``len_<split>.npy`` for ``<split>`` in train, val, test.

    Returns:
        A 9-tuple ordered as ``(X_train, X_val, X_test, y_train, y_val,
        y_test, len_train, len_val, len_test)``.
    """
    split_names = ("train", "val", "test")

    def read_group(prefix: str, **load_kwargs):
        # One array per split, always in train/val/test order.
        return [
            np.load(data_dir / f"{prefix}_{split}.npy", **load_kwargs)
            for split in split_names
        ]

    features = read_group("X")
    # Label files are the only ones loaded with pickling enabled.
    targets = read_group("y", allow_pickle=True)
    seq_lengths = read_group("len")

    return (*features, *targets, *seq_lengths)

# Sanity check: print the distinct label values found in the training labels file.
print(np.unique(np.load("data/processed_imu/y_train.npy", allow_pickle=True)))

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25]


### 4. DataLoaders

In [268]:
def make_loaders(
    X_train, y_train, len_train,
    X_val,   y_val,   len_val,
    X_test,  y_test,  len_test,
    batch_size: int,
    num_workers: int = 0,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Wrap the three splits in ``TensorDataset``s and build their loaders.

    Each split is given as (features, labels, lengths); features are converted
    to float32 tensors, labels and lengths to int64 tensors.

    Args:
        batch_size: Batch size shared by all three loaders.
        num_workers: Worker processes per loader; persistent workers are
            enabled only when this is greater than zero.

    Returns:
        ``(train_loader, val_loader, test_loader)``. Only the training loader
        shuffles; no loader drops the last partial batch.
    """

    def as_dataset(features, targets, seq_lengths) -> TensorDataset:
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.long),
            torch.tensor(seq_lengths, dtype=torch.long),
        )

    def as_loader(dataset: TensorDataset, *, shuffle: bool) -> DataLoader:
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            drop_last=False,
            pin_memory=torch.cuda.is_available(),
            persistent_workers=(num_workers > 0),
        )

    train_ds = as_dataset(X_train, y_train, len_train)
    val_ds = as_dataset(X_val, y_val, len_val)
    test_ds = as_dataset(X_test, y_test, len_test)

    return (
        as_loader(train_ds, shuffle=True),
        as_loader(val_ds, shuffle=False),
        as_loader(test_ds, shuffle=False),
    )

### 5. Model Definition

In [None]:
class CNN_BiLSTM(nn.Module):
    """Two 1-D convolutions, max-pooling, a 2-layer BiLSTM and an MLP head.

    Args:
        num_features: Number of input channels per timestep (``F``).
        num_classes: Size of the output logit vector.
        hidden_size: Hidden units per LSTM direction.
        dropout: Dropout used between the LSTM layers and before the final
            linear layer.
        use_batchnorm: When true, each convolution is followed by
            ``BatchNorm1d``; otherwise by ``Identity``.
    """

    def __init__(
        self,
        num_features: int,
        num_classes: int,
        hidden_size: int = 128,
        dropout: float = 0.3,
        use_batchnorm: bool = False,
    ):
        super().__init__()
        self.num_features = num_features
        self.num_classes = num_classes

        # Module creation order is kept fixed so that seeded weight
        # initialisation stays reproducible.
        self.conv1 = nn.Conv1d(num_features, 64, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(64) if use_batchnorm else nn.Identity()

        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(128) if use_batchnorm else nn.Identity()

        self.pool = nn.MaxPool1d(kernel_size=2)

        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
            dropout=dropout,
        )

        self.fc1 = nn.Linear(hidden_size * 2, 128)
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape ``(B, num_classes)``.

        Args:
            x: Input of shape ``(B, T, F)`` with ``F == num_features``.
            lengths: Valid length of each sequence, shape ``(B,)``. Any
                integer dtype is accepted; values are clamped to ``[1, T]``.

        Raises:
            ValueError: If ``x`` is not 3-D, its feature size is wrong,
                ``lengths`` is not 1-D, or the batch sizes disagree.
        """
        # x: (B, T, F), lengths: (B,)
        if x.ndim != 3:
            raise ValueError(f"Expected x.ndim=3 (B,T,F), got {x.shape}")
        if x.shape[-1] != self.num_features:
            raise ValueError(f"Expected F={self.num_features}, got {x.shape[-1]}")
        if lengths.ndim != 1:
            raise ValueError(f"Expected lengths.ndim=1, got {lengths.shape}")
        if lengths.shape[0] != x.shape[0]:
            raise ValueError(
                f"Batch size mismatch: x has {x.shape[0]} samples, lengths has {lengths.shape[0]}"
            )

        # gather() below requires an int64 index, so normalise the dtype here
        # instead of failing later on int32 lengths.
        lengths = lengths.to(device=x.device, dtype=torch.long)

        x = x.permute(0, 2, 1)  # (B, F, T)
        lengths = torch.clamp(lengths, min=1, max=x.shape[2])

        x = torch.relu(self.bn1(self.conv1(x)))
        x = self.pool(torch.relu(self.bn2(self.conv2(x))))

        # Update lengths for MaxPool1d(kernel=2)
        lengths2 = lengths // 2
        lengths2 = torch.clamp(lengths2, min=1, max=x.shape[2])

        x = x.permute(0, 2, 1)  # (B, T', 128)
        # NOTE(review): the LSTM runs over the full padded sequence (no
        # packing), so the backward direction reaches the selected step after
        # reading any trailing padding. Switching to packed sequences would
        # change the outputs of already-trained checkpoints.
        x, _ = self.lstm(x)

        # Take last valid timestep per sample
        idx = (lengths2 - 1).view(-1, 1, 1).expand(-1, 1, x.size(2))
        x = x.gather(dim=1, index=idx).squeeze(1)

        x = torch.relu(self.fc1(x))
        x = self.drop(x)
        return self.fc2(x)

### 6. Experiment Configuration

In [270]:
@dataclass
class TrainConfig:
    """Settings for one training or evaluation run.

    ``data_dir`` and ``out_dir`` may be given as ``str`` or ``Path``; both are
    stored as ``Path`` objects.
    """

    data_dir: Path  # folder holding the .npy splits and label_map.json
    out_dir: Path   # run output folder (config, log, checkpoint, reports)

    num_features: int = 18
    batch_size: int = 32
    epochs: int = 30

    hidden_size: int = 128
    dropout: float = 0.3
    use_batchnorm: bool = True

    lr: float = 1e-3
    weight_decay: float = 1e-4

    step_size: int = 8   # StepLR: epochs between learning-rate decays
    gamma: float = 0.7   # StepLR: multiplicative decay factor
    patience: int = 7    # early stopping: epochs without improvement

    seed: int = 42
    deterministic: bool = True

    mode: str = "train"  # "train" or "eval"

    topk: int = 3  # set 0 to disable top-k accuracy

    def __post_init__(self):
        # Path methods (.name, .mkdir, "/") are used on these fields, so plain
        # strings are normalised here instead of failing later.
        self.data_dir = Path(self.data_dir)
        self.out_dir = Path(self.out_dir)

def make_run_name(cfg: TrainConfig) -> str:
    """Build the run folder name from batch-norm flag, batch size, seed and dropout.

    Example: ``exp_no_bn_bs16_seed42_drop03``.
    """
    bn_tag = "bn" if cfg.use_batchnorm else "no_bn"
    # Dropout is truncated to whole tenths (0.3 -> 03, 0.5 -> 05), so values
    # such as 0.30 and 0.35 produce the same tag.
    dropout_tenths = int(cfg.dropout * 10)
    parts = [
        "exp",
        bn_tag,
        f"bs{cfg.batch_size}",
        f"seed{cfg.seed}",
        f"drop{dropout_tenths:02d}",
    ]
    return "_".join(parts)

### 7. Training and Evaluation Primitives

In [271]:
def topk_accuracy(logits: torch.Tensor, y: torch.Tensor, k: int) -> float:
    """Return the fraction of rows whose target is among the ``k`` largest logits.

    ``k`` is capped at the number of columns of ``logits``; a non-positive
    ``k`` yields NaN.
    """
    if k <= 0:
        return float("nan")

    effective_k = min(k, logits.shape[1])
    _, top_indices = torch.topk(logits, k=effective_k, dim=1)  # (B, k)
    targets = y.view(-1, 1)
    hit_per_row = (top_indices == targets).any(dim=1)
    return hit_per_row.float().mean().item()

def train_one_epoch(model, loader, device, criterion, optimizer, *, topk: int = 0):
    """Run one optimisation pass over ``loader`` and return epoch metrics.

    Loss, accuracy and top-k accuracy are all averaged per sample, so a
    shorter final batch does not get extra weight.

    Args:
        model: Module called as ``model(x, lengths)`` and returning logits.
        loader: Iterable of ``(x, y, lengths)`` batches.
        device: Device the batch tensors are moved to.
        criterion: Loss called as ``criterion(logits, y)``. A per-batch mean
            is assumed unless its ``reduction`` attribute is ``"sum"``.
        optimizer: Optimizer stepping ``model``'s parameters.
        topk: ``k`` for top-k accuracy; ``0`` disables it.

    Returns:
        ``(avg_loss, acc, acck)``; ``acck`` is NaN when ``topk <= 0``.
    """
    model.train()
    # A sum-reduced loss is already a batch total and must not be re-weighted.
    sum_reduced = getattr(criterion, "reduction", "mean") == "sum"
    loss_total = 0.0
    correct, total = 0, 0
    topk_hits = 0.0

    for x, y, lengths in loader:
        x, y, lengths = x.to(device), y.to(device), lengths.to(device)
        batch_size = y.size(0)

        optimizer.zero_grad(set_to_none=True)
        logits = model(x, lengths)
        loss = criterion(logits, y)
        loss.backward()
        # Clip the global gradient norm to 1.0 before the update.
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        loss_total += loss.item() * (1 if sum_reduced else batch_size)
        pred = torch.argmax(logits, dim=1)
        correct += (pred == y).sum().item()
        total += batch_size

        if topk > 0:
            # topk_accuracy returns a batch fraction; convert it back to a
            # hit count so the epoch value is sample-weighted.
            topk_hits += topk_accuracy(logits, y, topk) * batch_size

    n_samples = max(1, total)
    avg_loss = loss_total / n_samples
    acc = correct / n_samples
    acck = (topk_hits / n_samples) if topk > 0 else float("nan")
    return avg_loss, acc, acck


@torch.no_grad()
def evaluate(model, loader, device, criterion, *, topk: int = 0):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Loss, accuracy and top-k accuracy are averaged per sample, so a shorter
    final batch does not get extra weight.

    Args:
        model: Module called as ``model(x, lengths)`` and returning logits.
        loader: Iterable of ``(x, y, lengths)`` batches.
        device: Device the batch tensors are moved to.
        criterion: Loss called as ``criterion(logits, y)``. A per-batch mean
            is assumed unless its ``reduction`` attribute is ``"sum"``.
        topk: ``k`` for top-k accuracy; ``0`` disables it.

    Returns:
        ``(avg_loss, acc, acck, macro_f1, weighted_f1, y_true, y_pred)`` where
        the last two are 1-D NumPy arrays. ``acck`` is NaN when ``topk <= 0``;
        both F1 values are NaN when the loader yields no samples.
    """
    model.eval()
    # A sum-reduced loss is already a batch total and must not be re-weighted.
    sum_reduced = getattr(criterion, "reduction", "mean") == "sum"
    loss_total = 0.0
    correct, total = 0, 0
    topk_hits = 0.0

    y_true, y_pred = [], []

    for x, y, lengths in loader:
        x, y, lengths = x.to(device), y.to(device), lengths.to(device)
        batch_size = y.size(0)

        logits = model(x, lengths)
        loss = criterion(logits, y)
        loss_total += loss.item() * (1 if sum_reduced else batch_size)

        pred = torch.argmax(logits, dim=1)
        correct += (pred == y).sum().item()
        total += batch_size

        y_true.append(y.cpu().numpy())
        y_pred.append(pred.cpu().numpy())

        if topk > 0:
            # Convert the batch fraction back to a hit count.
            topk_hits += topk_accuracy(logits, y, topk) * batch_size

    y_true = np.concatenate(y_true) if y_true else np.array([], dtype=int)
    y_pred = np.concatenate(y_pred) if y_pred else np.array([], dtype=int)

    n_samples = max(1, total)
    avg_loss = loss_total / n_samples
    acc = correct / n_samples
    acck = (topk_hits / n_samples) if topk > 0 else float("nan")

    macro_f1 = f1_score(y_true, y_pred, average="macro") if y_true.size else float("nan")
    weighted_f1 = f1_score(y_true, y_pred, average="weighted") if y_true.size else float("nan")

    return avg_loss, acc, acck, macro_f1, weighted_f1, y_true, y_pred

### 8. Plotting Utilities

In [272]:
def plot_training_curves_csv(
    log_csv: Path,
    out_dir: Path,
    prefix: str,
    *,
    ema: Optional[float] = None,
    figsize=(3.5, 2.4),
    dpi: int = 600,
    use_color: bool = False,
    show_grid: bool = False,
    acc_in_percent: bool = True,
    smart_acc_ylim: bool = True,
    mark_best_val: bool = False,
    ):
    """Plot loss and accuracy curves from a training-log CSV.

    Two figures are written to ``out_dir``, each as PDF and PNG:
    ``<prefix>_loss`` and ``<prefix>_accuracy``.

    Args:
        log_csv: CSV with columns ``epoch``, ``train_loss``, ``val_loss``,
            ``train_acc`` and ``val_acc``.
        out_dir: Output folder; created if missing.
        prefix: File-name prefix for the figures.
        ema: Optional smoothing factor in (0, 1) for an exponential moving
            average of every curve; ``None`` plots the raw values.
        figsize: Figure size in inches.
        dpi: Resolution of the PNG output.
        use_color: Use two distinct colours instead of black for both curves.
        show_grid: Draw major and minor grid lines.
        acc_in_percent: Scale accuracies by 100 and label the axis in percent.
        smart_acc_ylim: In percent mode, zoom the y-axis when the best
            validation accuracy exceeds 85.
        mark_best_val: Mark the epoch with the highest validation accuracy.

    Raises:
        ValueError: If required columns are missing, the log has no rows, or
            ``ema`` is outside (0, 1).
    """
    rc = {
        "font.family": "serif",
        "font.serif": ["Times New Roman", "STIXGeneral", "DejaVu Serif"],
        "mathtext.fontset": "stix",
        "font.size": 10,
        "axes.labelsize": 10,
        "legend.fontsize": 9,
        "xtick.labelsize": 9,
        "ytick.labelsize": 9,
        "lines.linewidth": 1.6,
        "axes.linewidth": 1.0,
        "xtick.direction": "in",
        "ytick.direction": "in",
        "xtick.minor.visible": True,
        "ytick.minor.visible": True,
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": show_grid,
        "grid.alpha": 0.15,
        "grid.linestyle": "--",
        "grid.linewidth": 0.6,
        # Font type 42 embeds TrueType fonts in the PDF/PS output.
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
    }

    with mpl.rc_context(rc):
        log_csv = Path(log_csv)
        out_dir = Path(out_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

        df = pd.read_csv(log_csv)
        req = ["epoch", "train_loss", "val_loss", "train_acc", "val_acc"]
        missing = [c for c in req if c not in df.columns]
        if missing:
            raise ValueError(f"Missing columns in {log_csv}: {missing}")
        if df.empty:
            # Fail early with a clear message instead of inside np.min below.
            raise ValueError(f"No epochs logged in {log_csv}")

        x = df["epoch"].to_numpy(dtype=float)

        def smooth(y: np.ndarray) -> np.ndarray:
            """Return ``y`` as floats, EMA-smoothed when ``ema`` is set."""
            y = np.asarray(y, dtype=float)
            if ema is None:
                return y
            a = float(ema)
            if not (0.0 < a < 1.0):
                raise ValueError("ema must be in (0,1) or None")
            ys = np.empty_like(y, dtype=float)
            ys[0] = y[0]
            for i in range(1, len(y)):
                ys[i] = a * y[i] + (1.0 - a) * ys[i - 1]
            return ys

        tr_loss = smooth(df["train_loss"].to_numpy())
        va_loss = smooth(df["val_loss"].to_numpy())
        tr_acc  = smooth(df["train_acc"].to_numpy())
        va_acc  = smooth(df["val_acc"].to_numpy())

        if acc_in_percent:
            # Without smoothing these arrays can be views of the DataFrame's
            # data (read-only under pandas Copy-on-Write), so scale into new
            # arrays rather than in place.
            tr_acc = tr_acc * 100.0
            va_acc = va_acc * 100.0

        if use_color:
            c_train, c_val = "#004488", "#DDAA33"
        else:
            c_train, c_val = "black", "black"

        lw_train, lw_val = 1.8, 1.6

        def finalize_axes(ax: plt.Axes):
            """Apply tick, x-limit and grid settings shared by both figures."""
            ax.tick_params(which="both", top=False, right=False)
            ax.set_xlim(float(np.min(x)), float(np.max(x)))
            if show_grid:
                ax.grid(True, which="major")
                ax.grid(True, which="minor", alpha=0.08)

        def save(fig: plt.Figure, base: Path):
            """Write ``fig`` as ``base.pdf`` and ``base.png``, then close it."""
            fig.savefig(base.with_suffix(".pdf"), bbox_inches="tight", pad_inches=0.02)
            fig.savefig(base.with_suffix(".png"), dpi=dpi, bbox_inches="tight", pad_inches=0.02)
            plt.close(fig)

        # ---- Loss figure ----
        fig, ax = plt.subplots(figsize=figsize)
        ax.plot(x, tr_loss, color=c_train, linestyle="-",  linewidth=lw_train, label="Train")
        ax.plot(x, va_loss, color=c_val,   linestyle="--", linewidth=lw_val,   label="Validation")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend(frameon=False, loc="best")
        finalize_axes(ax)
        save(fig, out_dir / f"{prefix}_loss")

        # ---- Accuracy figure ----
        fig, ax = plt.subplots(figsize=figsize)
        ax.plot(x, tr_acc, color=c_train, linestyle="-",  linewidth=lw_train, label="Train")
        ax.plot(x, va_acc, color=c_val,   linestyle="--", linewidth=lw_val,   label="Validation")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Accuracy (%)" if acc_in_percent else "Accuracy")

        if acc_in_percent:
            if smart_acc_ylim and np.nanmax(va_acc) > 85:
                # Snap limits to multiples of 5 and keep at least a 20-point span.
                lo = max(0.0, np.floor((min(np.nanmin(tr_acc), np.nanmin(va_acc)) - 2.0) / 5.0) * 5.0)
                hi = min(100.0, np.ceil((max(np.nanmax(tr_acc), np.nanmax(va_acc)) + 1.0) / 5.0) * 5.0)
                if hi - lo < 20:
                    lo = max(0.0, hi - 20)
                ax.set_ylim(lo, hi)
            else:
                ax.set_ylim(0.0, 100.0)
        else:
            ax.set_ylim(0.0, 1.0)

        if mark_best_val:
            best_idx = int(np.nanargmax(va_acc))
            ax.axvline(x[best_idx], linestyle=":", color="black", linewidth=1.0, alpha=0.8)
            ax.text(
                x[best_idx], ax.get_ylim()[0],
                f"best@{int(x[best_idx])}",
                ha="center", va="bottom", fontsize=8
            )

        ax.legend(frameon=False, loc="best")
        finalize_axes(ax)
        save(fig, out_dir / f"{prefix}_accuracy")


def plot_confusion_matrix(
    y_true: Union[Sequence[int], np.ndarray],
    y_pred: Union[Sequence[int], np.ndarray],
    out_path: Path,
    *,
    class_names: Optional[Sequence[str]] = None,
    labels: Optional[Sequence[int]] = None,
    normalize: bool = True,
    show_counts: bool = True,
    min_show_pct: float = 2.0,
    min_show_count: int = 1,
    dpi: int = 600,
    cmap="Blues",
    tick_rotation: int = 45,
    ):
    """Draw an annotated confusion matrix and save it as PDF and PNG.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        out_path: Output path; its suffix is replaced by ``.pdf`` and ``.png``.
        class_names: Tick labels, one per class. Defaults to ``a``..``z``
            for up to 26 classes.
        labels: Label values in display order. Defaults to the sorted union
            of the values in ``y_true`` and ``y_pred``.
        normalize: Show row-wise percentages instead of raw counts.
        show_counts: In normalised mode, add the raw count below the percentage.
        min_show_pct: Percentage threshold for annotating a cell (normalised mode).
        min_show_count: Count threshold for annotating a cell.
        dpi: Resolution of the PNG output.
        cmap: Matplotlib colormap.
        tick_rotation: Rotation of the x tick labels in degrees.

    Raises:
        ValueError: If ``class_names`` has the wrong length, or is omitted
            with more than 26 classes.
    """
    rc = {
        "font.family": "serif",
        "font.serif": ["Times New Roman", "STIXGeneral", "DejaVu Serif"],
        "mathtext.fontset": "stix",
        "font.size": 9,
        "axes.labelsize": 10,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        "axes.linewidth": 1.0,
        "axes.spines.top": False,
        "axes.spines.right": False,
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
    }

    with mpl.rc_context(rc):
        out_path = Path(out_path)
        out_path.parent.mkdir(parents=True, exist_ok=True)

        y_true = np.asarray(y_true, dtype=int).reshape(-1)
        y_pred = np.asarray(y_pred, dtype=int).reshape(-1)

        if labels is None:
            labels = np.unique(np.concatenate([y_true, y_pred])).tolist()
        labels = list(labels)
        n = len(labels)

        # Map every label value to its row/column position in the matrix.
        position_of = dict(zip(labels, range(n)))
        true_positions = np.array([position_of[value] for value in y_true])
        pred_positions = np.array([position_of[value] for value in y_pred])

        if class_names is not None:
            if len(class_names) != n:
                raise ValueError("len(class_names) must match number of classes")
        else:
            if n > 26:
                raise ValueError("Default a..z labels support up to 26 classes. Provide class_names explicitly.")
            class_names = list(string.ascii_lowercase[:n])

        cm = confusion_matrix(true_positions, pred_positions, labels=range(n)).astype(float)

        if not normalize:
            cm_show = cm
            peak = np.nanmax(cm_show)
            vmin = 0.0
            vmax = float(peak) if np.isfinite(peak) else 1.0
        else:
            totals = cm.sum(axis=1, keepdims=True)
            # Rows without samples would divide by zero; they stay all-zero.
            totals[totals == 0] = 1.0
            cm_show = (cm / totals) * 100.0
            vmin, vmax = 0.0, 100.0

        # Figure size grows with the class count, clamped to a fixed range.
        width = min(7.2, max(4.2, 0.42 * n + 1.9))
        height = min(7.2, max(3.8, 0.42 * n + 1.6))
        fig, ax = plt.subplots(figsize=(width, height))

        im = ax.imshow(cm_show, interpolation="nearest", cmap=cmap, aspect="equal", vmin=vmin, vmax=vmax)
        cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
        cbar.ax.tick_params(labelsize=8)
        if normalize:
            cbar.set_label("%", fontsize=9)

        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")

        tick_positions = np.arange(n)
        ax.set_xticks(tick_positions)
        ax.set_yticks(tick_positions)
        ax.set_xticklabels(class_names, rotation=tick_rotation, ha="right")
        ax.set_yticklabels(class_names)

        text_box = dict(
            boxstyle="round,pad=0.15",
            facecolor="white",
            edgecolor="none",
            alpha=0.75,
        )
        text_size = 8 if n <= 10 else 7

        for row, col in np.ndindex(n, n):
            count = cm[row, col]
            if normalize:
                pct = cm_show[row, col]
                # A cell is skipped only when it is below BOTH thresholds.
                if (pct < min_show_pct) and (count < min_show_count):
                    continue
                cell_text = f"{pct:.1f}%"
                if show_counts:
                    cell_text += f"\n({int(count)})"
            else:
                if count < min_show_count:
                    continue
                cell_text = f"{int(count)}"

            ax.text(
                col, row, cell_text,
                ha="center", va="center",
                fontsize=text_size,
                color="black",
                bbox=text_box,
            )

        stem = out_path.with_suffix("")
        fig.savefig(stem.with_suffix(".pdf"), bbox_inches="tight", pad_inches=0.02)
        fig.savefig(stem.with_suffix(".png"), dpi=dpi, bbox_inches="tight", pad_inches=0.02)
        plt.close(fig)

### 9. Trainer 


In [273]:
class Trainer:
    """Builds data, model and optimiser from a ``TrainConfig`` and runs training or testing.

    Files written to ``cfg.out_dir``: ``config.json`` (train mode only),
    ``train_log.csv``, ``best.pth``, ``test_summary.txt``,
    ``y_true_test.npy`` and ``y_pred_test.npy``.
    """

    def __init__(self, cfg: TrainConfig):
        """Seed RNGs, load data and labels, and construct model/optimiser/scheduler.

        Raises:
            ValueError: If ``cfg.out_dir``'s name differs from ``make_run_name(cfg)``.
            FileNotFoundError: If ``label_map.json`` is missing from ``cfg.data_dir``.
            RuntimeError: If the model's batch-norm layers disagree with
                ``cfg.use_batchnorm``.
        """
        self.cfg = cfg
        seed_everything(cfg.seed, deterministic=cfg.deterministic)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # The folder name must encode the same settings as the config.
        expected = make_run_name(cfg)
        if cfg.out_dir.name != expected:
            raise ValueError(f"out_dir name mismatch:\n expected={expected}\n got={cfg.out_dir.name}")

        cfg.out_dir.mkdir(parents=True, exist_ok=True)

        # Save config once during training
        if cfg.mode == "train":
            (cfg.out_dir / "config.json").write_text(
                json.dumps(
                    {k: str(v) if isinstance(v, Path) else v for k, v in asdict(cfg).items()},
                    indent=2
                )
            )

        # ---- Load processed splits (y is already int64 0..C-1) ----
        (X_train, X_val, X_test,
         y_train, y_val, y_test,
         len_train, len_val, len_test) = load_raw_splits(cfg.data_dir)

        # ---- Load label names from preprocessing output ----
        label_map_path = cfg.data_dir / "label_map.json"
        if not label_map_path.exists():
            raise FileNotFoundError(
                f"Missing {label_map_path}. Your preprocessing should save label_map.json."
            )

        label_map = json.loads(label_map_path.read_text())
        id2label = {int(k): v for k, v in label_map["id2label"].items()}
        num_classes = len(id2label)

        # class_names in correct index order
        self.class_names = [id2label[i] for i in range(num_classes)]

        # Loaders
        self.train_loader, self.val_loader, self.test_loader = make_loaders(
            X_train, y_train, len_train,
            X_val,   y_val,   len_val,
            X_test,  y_test,  len_test,
            cfg.batch_size,
            num_workers=0
        )

        # Model
        self.model = CNN_BiLSTM(
            num_features=cfg.num_features,
            num_classes=num_classes,
            hidden_size=cfg.hidden_size,
            dropout=cfg.dropout,
            use_batchnorm=cfg.use_batchnorm,
        ).to(self.device)

        # Sanity-check BN toggle
        has_bn = any(isinstance(m, nn.BatchNorm1d) for m in self.model.modules())
        if cfg.use_batchnorm != has_bn:
            raise RuntimeError(
                f"BN mismatch: cfg.use_batchnorm={cfg.use_batchnorm}, model_has_bn={has_bn}"
            )

        print(f"[Model] use_batchnorm={cfg.use_batchnorm} | detected_bn={has_bn}")

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=cfg.lr,
            weight_decay=cfg.weight_decay
        )

        self.scheduler = torch.optim.lr_scheduler.StepLR(
            self.optimizer,
            step_size=cfg.step_size,
            gamma=cfg.gamma
        )

        self.best_val_metric = -1.0   # macro F1
        self.best_val_acc = -1.0      # val accuracy at the best macro-F1 epoch
        self.best_path = cfg.out_dir / "best.pth"
        self.log_path  = cfg.out_dir / "train_log.csv"

    def fit(self):
        """Train with early stopping on validation macro-F1.

        After every epoch the full log is rewritten to ``train_log.csv``; the
        weights are saved to ``best.pth`` whenever macro-F1 improves. Training
        stops once ``cfg.patience`` consecutive epochs bring no improvement.

        Returns:
            Path of the training log CSV.
        """
        patience_left = self.cfg.patience
        rows = []

        for epoch in range(1, self.cfg.epochs + 1):
            tr_loss, tr_acc, tr_acck = train_one_epoch(
                self.model,
                self.train_loader,
                self.device,
                self.criterion,
                self.optimizer,
                topk=self.cfg.topk
            )

            va_loss, va_acc, va_acck, va_macro_f1, va_weighted_f1, _, _ = evaluate(
                self.model,
                self.val_loader,
                self.device,
                self.criterion,
                topk=self.cfg.topk
            )

            rows.append({
                "epoch": epoch,
                "train_loss": tr_loss,
                "train_acc": tr_acc,
                "train_acck": tr_acck,
                "val_loss": va_loss,
                "val_acc": va_acc,
                "val_acck": va_acck,
                "val_macro_f1": va_macro_f1,
                "val_weighted_f1": va_weighted_f1,
                # Learning rate used during this epoch (read before scheduler.step()).
                "lr": self.optimizer.param_groups[0]["lr"],
            })
            pd.DataFrame(rows).to_csv(self.log_path, index=False)

            print(
                f"Epoch {epoch:3d}/{self.cfg.epochs} | "
                f"tr_loss={tr_loss:.4f} tr_acc={tr_acc:.4f} | "
                f"va_loss={va_loss:.4f} va_acc={va_acc:.4f} va_macroF1={va_macro_f1:.4f}"
            )

            val_metric = float(va_macro_f1)  # early stop on macro-F1
            if val_metric > self.best_val_metric:
                self.best_val_metric = val_metric
                self.best_val_acc = float(va_acc)
                torch.save(self.model.state_dict(), self.best_path)
                patience_left = self.cfg.patience
            else:
                patience_left -= 1
                if patience_left <= 0:
                    print(
                        f"Early stopping at epoch {epoch} "
                        f"(best val_macroF1={self.best_val_metric:.4f}, best val_acc={self.best_val_acc:.4f})"
                    )
                    break

            self.scheduler.step()

        return self.log_path

    def test(self):
        """Evaluate the ``best.pth`` checkpoint on the test split and write reports.

        Writes ``test_summary.txt`` plus the true/predicted label arrays.

        Returns:
            ``(test_loss, test_acc, y_true, y_pred)``.

        Raises:
            FileNotFoundError: If ``best.pth`` does not exist.
        """
        if not self.best_path.exists():
            raise FileNotFoundError(f"Missing {self.best_path}. Train first (or point to a run_dir with best.pth).")

        self.model.load_state_dict(torch.load(self.best_path, map_location=self.device))

        te_loss, te_acc, te_acck, te_macro_f1, te_weighted_f1, y_true, y_pred = evaluate(
            self.model,
            self.test_loader,
            self.device,
            self.criterion,
            topk=self.cfg.topk
        )

        # Pass the full label set explicitly: otherwise the report raises when
        # a class is absent from both y_true and y_pred, because the number of
        # target_names no longer matches the labels it finds.
        report_txt = classification_report(
            y_true, y_pred,
            labels=list(range(len(self.class_names))),
            digits=4,
            zero_division=0,
            target_names=[str(c) for c in self.class_names]
        )

        # If a log exists, refresh the best validation values from it. The
        # checkpoint is chosen by macro-F1, so report the accuracy of that
        # same epoch rather than the maximum accuracy over all epochs.
        if self.log_path.exists():
            df = pd.read_csv(self.log_path)
            if len(df) > 0:
                has_f1 = "val_macro_f1" in df.columns and df["val_macro_f1"].notna().any()
                if has_f1:
                    best_row = df.loc[df["val_macro_f1"].idxmax()]
                    self.best_val_metric = float(best_row["val_macro_f1"])
                    if "val_acc" in df.columns:
                        self.best_val_acc = float(best_row["val_acc"])
                elif "val_acc" in df.columns:
                    # Without a usable macro-F1 column, fall back to the best accuracy.
                    self.best_val_acc = float(df["val_acc"].max())

        (self.cfg.out_dir / "test_summary.txt").write_text(
            f"Best val_macroF1: {self.best_val_metric:.4f}\n"
            f"Best val_acc: {self.best_val_acc:.4f}\n"
            f"Test loss: {te_loss:.4f}\n"
            f"Test acc: {te_acc:.4f}\n"
            f"Test acc@{self.cfg.topk}: {te_acck:.4f}\n"
            f"Test macro F1: {te_macro_f1:.4f}\n"
            f"Test weighted F1: {te_weighted_f1:.4f}\n\n"
            f"{report_txt}\n"
        )

        np.save(self.cfg.out_dir / "y_true_test.npy", y_true)
        np.save(self.cfg.out_dir / "y_pred_test.npy", y_pred)

        print(f"TEST | loss={te_loss:.4f} acc={te_acc:.4f} macroF1={te_macro_f1:.4f}")
        return te_loss, te_acc, y_true, y_pred


### 10. Start Training

In [340]:
def train():
    """Train one model with the settings defined below.

    The run folder is ``results/<run name>``, where the run name is derived
    from the configuration by ``make_run_name``.
    """
    settings = dict(
        data_dir=Path("data/processed_imu"),
        out_dir=Path("results"),
        batch_size=16,
        use_batchnorm=False,
        dropout=0.3,
        seed=42,
        topk=3,
        epochs=30,
        deterministic=True,
        mode="train",
    )
    cfg = TrainConfig(**settings)

    # Trainer requires the folder name to equal the generated run name.
    run_name = make_run_name(cfg)
    cfg.out_dir = cfg.out_dir / run_name

    Trainer(cfg).fit()
    print("Training completed.")

In [342]:
# Start the training run configured in train().
train()

[Model] use_batchnorm=False | detected_bn=False
Epoch   1/30 | tr_loss=2.6926 tr_acc=0.1729 | va_loss=1.9648 va_acc=0.3406 va_macroF1=0.2857
Epoch   2/30 | tr_loss=1.5561 tr_acc=0.4816 | va_loss=1.0556 va_acc=0.6575 va_macroF1=0.6448
Epoch   3/30 | tr_loss=0.8112 tr_acc=0.7459 | va_loss=0.5522 va_acc=0.8307 va_macroF1=0.8255
Epoch   4/30 | tr_loss=0.5005 tr_acc=0.8410 | va_loss=0.3888 va_acc=0.8957 va_macroF1=0.8936
Epoch   5/30 | tr_loss=0.3177 tr_acc=0.8977 | va_loss=0.3044 va_acc=0.9173 va_macroF1=0.9161
Epoch   6/30 | tr_loss=0.2185 tr_acc=0.9345 | va_loss=0.1998 va_acc=0.9449 va_macroF1=0.9452
Epoch   7/30 | tr_loss=0.1689 tr_acc=0.9480 | va_loss=0.1679 va_acc=0.9488 va_macroF1=0.9482
Epoch   8/30 | tr_loss=0.1225 tr_acc=0.9636 | va_loss=0.1605 va_acc=0.9665 va_macroF1=0.9664
Epoch   9/30 | tr_loss=0.0561 tr_acc=0.9818 | va_loss=0.0812 va_acc=0.9823 va_macroF1=0.9824
Epoch  10/30 | tr_loss=0.0418 tr_acc=0.9882 | va_loss=0.0666 va_acc=0.9882 va_macroF1=0.9882
Epoch  11/30 | tr_loss

### 11. Evaluate Model

In [325]:
def load_cfg_from_run_dir(run_dir: str | Path) -> TrainConfig:
    """Rebuild the ``TrainConfig`` of a finished run for evaluation.

    Args:
        run_dir: Experiment output folder containing ``config.json``.

    Returns:
        The stored configuration with ``mode`` set to ``"eval"`` and
        ``out_dir`` pointing at ``run_dir``.

    Raises:
        FileNotFoundError: If ``run_dir`` has no ``config.json``.
    """
    run_dir = Path(run_dir)
    cfg_path = run_dir / "config.json"
    if not cfg_path.exists():
        raise FileNotFoundError(f"Missing {cfg_path}. This run_dir is not a valid experiment output folder.")

    d = json.loads(cfg_path.read_text())

    d["data_dir"] = Path(d["data_dir"])
    # Use the folder we were actually pointed at: the out_dir stored in
    # config.json goes stale when the run folder is moved or the notebook is
    # started from another working directory.
    d["out_dir"] = run_dir

    d["mode"] = "eval"

    return TrainConfig(**d)


def evaluate_run(run_dir: str | Path):
    """Evaluate the saved best checkpoint of ``run_dir`` on the test split."""
    eval_cfg = load_cfg_from_run_dir(run_dir)
    Trainer(eval_cfg).test()
    print(f"Evaluation completed for: {eval_cfg.out_dir}")

In [344]:
# Evaluate the batch-norm and the no-batch-norm runs on the test split.
evaluate_run("results/exp_bn_bs16_seed42_drop03")
evaluate_run("results/exp_no_bn_bs16_seed42_drop03")    

[Model] use_batchnorm=True | detected_bn=True
TEST | loss=0.1175 acc=0.9822 macroF1=0.9822
Evaluation completed for: results\exp_bn_bs16_seed42_drop03
[Model] use_batchnorm=False | detected_bn=False
TEST | loss=0.1488 acc=0.9763 macroF1=0.9765
Evaluation completed for: results\exp_no_bn_bs16_seed42_drop03


### 12. Draw Plots 

In [345]:
def plot_results(run_dir: Union[str, Path]):
    """Create training-curve and confusion-matrix figures for a finished run.

    Reads ``train_log.csv``, ``y_true_test.npy``, ``y_pred_test.npy`` and
    ``config.json`` from ``run_dir``, and the class names from
    ``label_map.json`` in the data folder recorded in that config.

    Raises:
        FileNotFoundError: If a required input file is missing.
    """
    run_path = Path(run_dir)

    log_csv = run_path / "train_log.csv"
    y_true = np.load(run_path / "y_true_test.npy")
    y_pred = np.load(run_path / "y_pred_test.npy")

    # Class names come from the same data_dir that was used for training.
    cfg_path = run_path / "config.json"
    if not cfg_path.exists():
        raise FileNotFoundError(f"Missing {cfg_path}")
    data_dir = Path(json.loads(cfg_path.read_text())["data_dir"])

    label_map_path = data_dir / "label_map.json"
    if not label_map_path.exists():
        raise FileNotFoundError(f"Missing {label_map_path}")
    raw_id2label = json.loads(label_map_path.read_text())["id2label"]
    id2label = {int(key): name for key, name in raw_id2label.items()}
    class_names = [id2label[i] for i in range(len(id2label))]

    plot_training_curves_csv(log_csv, run_path, run_path.name)

    labels = list(range(len(class_names)))

    # One figure with raw counts, one with row-normalised percentages.
    variants = (
        ("confusion_matrix_counts", {"normalize": False}),
        (
            "confusion_matrix_normalized",
            {"normalize": True, "show_counts": True, "min_show_pct": 10.0},
        ),
    )
    for file_stem, options in variants:
        plot_confusion_matrix(
            y_true, y_pred,
            run_path / file_stem,
            class_names=class_names,
            labels=labels,
            **options,
        )

    print(f"Plots generated for {run_path}")

In [346]:
# Generate the figures for both evaluated runs.
plot_results("results/exp_bn_bs16_seed42_drop03")
plot_results("results/exp_no_bn_bs16_seed42_drop03")


Plots generated for results\exp_bn_bs16_seed42_drop03
Plots generated for results\exp_no_bn_bs16_seed42_drop03
