# HW08-09: PyTorch MLP — регуляризация и оптимизация обучения

**Датасет:** KMNIST (Вариант A)  


## 2.3.1. Импорты, seed и устройство

In [7]:
import os
import json
import csv
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset

import torchvision  # для версии в выводе
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

print(f"torch:       {torch.__version__}")
print(f"torchvision: {torchvision.__version__}")

torch:       2.10.0+cpu
torchvision: 0.25.0+cpu


In [8]:
SEED = 42
# Seed NumPy and PyTorch with the same value so runs are repeatable.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Используемое устройство: {device}")

# Output directories for everything the notebook writes to disk.
ARTIFACTS_DIR = Path('artifacts')
FIGURES_DIR = ARTIFACTS_DIR.joinpath('figures')
# mkdir() is called without parents=True, so the parent has to be created first.
ARTIFACTS_DIR.mkdir(exist_ok=True)
FIGURES_DIR.mkdir(exist_ok=True)
print(f"Артефакты будут сохранены в: {ARTIFACTS_DIR.resolve()}")

Используемое устройство: cpu
Артефакты будут сохранены в: /home/antonio/Рабочий стол/VUZ/aie_dpo_2025/homeworks/HW08-09/artifacts


## 2.3.2. Данные и DataLoader

In [9]:
from torch.utils.data import Dataset

class KMNISTFromNpz(Dataset):
    """
    KMNIST dataset read from two local ``.npz`` archives.

    Both archives are read through their ``'arr_0'`` entry. The original
    author's note gives the expected layout as images ``uint8 (N, 28, 28)``
    and labels ``uint8 (N,)``.

    Args:
        imgs_path: path to the ``.npz`` archive with the images.
        labels_path: path to the ``.npz`` archive with the labels.
        transform: optional callable applied to each image tensor *after*
            the built-in normalisation; its return value is what
            ``__getitem__`` yields. ``None`` disables it.

    Raises:
        ValueError: if the two archives hold a different number of items.
    """
    CLASSES = ['お', 'き', 'す', 'つ', 'な', 'は', 'ま', 'や', 'れ', 'を']

    def __init__(self, imgs_path, labels_path, transform=None):
        # np.load() on an .npz returns a lazy archive object that keeps the
        # file open; the with-blocks close it once the array has been read.
        with np.load(imgs_path) as archive:
            imgs = archive['arr_0']
        with np.load(labels_path) as archive:
            labels = archive['arr_0']

        # Fail fast instead of hitting an IndexError (or silently ignoring
        # extra images) somewhere in the middle of an epoch.
        if len(imgs) != len(labels):
            raise ValueError(
                f"images/labels size mismatch: {len(imgs)} vs {len(labels)}"
            )

        self.images    = imgs.astype(np.float32) / 255.0  # scaled to [0, 1]
        self.labels    = labels.astype(np.int64)
        self.transform = transform
        self.classes   = self.CLASSES

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is a float tensor with a leading channel axis of size 1,
        normalised with ``(x - 0.5) / 0.5`` and then passed through
        ``transform`` when one was supplied.
        """
        img = torch.tensor(self.images[idx]).unsqueeze(0)  # add channel axis
        # Normalisation: (x - 0.5) / 0.5 maps [0, 1] onto [-1, 1]
        img = (img - 0.5) / 0.5
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[idx]
        return img, label


# Build both splits from the .npz archives. The paths are relative, so the
# files must be in the current working directory.
full_train_dataset = KMNISTFromNpz('kmnist-train-imgs.npz', 'kmnist-train-labels.npz')
test_dataset = KMNISTFromNpz('kmnist-test-imgs.npz', 'kmnist-test-labels.npz')

print(f"Всего train: {len(full_train_dataset)}, Test: {len(test_dataset)}")
print(f"Классы: {full_train_dataset.classes}")

FileNotFoundError: [Errno 2] No such file or directory: 'kmnist-train-imgs.npz'

In [None]:
# Reproducible 80/20 train/validation split of the training set.
n_total = len(full_train_dataset)
n_train = int(0.8 * n_total)
n_val = n_total - n_train  # remainder, so both parts always add up to n_total

generator = torch.Generator()
generator.manual_seed(SEED)
train_dataset, val_dataset = random_split(
    full_train_dataset, lengths=[n_train, n_val], generator=generator
)

print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")

# Data loaders: only the training loader shuffles.
# NOTE(review): the shuffling generator is created once and the same
# train_loader object is reused by every experiment below, so the batch
# order an experiment sees presumably depends on how many epochs were drawn
# before it — confirm if per-experiment reproducibility matters.
BATCH_SIZE = 64

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

# Sanity check on a single training batch
x_sample, y_sample = next(iter(train_loader))
print(f"\nSanity check:")
print(f"  x.shape = {x_sample.shape}  (batch, channels, H, W)")
print(f"  y.shape = {y_sample.shape}")
print(f"  x range = [{x_sample.min():.3f}, {x_sample.max():.3f}]")
print(f"  y unique = {y_sample.unique().tolist()}")

Train: 48000, Val: 12000, Test: 10000

Sanity check:
  x.shape = torch.Size([64, 1, 28, 28])  (batch, channels, H, W)
  y.shape = torch.Size([64])
  x range = [-1.000, 1.000]
  y unique = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


## 2.3.3. Модель MLP

In [None]:
class MLP(nn.Module):
    """
    Configurable multi-layer perceptron with optional Dropout and BatchNorm.

    The input is flattened first. Each hidden block is assembled as
        Linear -> [BatchNorm1d] -> ReLU -> [Dropout]
    and a final Linear layer maps to ``num_classes`` outputs.

    Args:
        input_size: number of features after flattening.
        hidden_sizes: widths of the hidden layers; ``None`` means [512, 256].
        num_classes: size of the output layer.
        dropout_p: dropout probability; Dropout layers are added only when
            this is greater than 0.
        use_batchnorm: insert BatchNorm1d after every hidden Linear layer.
    """
    def __init__(
        self,
        input_size: int = 784,
        hidden_sizes: list = None,
        num_classes: int = 10,
        dropout_p: float = 0.0,
        use_batchnorm: bool = False,
    ):
        super().__init__()
        widths = [512, 256] if hidden_sizes is None else hidden_sizes

        modules = [nn.Flatten()]
        fan_in = input_size
        for width in widths:
            # Assemble one hidden block, then splice it into the layer list.
            block = [nn.Linear(fan_in, width)]
            if use_batchnorm:
                block.append(nn.BatchNorm1d(width))
            block.append(nn.ReLU())
            if dropout_p > 0.0:
                block.append(nn.Dropout(dropout_p))
            modules.extend(block)
            fan_in = width

        # Output layer (no activation is applied after it).
        modules.append(nn.Linear(fan_in, num_classes))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Pass ``x`` through the sequential stack and return its output."""
        return self.net(x)


# Smoke test: push a dummy all-zero batch through a default MLP and report
# the input/output shapes, then drop the temporaries.
_x = torch.zeros(4, 1, 28, 28)
_m = MLP()
_out = _m(_x)
print(f"MLP forward test: input {_x.shape} -> output {_out.shape}")
del _out, _x, _m

MLP forward test: input torch.Size([4, 1, 28, 28]) -> output torch.Size([4, 10])


## Утилиты: цикл обучения и оценка

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    The model is switched to training mode, and for every batch the
    gradients are reset, the loss is back-propagated and the optimizer
    takes one step.

    Returns:
        ``(mean_loss, accuracy)``: the batch losses weighted by batch size
        and divided by the number of samples seen, and the fraction of
        samples whose arg-max prediction equals the target.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        batch_size = inputs.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = criterion(outputs, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch is not over-counted.
        loss_sum += batch_loss.item() * batch_size
        n_correct += (outputs.argmax(dim=1) == targets).sum().item()
        n_seen += batch_size
    return loss_sum / n_seen, n_correct / n_seen


@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating it.

    The model is switched to eval mode and the whole pass runs with
    gradient tracking disabled.

    Returns:
        ``(mean_loss, accuracy)``: the batch losses weighted by batch size
        and divided by the number of samples seen, and the fraction of
        samples whose arg-max prediction equals the target.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        batch_size = inputs.size(0)

        outputs = model(inputs)
        loss_sum += criterion(outputs, targets).item() * batch_size
        n_correct += (outputs.argmax(dim=1) == targets).sum().item()
        n_seen += batch_size
    return loss_sum / n_seen, n_correct / n_seen


class EarlyStopping:
    """Signal a stop once val_accuracy has not improved for ``patience`` epochs.

    An epoch counts as an improvement only when its validation accuracy
    exceeds the best value so far by more than ``min_delta``. A copy of the
    model's state dict is kept for the best epoch so it can be restored.
    """
    def __init__(self, patience: int = 5, min_delta: float = 1e-4):
        self.patience = patience
        self.min_delta = min_delta
        self.best_val_acc = -float('inf')
        self.counter = 0           # epochs since the last improvement
        self.best_state = None     # state-dict snapshot of the best epoch
        self.stopped_epoch = None  # set when a stop is signalled

    def step(self, val_acc, model, epoch):
        """Record one epoch; return True when training should stop."""
        improved = val_acc > self.best_val_acc + self.min_delta
        if improved:
            self.best_val_acc = val_acc
            self.counter = 0
            # Clone every tensor: state_dict() hands out references that
            # later optimizer steps would otherwise overwrite.
            self.best_state = {
                name: tensor.clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            self.counter += 1

        should_stop = self.counter >= self.patience
        if should_stop:
            self.stopped_epoch = epoch
        return should_stop

    def restore_best(self, model):
        """Load the best snapshot into ``model``; no-op if none was taken."""
        if self.best_state is None:
            return
        model.load_state_dict(self.best_state)


def run_experiment(model, train_loader, val_loader, optimizer, criterion, device,
                   max_epochs=15, early_stopping=None, verbose=True, tag=''):
    """Train ``model`` for up to ``max_epochs`` epochs and return its history.

    Every epoch runs ``train_one_epoch`` on ``train_loader`` followed by
    ``evaluate`` on ``val_loader``.

    Args:
        model, optimizer, criterion, device: forwarded to the per-epoch
            helpers.
        train_loader, val_loader: batch iterables for training/validation.
        max_epochs: upper bound on the number of epochs.
        early_stopping: optional object exposing ``step(val_acc, model,
            epoch)``, ``restore_best(model)`` and ``best_val_acc``. When
            given, the model leaves this function with the best weights it
            recorded, whether training stopped early or ran all epochs.
        verbose: print one metrics line per epoch.
        tag: label used as a prefix in the per-epoch lines.

    Returns:
        dict with the lists ``train_loss``, ``val_loss``, ``train_acc`` and
        ``val_acc``, one entry per completed epoch.
    """
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}
    for epoch in range(1, max_epochs + 1):
        tr_loss, tr_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        vl_loss, vl_acc = evaluate(model, val_loader, criterion, device)
        history['train_loss'].append(tr_loss)
        history['val_loss'].append(vl_loss)
        history['train_acc'].append(tr_acc)
        history['val_acc'].append(vl_acc)
        if verbose:
            print(f"[{tag}] Epoch {epoch:02d}/{max_epochs}: "
                  f"train_loss={tr_loss:.4f} train_acc={tr_acc:.4f} | "
                  f"val_loss={vl_loss:.4f} val_acc={vl_acc:.4f}")
        if early_stopping is not None and early_stopping.step(vl_acc, model, epoch):
            print(f"  >>> EarlyStopping на эпохе {epoch}, "
                  f"лучшая val_acc={early_stopping.best_val_acc:.4f}")
            break

    # Restore the best snapshot on every exit path. Previously this happened
    # only when the stop criterion fired, so a run that reached max_epochs
    # kept its last-epoch weights while the tracker reported the metrics of
    # an earlier epoch.
    if early_stopping is not None:
        early_stopping.restore_best(model)
    return history

print("Утилиты определены.")

Утилиты определены.


---
## 3.1. Часть A (S08): Регуляризация (E1-E4)

In [None]:
# Loss function and epoch budget shared by the Part A experiments.
criterion = nn.CrossEntropyLoss()
MAX_EPOCHS_A = 15

# ── E1: baseline MLP without regularisation ──
print("=" * 60, "E1: Base MLP (no Dropout, no BatchNorm)", "=" * 60, sep="\n")

# Re-seed so the weight initialisation does not depend on earlier cells.
torch.manual_seed(SEED)
model_e1 = MLP(hidden_sizes=[512, 256], dropout_p=0.0, use_batchnorm=False)
model_e1 = model_e1.to(device)
optimizer_e1 = optim.Adam(model_e1.parameters(), lr=1e-3)

history_e1 = run_experiment(
    model=model_e1,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_e1,
    criterion=criterion,
    device=device,
    max_epochs=MAX_EPOCHS_A,
    tag='E1',
)

# Best validation accuracy and the validation loss of that same epoch
# (max() keeps the first of several equal maxima).
best_val_acc_e1, best_val_loss_e1 = max(
    zip(history_e1['val_acc'], history_e1['val_loss']),
    key=lambda pair: pair[0],
)
epochs_e1 = len(history_e1['val_acc'])
print(f"\n[E1] Лучшая val_accuracy: {best_val_acc_e1:.4f}")

E1: Base MLP (no Dropout, no BatchNorm)


[E1] Epoch 01/15: train_loss=0.4363 train_acc=0.8628 | val_loss=0.2836 val_acc=0.9122


[E1] Epoch 02/15: train_loss=0.2086 train_acc=0.9363 | val_loss=0.1990 val_acc=0.9415


[E1] Epoch 03/15: train_loss=0.1448 train_acc=0.9545 | val_loss=0.1959 val_acc=0.9397


[E1] Epoch 04/15: train_loss=0.1081 train_acc=0.9655 | val_loss=0.1922 val_acc=0.9432


[E1] Epoch 05/15: train_loss=0.0880 train_acc=0.9712 | val_loss=0.1947 val_acc=0.9477


[E1] Epoch 06/15: train_loss=0.0729 train_acc=0.9764 | val_loss=0.1845 val_acc=0.9472


[E1] Epoch 07/15: train_loss=0.0634 train_acc=0.9785 | val_loss=0.1823 val_acc=0.9549


[E1] Epoch 08/15: train_loss=0.0521 train_acc=0.9830 | val_loss=0.2002 val_acc=0.9519


[E1] Epoch 09/15: train_loss=0.0479 train_acc=0.9843 | val_loss=0.2196 val_acc=0.9498


[E1] Epoch 10/15: train_loss=0.0479 train_acc=0.9840 | val_loss=0.1876 val_acc=0.9561


[E1] Epoch 11/15: train_loss=0.0349 train_acc=0.9886 | val_loss=0.2059 val_acc=0.9554


[E1] Epoch 12/15: train_loss=0.0388 train_acc=0.9870 | val_loss=0.2345 val_acc=0.9522


[E1] Epoch 13/15: train_loss=0.0311 train_acc=0.9891 | val_loss=0.2625 val_acc=0.9512


[E1] Epoch 14/15: train_loss=0.0359 train_acc=0.9878 | val_loss=0.2419 val_acc=0.9547


[E1] Epoch 15/15: train_loss=0.0338 train_acc=0.9897 | val_loss=0.2487 val_acc=0.9526

[E1] Лучшая val_accuracy: 0.9561


In [None]:
# ── E2: MLP + Dropout ──
print("=" * 60, "E2: MLP + Dropout(p=0.3)", "=" * 60, sep="\n")

# Re-seed so the weight initialisation does not depend on earlier cells.
torch.manual_seed(SEED)
model_e2 = MLP(hidden_sizes=[512, 256], dropout_p=0.3, use_batchnorm=False)
model_e2 = model_e2.to(device)
optimizer_e2 = optim.Adam(model_e2.parameters(), lr=1e-3)

history_e2 = run_experiment(
    model=model_e2,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_e2,
    criterion=criterion,
    device=device,
    max_epochs=MAX_EPOCHS_A,
    tag='E2',
)

# Best validation accuracy and the validation loss of that same epoch
# (max() keeps the first of several equal maxima).
best_val_acc_e2, best_val_loss_e2 = max(
    zip(history_e2['val_acc'], history_e2['val_loss']),
    key=lambda pair: pair[0],
)
epochs_e2 = len(history_e2['val_acc'])
print(f"\n[E2] Лучшая val_accuracy: {best_val_acc_e2:.4f}")

E2: MLP + Dropout(p=0.3)


[E2] Epoch 01/15: train_loss=0.5469 train_acc=0.8254 | val_loss=0.3064 val_acc=0.9103


[E2] Epoch 02/15: train_loss=0.3142 train_acc=0.9034 | val_loss=0.2316 val_acc=0.9299


[E2] Epoch 03/15: train_loss=0.2525 train_acc=0.9211 | val_loss=0.1992 val_acc=0.9401


[E2] Epoch 04/15: train_loss=0.2145 train_acc=0.9325 | val_loss=0.2073 val_acc=0.9367


[E2] Epoch 05/15: train_loss=0.1876 train_acc=0.9408 | val_loss=0.1925 val_acc=0.9440


[E2] Epoch 06/15: train_loss=0.1693 train_acc=0.9471 | val_loss=0.1839 val_acc=0.9465


[E2] Epoch 07/15: train_loss=0.1592 train_acc=0.9498 | val_loss=0.1756 val_acc=0.9472


[E2] Epoch 08/15: train_loss=0.1494 train_acc=0.9520 | val_loss=0.2004 val_acc=0.9439


[E2] Epoch 09/15: train_loss=0.1404 train_acc=0.9548 | val_loss=0.1577 val_acc=0.9551


[E2] Epoch 10/15: train_loss=0.1342 train_acc=0.9577 | val_loss=0.1691 val_acc=0.9537


[E2] Epoch 11/15: train_loss=0.1251 train_acc=0.9611 | val_loss=0.1571 val_acc=0.9577


[E2] Epoch 12/15: train_loss=0.1210 train_acc=0.9625 | val_loss=0.1738 val_acc=0.9536


[E2] Epoch 13/15: train_loss=0.1163 train_acc=0.9623 | val_loss=0.1638 val_acc=0.9539


[E2] Epoch 14/15: train_loss=0.1108 train_acc=0.9654 | val_loss=0.1708 val_acc=0.9557


[E2] Epoch 15/15: train_loss=0.1074 train_acc=0.9651 | val_loss=0.1655 val_acc=0.9566

[E2] Лучшая val_accuracy: 0.9577


In [None]:
# ── E3: MLP + BatchNorm ──
print("=" * 60, "E3: MLP + BatchNorm", "=" * 60, sep="\n")

# Re-seed so the weight initialisation does not depend on earlier cells.
torch.manual_seed(SEED)
model_e3 = MLP(hidden_sizes=[512, 256], dropout_p=0.0, use_batchnorm=True)
model_e3 = model_e3.to(device)
optimizer_e3 = optim.Adam(model_e3.parameters(), lr=1e-3)

history_e3 = run_experiment(
    model=model_e3,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_e3,
    criterion=criterion,
    device=device,
    max_epochs=MAX_EPOCHS_A,
    tag='E3',
)

# Best validation accuracy and the validation loss of that same epoch
# (max() keeps the first of several equal maxima).
best_val_acc_e3, best_val_loss_e3 = max(
    zip(history_e3['val_acc'], history_e3['val_loss']),
    key=lambda pair: pair[0],
)
epochs_e3 = len(history_e3['val_acc'])
print(f"\n[E3] Лучшая val_accuracy: {best_val_acc_e3:.4f}")

E3: MLP + BatchNorm


[E3] Epoch 01/15: train_loss=0.3383 train_acc=0.8964 | val_loss=0.2156 val_acc=0.9313


[E3] Epoch 02/15: train_loss=0.1554 train_acc=0.9519 | val_loss=0.1695 val_acc=0.9469


[E3] Epoch 03/15: train_loss=0.1004 train_acc=0.9677 | val_loss=0.1734 val_acc=0.9463


[E3] Epoch 04/15: train_loss=0.0752 train_acc=0.9760 | val_loss=0.1620 val_acc=0.9525


[E3] Epoch 05/15: train_loss=0.0530 train_acc=0.9831 | val_loss=0.1639 val_acc=0.9536


[E3] Epoch 06/15: train_loss=0.0481 train_acc=0.9841 | val_loss=0.1647 val_acc=0.9533


[E3] Epoch 07/15: train_loss=0.0371 train_acc=0.9880 | val_loss=0.1703 val_acc=0.9557


[E3] Epoch 08/15: train_loss=0.0328 train_acc=0.9891 | val_loss=0.1751 val_acc=0.9570


[E3] Epoch 09/15: train_loss=0.0289 train_acc=0.9905 | val_loss=0.1652 val_acc=0.9583


[E3] Epoch 10/15: train_loss=0.0294 train_acc=0.9901 | val_loss=0.1732 val_acc=0.9569


[E3] Epoch 11/15: train_loss=0.0247 train_acc=0.9914 | val_loss=0.1558 val_acc=0.9596


[E3] Epoch 12/15: train_loss=0.0205 train_acc=0.9931 | val_loss=0.1570 val_acc=0.9625


[E3] Epoch 13/15: train_loss=0.0197 train_acc=0.9938 | val_loss=0.1722 val_acc=0.9593


[E3] Epoch 14/15: train_loss=0.0173 train_acc=0.9943 | val_loss=0.1822 val_acc=0.9580


[E3] Epoch 15/15: train_loss=0.0182 train_acc=0.9938 | val_loss=0.1743 val_acc=0.9597

[E3] Лучшая val_accuracy: 0.9625


In [None]:
# ── E4: the better of E2/E3, retrained with EarlyStopping ──
# Ties go to E3 (BatchNorm).
if best_val_acc_e3 >= best_val_acc_e2:
    best_base_tag = 'E3'
    e4_dropout_p = 0.0
    e4_batchnorm = True
else:
    best_base_tag = 'E2'
    e4_dropout_p = 0.3
    e4_batchnorm = False

print(f"Лучший между E2/E3: {best_base_tag} (val_acc={max(best_val_acc_e2, best_val_acc_e3):.4f})")
print("=" * 60)
print(f"E4: {best_base_tag} конфиг + EarlyStopping(patience=5)")
print("=" * 60)

torch.manual_seed(SEED)
model_e4 = MLP(
    hidden_sizes=[512, 256],
    dropout_p=e4_dropout_p,
    use_batchnorm=e4_batchnorm
).to(device)
optimizer_e4 = optim.Adam(model_e4.parameters(), lr=1e-3)
es = EarlyStopping(patience=5)

history_e4 = run_experiment(
    model_e4, train_loader, val_loader, optimizer_e4, criterion, device,
    max_epochs=30, early_stopping=es, tag='E4'
)

# Make sure the model holds the best recorded weights even if training ran
# all 30 epochs without the stop criterion firing. Calling this again after
# an early stop is harmless.
es.restore_best(model_e4)

best_val_acc_e4 = es.best_val_acc
# Report the loss of the epoch whose weights EarlyStopping kept. This is not
# always argmax(val_acc): an accuracy gain no larger than min_delta is not
# recorded as a new best. The first epoch whose accuracy equals the recorded
# best is the one that was snapshotted.
best_val_loss_e4 = history_e4['val_loss'][history_e4['val_acc'].index(best_val_acc_e4)]
epochs_e4 = len(history_e4['val_acc'])
stopped_epoch = es.stopped_epoch  # None when the stop criterion never fired

print(f"\n[E4] Лучшая val_accuracy: {best_val_acc_e4:.4f}")
print(f"[E4] Обучение остановлено на эпохе: {stopped_epoch}")

# Save the best model
torch.save(model_e4.state_dict(), str(ARTIFACTS_DIR / 'best_model.pt'))
print("Сохранено: artifacts/best_model.pt")

Лучший между E2/E3: E3 (val_acc=0.9625)
E4: E3 конфиг + EarlyStopping(patience=5)


[E4] Epoch 01/30: train_loss=0.3398 train_acc=0.8972 | val_loss=0.2078 val_acc=0.9327


[E4] Epoch 02/30: train_loss=0.1548 train_acc=0.9520 | val_loss=0.2393 val_acc=0.9228


[E4] Epoch 03/30: train_loss=0.1028 train_acc=0.9673 | val_loss=0.1652 val_acc=0.9503


[E4] Epoch 04/30: train_loss=0.0754 train_acc=0.9763 | val_loss=0.1579 val_acc=0.9527


[E4] Epoch 05/30: train_loss=0.0560 train_acc=0.9812 | val_loss=0.1534 val_acc=0.9556


[E4] Epoch 06/30: train_loss=0.0441 train_acc=0.9859 | val_loss=0.1565 val_acc=0.9557


[E4] Epoch 07/30: train_loss=0.0381 train_acc=0.9873 | val_loss=0.1606 val_acc=0.9573


[E4] Epoch 08/30: train_loss=0.0357 train_acc=0.9882 | val_loss=0.1607 val_acc=0.9581


[E4] Epoch 09/30: train_loss=0.0281 train_acc=0.9903 | val_loss=0.1605 val_acc=0.9570


[E4] Epoch 10/30: train_loss=0.0267 train_acc=0.9913 | val_loss=0.1668 val_acc=0.9587


[E4] Epoch 11/30: train_loss=0.0233 train_acc=0.9920 | val_loss=0.1682 val_acc=0.9598


[E4] Epoch 12/30: train_loss=0.0217 train_acc=0.9925 | val_loss=0.1604 val_acc=0.9615


[E4] Epoch 13/30: train_loss=0.0206 train_acc=0.9934 | val_loss=0.1729 val_acc=0.9614


[E4] Epoch 14/30: train_loss=0.0188 train_acc=0.9942 | val_loss=0.1690 val_acc=0.9627


[E4] Epoch 15/30: train_loss=0.0153 train_acc=0.9952 | val_loss=0.1768 val_acc=0.9597


[E4] Epoch 16/30: train_loss=0.0166 train_acc=0.9950 | val_loss=0.1716 val_acc=0.9617


[E4] Epoch 17/30: train_loss=0.0158 train_acc=0.9950 | val_loss=0.1752 val_acc=0.9613


[E4] Epoch 18/30: train_loss=0.0141 train_acc=0.9953 | val_loss=0.1859 val_acc=0.9613


[E4] Epoch 19/30: train_loss=0.0155 train_acc=0.9948 | val_loss=0.1809 val_acc=0.9626
  >>> EarlyStopping на эпохе 19, лучшая val_acc=0.9627

[E4] Лучшая val_accuracy: 0.9627
[E4] Обучение остановлено на эпохе: 19
Сохранено: artifacts/best_model.pt


---
## 3.2. Часть B (S09): LR, оптимизаторы, weight decay (O1-O3)

In [None]:
# Fixed architecture for O1-O3: the same one as E4
MAX_EPOCHS_DIAG = 8   # O1 and O2 are short diagnostic runs
MAX_EPOCHS_O3   = 15  # O3

# ── O1: Adam with a learning rate that is too large ──
print("=" * 60)
print("O1: Adam, lr=0.1 (слишком большой)")
print("=" * 60)

torch.manual_seed(SEED)
model_o1 = MLP(
    hidden_sizes=[512, 256], dropout_p=e4_dropout_p, use_batchnorm=e4_batchnorm
).to(device)
optimizer_o1 = optim.Adam(model_o1.parameters(), lr=0.1)

history_o1 = run_experiment(
    model_o1, train_loader, val_loader, optimizer_o1, criterion, device,
    max_epochs=MAX_EPOCHS_DIAG, tag='O1'
)

best_val_acc_o1 = max(history_o1['val_acc'])
# Validation loss of the best-accuracy epoch, the same definition the other
# experiments use, so the best_val_loss column means one thing in every row.
best_val_loss_o1 = history_o1['val_loss'][history_o1['val_acc'].index(best_val_acc_o1)]
print(f"\n[O1] Лучшая val_accuracy: {best_val_acc_o1:.4f}")

O1: Adam, lr=0.1 (слишком большой)


[O1] Epoch 01/8: train_loss=0.5430 train_acc=0.8390 | val_loss=0.4442 val_acc=0.8685


[O1] Epoch 02/8: train_loss=0.3339 train_acc=0.9000 | val_loss=0.2947 val_acc=0.9153


[O1] Epoch 03/8: train_loss=0.2727 train_acc=0.9196 | val_loss=0.2418 val_acc=0.9291


[O1] Epoch 04/8: train_loss=0.2315 train_acc=0.9308 | val_loss=0.2388 val_acc=0.9306


[O1] Epoch 05/8: train_loss=0.2144 train_acc=0.9376 | val_loss=0.2113 val_acc=0.9386


[O1] Epoch 06/8: train_loss=0.1938 train_acc=0.9421 | val_loss=0.2131 val_acc=0.9410


[O1] Epoch 07/8: train_loss=0.1770 train_acc=0.9492 | val_loss=0.2105 val_acc=0.9440


[O1] Epoch 08/8: train_loss=0.1762 train_acc=0.9494 | val_loss=0.2290 val_acc=0.9437

[O1] Лучшая val_accuracy: 0.9440


In [None]:
# ── O2: Adam with a learning rate that is too small ──
print("=" * 60)
print("O2: Adam, lr=1e-5 (слишком маленький)")
print("=" * 60)

torch.manual_seed(SEED)
model_o2 = MLP(
    hidden_sizes=[512, 256], dropout_p=e4_dropout_p, use_batchnorm=e4_batchnorm
).to(device)
optimizer_o2 = optim.Adam(model_o2.parameters(), lr=1e-5)

history_o2 = run_experiment(
    model_o2, train_loader, val_loader, optimizer_o2, criterion, device,
    max_epochs=MAX_EPOCHS_DIAG, tag='O2'
)

best_val_acc_o2 = max(history_o2['val_acc'])
# Validation loss of the best-accuracy epoch, the same definition the other
# experiments use, so the best_val_loss column means one thing in every row.
best_val_loss_o2 = history_o2['val_loss'][history_o2['val_acc'].index(best_val_acc_o2)]
print(f"\n[O2] Лучшая val_accuracy: {best_val_acc_o2:.4f}")

O2: Adam, lr=1e-5 (слишком маленький)


[O2] Epoch 01/8: train_loss=1.4373 train_acc=0.6212 | val_loss=0.9747 val_acc=0.7833


[O2] Epoch 02/8: train_loss=0.8456 train_acc=0.8028 | val_loss=0.7116 val_acc=0.8319


[O2] Epoch 03/8: train_loss=0.6582 train_acc=0.8419 | val_loss=0.5857 val_acc=0.8568


[O2] Epoch 04/8: train_loss=0.5521 train_acc=0.8647 | val_loss=0.5010 val_acc=0.8748


[O2] Epoch 05/8: train_loss=0.4765 train_acc=0.8812 | val_loss=0.4395 val_acc=0.8898


[O2] Epoch 06/8: train_loss=0.4205 train_acc=0.8950 | val_loss=0.3904 val_acc=0.9016


[O2] Epoch 07/8: train_loss=0.3745 train_acc=0.9063 | val_loss=0.3571 val_acc=0.9073


[O2] Epoch 08/8: train_loss=0.3364 train_acc=0.9150 | val_loss=0.3283 val_acc=0.9169

[O2] Лучшая val_accuracy: 0.9169


In [None]:
# ── O3: SGD with momentum and weight decay ──
print("=" * 60, "O3: SGD, momentum=0.9, weight_decay=1e-4, lr=0.01", "=" * 60, sep="\n")

# Re-seed so the weight initialisation does not depend on earlier cells.
torch.manual_seed(SEED)
model_o3 = MLP(hidden_sizes=[512, 256], dropout_p=e4_dropout_p, use_batchnorm=e4_batchnorm)
model_o3 = model_o3.to(device)
optimizer_o3 = optim.SGD(
    model_o3.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

history_o3 = run_experiment(
    model=model_o3,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer_o3,
    criterion=criterion,
    device=device,
    max_epochs=MAX_EPOCHS_O3,
    tag='O3',
)

# Best validation accuracy and the validation loss of that same epoch
# (max() keeps the first of several equal maxima).
best_val_acc_o3, best_val_loss_o3 = max(
    zip(history_o3['val_acc'], history_o3['val_loss']),
    key=lambda pair: pair[0],
)
epochs_o3 = len(history_o3['val_acc'])
print(f"\n[O3] Лучшая val_accuracy: {best_val_acc_o3:.4f}")

O3: SGD, momentum=0.9, weight_decay=1e-4, lr=0.01


[O3] Epoch 01/15: train_loss=0.3679 train_acc=0.8884 | val_loss=0.2017 val_acc=0.9385


[O3] Epoch 02/15: train_loss=0.1526 train_acc=0.9549 | val_loss=0.1705 val_acc=0.9466


[O3] Epoch 03/15: train_loss=0.0954 train_acc=0.9710 | val_loss=0.1584 val_acc=0.9523


[O3] Epoch 04/15: train_loss=0.0635 train_acc=0.9808 | val_loss=0.1418 val_acc=0.9584


[O3] Epoch 05/15: train_loss=0.0447 train_acc=0.9873 | val_loss=0.1446 val_acc=0.9571


[O3] Epoch 06/15: train_loss=0.0313 train_acc=0.9916 | val_loss=0.1359 val_acc=0.9599


[O3] Epoch 07/15: train_loss=0.0260 train_acc=0.9929 | val_loss=0.1469 val_acc=0.9586


[O3] Epoch 08/15: train_loss=0.0197 train_acc=0.9950 | val_loss=0.1425 val_acc=0.9602


[O3] Epoch 09/15: train_loss=0.0163 train_acc=0.9962 | val_loss=0.1400 val_acc=0.9614


[O3] Epoch 10/15: train_loss=0.0128 train_acc=0.9970 | val_loss=0.1452 val_acc=0.9609


[O3] Epoch 11/15: train_loss=0.0122 train_acc=0.9971 | val_loss=0.1380 val_acc=0.9630


[O3] Epoch 12/15: train_loss=0.0105 train_acc=0.9977 | val_loss=0.1486 val_acc=0.9592


[O3] Epoch 13/15: train_loss=0.0096 train_acc=0.9979 | val_loss=0.1345 val_acc=0.9633


[O3] Epoch 14/15: train_loss=0.0079 train_acc=0.9984 | val_loss=0.1378 val_acc=0.9627


[O3] Epoch 15/15: train_loss=0.0069 train_acc=0.9988 | val_loss=0.1289 val_acc=0.9656

[O3] Лучшая val_accuracy: 0.9656


---
## 4. Сохранение артефактов

In [None]:
# ── runs.csv: one row per experiment ──
fieldnames = [
    'experiment_id', 'dataset', 'seed', 'model_summary',
    'optimizer', 'lr', 'momentum', 'weight_decay',
    'epochs_trained', 'best_val_accuracy', 'best_val_loss'
]

# Architecture label shared by E4 and O1-O3 (they reuse the E4 settings).
_regulariser = '+BN' if e4_batchnorm else 'Dropout(0.3)'
_tuned_summary = f'512-256, ReLU, {_regulariser}'
e4_summary = f'{_tuned_summary}, EarlyStopping(p=5)'


def _run_row(experiment_id, model_summary, optimizer, lr, momentum,
             weight_decay, epochs_trained, val_acc, val_loss):
    """Build one runs.csv record; both metrics are rounded to 4 decimals."""
    return {
        'experiment_id': experiment_id,
        'dataset': 'KMNIST',
        'seed': SEED,
        'model_summary': model_summary,
        'optimizer': optimizer,
        'lr': lr,
        'momentum': momentum,
        'weight_decay': weight_decay,
        'epochs_trained': epochs_trained,
        'best_val_accuracy': round(val_acc, 4),
        'best_val_loss': round(val_loss, 4),
    }


# The Adam runs take no momentum argument, so that column is left empty.
rows = [
    _run_row('E1', '512-256, ReLU, no-Dropout, no-BN', 'Adam', 1e-3, '', 0,
             epochs_e1, best_val_acc_e1, best_val_loss_e1),
    _run_row('E2', '512-256, ReLU, Dropout(0.3), no-BN', 'Adam', 1e-3, '', 0,
             epochs_e2, best_val_acc_e2, best_val_loss_e2),
    _run_row('E3', '512-256, ReLU, no-Dropout, BatchNorm', 'Adam', 1e-3, '', 0,
             epochs_e3, best_val_acc_e3, best_val_loss_e3),
    _run_row('E4', e4_summary, 'Adam', 1e-3, '', 0,
             epochs_e4, best_val_acc_e4, best_val_loss_e4),
    _run_row('O1', _tuned_summary, 'Adam', 0.1, '', 0,
             len(history_o1['val_acc']), best_val_acc_o1, best_val_loss_o1),
    _run_row('O2', _tuned_summary, 'Adam', 1e-5, '', 0,
             len(history_o2['val_acc']), best_val_acc_o2, best_val_loss_o2),
    _run_row('O3', _tuned_summary, 'SGD', 0.01, 0.9, 1e-4,
             epochs_o3, best_val_acc_o3, best_val_loss_o3),
]

# newline='' lets the csv module control line endings itself.
with open(ARTIFACTS_DIR / 'runs.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

print("Сохранено: artifacts/runs.csv")

# Print the rows as an aligned plain-text table.
# Each column must be at least as wide as its header label: "optimizer" is
# 9 characters, so that column uses width 10 in both the header and the rows
# (with the former width of 6 the header ran into the next column).
header = (f"{'ID':<4} {'optimizer':<10} {'lr':<8} {'momentum':<9} {'wd':<8} "
          f"{'epochs':<7} {'val_acc':<10} {'val_loss'}")
print(f"\n{header}")
print("-" * len(header))
for r in rows:
    print(f"{r['experiment_id']:<4} {r['optimizer']:<10} {r['lr']:<8} "
          f"{str(r['momentum']):<9} {str(r['weight_decay']):<8} {r['epochs_trained']:<7} "
          f"{r['best_val_accuracy']:<10} {r['best_val_loss']}")

Сохранено: artifacts/runs.csv

ID   optimizer lr       momentum  wd       epochs  val_acc    val_loss
-----------------------------------------------------------------
E1   Adam   0.001              0        15      0.9561     0.1876
E2   Adam   0.001              0        15      0.9577     0.1571
E3   Adam   0.001              0        15      0.9625     0.157
E4   Adam   0.001              0        19      0.9627     0.169
O1   Adam   0.1                0        8       0.944      0.2105
O2   Adam   1e-05              0        8       0.9169     0.3283
O3   SGD    0.01     0.9       0.0001   15      0.9656     0.1289


In [None]:
# ── best_config.json: settings and validation results of the E4 run ──
_model_cfg = {
    'class': 'MLP',
    'input_size': 784,
    'hidden_sizes': [512, 256],
    'num_classes': 10,
    'activation': 'ReLU',
    'dropout_p': e4_dropout_p,
    'use_batchnorm': e4_batchnorm,
}
_training_cfg = {
    'optimizer': 'Adam',
    'lr': 1e-3,
    'weight_decay': 0,
    'batch_size': BATCH_SIZE,
    'max_epochs': 30,
    'early_stopping_patience': 5,
    'epochs_trained': epochs_e4,
}
_results = {
    'best_val_accuracy': round(best_val_acc_e4, 4),
    'best_val_loss': round(best_val_loss_e4, 4),
}
best_config = {
    'dataset': 'KMNIST',
    'seed': SEED,
    'model': _model_cfg,
    'training': _training_cfg,
    'results': _results,
}

# Serialise once; the same text is written to disk and echoed below.
_config_text = json.dumps(best_config, indent=2)
with open(ARTIFACTS_DIR / 'best_config.json', 'w') as f:
    f.write(_config_text)

print("Сохранено: artifacts/best_config.json")
print(_config_text)

Сохранено: artifacts/best_config.json
{
  "dataset": "KMNIST",
  "seed": 42,
  "model": {
    "class": "MLP",
    "input_size": 784,
    "hidden_sizes": [
      512,
      256
    ],
    "num_classes": 10,
    "activation": "ReLU",
    "dropout_p": 0.0,
    "use_batchnorm": true
  },
  "training": {
    "optimizer": "Adam",
    "lr": 0.001,
    "weight_decay": 0,
    "batch_size": 64,
    "max_epochs": 30,
    "early_stopping_patience": 5,
    "epochs_trained": 19
  },
  "results": {
    "best_val_accuracy": 0.9627,
    "best_val_loss": 0.169
  }
}


## 5. Графики

In [None]:
# ── curves_best.png: loss and accuracy curves of the E4 run ──
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
epochs_e4_range = range(1, len(history_e4['train_loss']) + 1)

# One panel per metric: (axis, history-key suffix, axis label / title suffix).
for ax, metric, caption in ((ax1, 'loss', 'Loss'), (ax2, 'acc', 'Accuracy')):
    ax.plot(epochs_e4_range, history_e4[f'train_{metric}'],
            label=f'train {metric}', marker='o', markersize=3)
    ax.plot(epochs_e4_range, history_e4[f'val_{metric}'],
            label=f'val {metric}', marker='s', markersize=3)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(caption)
    ax.set_title(f'E4 ({best_base_tag}+EarlyStopping): {caption}')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(str(FIGURES_DIR / 'curves_best.png'), dpi=120, bbox_inches='tight')
# The notebook selects the non-interactive Agg backend, where plt.show()
# displays nothing and only emits a warning. Close the figure instead so it
# does not stay registered with pyplot.
plt.close(fig)
print("Сохранено: artifacts/figures/curves_best.png")

Сохранено: artifacts/figures/curves_best.png


  plt.show()


In [None]:
# ── curves_lr_extremes.png: loss curves of the two learning-rate extremes ──
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One panel per run: (axis, history, panel title).
for ax, hist, label in [
    (axes[0], history_o1, 'O1: Adam lr=0.1 (слишком большой)'),
    (axes[1], history_o2, 'O2: Adam lr=1e-5 (слишком маленький)'),
]:
    ep = range(1, len(hist['train_loss']) + 1)
    ax.plot(ep, hist['train_loss'], label='train loss', marker='o', markersize=4)
    ax.plot(ep, hist['val_loss'],   label='val loss',   marker='s', markersize=4)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(label)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(str(FIGURES_DIR / 'curves_lr_extremes.png'), dpi=120, bbox_inches='tight')
# The notebook selects the non-interactive Agg backend, where plt.show()
# displays nothing and only emits a warning. Close the figure instead so it
# does not stay registered with pyplot.
plt.close(fig)
print("Сохранено: artifacts/figures/curves_lr_extremes.png")

Сохранено: artifacts/figures/curves_lr_extremes.png


  plt.show()


---
## 6. Финальная оценка лучшей модели на тесте (один раз)

In [None]:
# Reload the checkpoint saved after E4 and score it on the test split.
# This is meant to be done exactly once, after all model selection is over.
_best_weights = torch.load(str(ARTIFACTS_DIR / 'best_model.pt'), map_location=device)
model_e4.load_state_dict(_best_weights)
test_loss, test_acc = evaluate(model_e4, test_loader, criterion, device)

print("=" * 60, "Финальная оценка E4 на test-выборке", "=" * 60, sep="\n")
print(f"test_loss     = {test_loss:.4f}")
print(f"test_accuracy = {test_acc:.4f}")
print("\nОценка test-выборки завершена (использована ОДИН РАЗ).")

Финальная оценка E4 на test-выборке
test_loss     = 0.4476
test_accuracy = 0.9116

Оценка test-выборки завершена (использована ОДИН РАЗ).
