# 1. Импорт модулей и библиотек

In [61]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from torchmetrics import F1Score, ROC
import pytorch_lightning as pl
import numpy as np
import random

# 2. Обеспечение воспроизводимости

In [62]:
def set_seed(seed: int = 42) -> None:
    """Seed the random number generators used in this notebook.

    Seeds NumPy, the standard-library ``random`` module, ``torch`` and
    ``torch.cuda`` with the same value, then switches cuDNN to
    deterministic mode with benchmarking disabled.

    Args:
        seed: Value passed to every seeding function.
    """
    # Seeding order is kept: NumPy, random, torch, torch.cuda.
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN auto-tuning for repeatable kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    print(f"Random seed set as {seed}")


set_seed(42)

Random seed set as 42


# 3. Класс подготовки данных

In [63]:
class FashionMNISTDataModule(pl.LightningDataModule):
    """Lightning data module that serves Fashion-MNIST train/val/test loaders.

    The torchvision training split is divided into a training part and a
    validation part of ``val_size`` samples; the torchvision test split is
    used as-is for testing.

    Args:
        data_dir: Directory where the dataset is downloaded to / read from.
        batch_size: Batch size used by all three dataloaders.
        num_workers: Number of worker subprocesses for each dataloader.
        val_size: Number of training samples held out for validation.
            The training part receives the remaining samples.
        split_seed: Optional seed for a dedicated ``torch.Generator`` used
            by the train/val split. When ``None`` (default) the split draws
            from the global torch RNG.
    """

    def __init__(
        self,
        data_dir: str = "./data",
        batch_size: int = 64,
        num_workers: int = 2,
        val_size: int = 5000,
        split_seed=None,
    ):
        super().__init__()
        # Dataset location
        self.data_dir = data_dir
        # Batch size
        self.batch_size = batch_size
        # Number of subprocesses used to load data
        self.num_workers = num_workers
        # Size of the validation hold-out and optional seed for the split
        self.val_size = val_size
        self.split_seed = split_seed

        # Transformations applied to every image
        self.transform = transforms.Compose([
            # Convert the image to a tensor
            transforms.ToTensor(),
            # Normalise with mean 0.5 and std 0.5 (maps [0, 1] to [-1, 1])
            transforms.Normalize((0.5,), (0.5,)),
        ])

    def prepare_data(self):
        """Download both Fashion-MNIST splits into ``data_dir``."""
        datasets.FashionMNIST(self.data_dir, train=True, download=True)
        datasets.FashionMNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the datasets required for ``stage``.

        Args:
            stage: ``'fit'`` builds the train/val datasets, ``'test'`` builds
                the test dataset, ``None`` builds all of them.

        Raises:
            ValueError: If ``val_size`` is negative or larger than the
                training split.
        """
        if stage == 'fit' or stage is None:
            # Take the training split with the transform applied
            full_dataset = datasets.FashionMNIST(self.data_dir, train=True, transform=self.transform)

            total = len(full_dataset)
            if not 0 <= self.val_size <= total:
                raise ValueError(
                    f"val_size must be between 0 and {total}, got {self.val_size}"
                )

            # Only pass a generator when a seed was requested, so that the
            # default path keeps using the global RNG.
            split_kwargs = {}
            if self.split_seed is not None:
                split_kwargs["generator"] = torch.Generator().manual_seed(self.split_seed)

            # Randomly split into training and validation subsets
            self.train_dataset, self.val_dataset = random_split(
                full_dataset,
                [total - self.val_size, self.val_size],
                **split_kwargs,
            )

        if stage == 'test' or stage is None:
            # The test split is used unchanged
            self.test_dataset = datasets.FashionMNIST(self.data_dir, train=False, transform=self.transform)

    # Dataloader factories
    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=True)

    def val_dataloader(self):
        """Return the validation dataloader (no shuffling)."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        """Return the test dataloader (no shuffling)."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=self.num_workers)


In [77]:
from torchmetrics import ROC, AUROC

class FashionMNISTModel(pl.LightningModule):
    """Small convolutional classifier trained with PyTorch Lightning.

    Architecture: two 3x3 convolutions (1 -> 16 -> 32 channels, padding 1),
    each followed by ReLU and 2x2 max-pooling, then two linear layers
    (32 * 7 * 7 -> 128 -> ``num_classes``). The size of ``fc1`` means the
    network expects single-channel 28x28 inputs.

    Loss and F1 are logged per epoch for train/val/test; AUROC is logged
    for val/test only.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        num_classes: Number of target classes; sizes the output layer and
            all metrics.
    """

    def __init__(self, learning_rate: float = 0.001, num_classes: int = 10):
        super().__init__()
        self.learning_rate = learning_rate
        self.num_classes = num_classes

        # Model architecture:
        # two convolutional layers and two fully connected layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, num_classes)

        # Metrics (one instance per stage so their states never mix).
        # NOTE(review): F1Score is built without an explicit `average`, so the
        # torchmetrics default averaging applies - confirm it is the intended one.
        self.train_f1 = F1Score(task="multiclass", num_classes=num_classes)
        self.val_f1 = F1Score(task="multiclass", num_classes=num_classes)
        self.test_f1 = F1Score(task="multiclass", num_classes=num_classes)

        # NOTE(review): the ROC metrics are created but not updated in any step.
        self.val_roc = ROC(task="multiclass", num_classes=num_classes)
        self.test_roc = ROC(task="multiclass", num_classes=num_classes)
        self.val_auc = AUROC(task="multiclass", num_classes=num_classes)
        self.test_auc = AUROC(task="multiclass", num_classes=num_classes)

    def forward(self, x):
        """Return raw class logits for a batch of images."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        # Flatten everything except the batch dimension
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def _log_epoch(self, name, value):
        """Log ``value`` under ``name`` once per epoch (progress bar + logger)."""
        self.log(name, value, on_step=False, on_epoch=True, prog_bar=True, logger=True)

    def _shared_step(self, batch):
        """Run the forward pass; return ``(loss, logits, preds, targets)``."""
        x, y = batch
        logits = self(x)
        # Cross-entropy on the raw logits
        loss = F.cross_entropy(logits, y)
        # Predicted class = index of the largest logit
        preds = torch.argmax(logits, dim=1)
        return loss, logits, preds, y

    def _evaluate(self, batch, prefix, f1_metric, auc_metric):
        """Log loss, F1 and AUROC for a validation/test batch under ``prefix``."""
        loss, logits, preds, y = self._shared_step(batch)
        self._log_epoch(f"{prefix}_loss", loss)

        f1_metric(preds, y)
        self._log_epoch(f"{prefix}_f1", f1_metric)

        # AUROC receives the logits rather than the argmax predictions.
        auc_metric(logits, y)
        self._log_epoch(f"{prefix}_roc_auc", auc_metric)

    def training_step(self, batch, batch_idx):
        """Compute and return the training loss; log ``train_loss``/``train_f1``."""
        loss, _, preds, y = self._shared_step(batch)
        self._log_epoch("train_loss", loss)
        # Update the F1 metric with this batch
        self.train_f1(preds, y)
        self._log_epoch("train_f1", self.train_f1)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss``, ``val_f1`` and ``val_roc_auc`` for one batch."""
        self._evaluate(batch, "val", self.val_f1, self.val_auc)

    def test_step(self, batch, batch_idx):
        """Log ``test_loss``, ``test_f1`` and ``test_roc_auc`` for one batch."""
        self._evaluate(batch, "test", self.test_f1, self.test_auc)

    # Optimizer and scheduler settings
    def configure_optimizers(self):
        """Return Adam plus a StepLR scheduler (``step_size=5``, ``gamma=0.1``)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
        return [optimizer], [scheduler]

In [83]:
from pytorch_lightning.callbacks import EarlyStopping

# Early-stopping callback: watches the validation loss and halts training
# once it has stopped improving.
early_stop_callback = EarlyStopping(
    monitor="val_loss",  # logged metric to watch
    mode="min",          # lower values count as an improvement
    patience=3,          # number of checks without improvement that are tolerated
    verbose=True,        # print a message on improvement / stop
)

In [84]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

# TensorBoard logger; runs are written under logs/fashion_mnist
logger = TensorBoardLogger("logs", name="fashion_mnist")

# Build the Trainer
trainer = Trainer(
    max_epochs=10,                   # upper bound on the number of epochs
    callbacks=[early_stop_callback], # early stopping on the monitored metric
    logger=logger,                   # log to TensorBoard
    accelerator="auto",              # let Lightning pick the accelerator
    devices="auto"                   # let Lightning pick the devices
)

model = FashionMNISTModel(learning_rate=1e-3)

# `data_module` is passed to `fit` below but is not created in any visible
# cell, so define it here to keep the notebook runnable top to bottom.
# NOTE(review): built with the class defaults - adjust if the original run
# used different settings.
data_module = FashionMNISTDataModule()

trainer.fit(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name     | Type              | Params | Mode 
--------------------------------------------------------
0  | conv1    | Conv2d            | 160    | train
1  | conv2    | Conv2d            | 4.6 K  | train
2  | fc1      | Linear            | 200 K  | train
3  | fc2      | Linear            | 1.3 K  | train
4  | train_f1 | MulticlassF1Score | 0      | train
5  | val_f1   | MulticlassF1Score | 0      | train
6  | test_f1  | MulticlassF1Score | 0      | train
7  | val_roc  | MulticlassROC     | 0      | train
8  | test_roc | MulticlassROC     | 0      | train
9  | val_auc  | MulticlassAUROC   | 0      | train
10 | test_auc | MulticlassAUROC   | 0      | train
--------------------------------------------------------
206 K     Trainable params
0         Non-trainable params
206 K     Total params
0.828     Total estimated model params size (MB)

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.346


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 0.329


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 0.266


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.246


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.232


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.220


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.213


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.211


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [85]:
# Evaluate the trained model on the test dataloader of the data module
trainer.test(model=model, datamodule=data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         test_f1            0.9185000061988831
        test_loss           0.2345825731754303
      test_roc_auc          0.9948101043701172
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.2345825731754303,
  'test_f1': 0.9185000061988831,
  'test_roc_auc': 0.9948101043701172}]