In [1]:
# Cell 1: Imports and seed
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torchmetrics.classification import MulticlassF1Score, MulticlassROC, MulticlassAUROC
import numpy as np

def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

1. **1 балл** Создайте класс `FashionMNISTDataModule`, реализуйте в нем:
    - загрузку данных, 
    - предобработку (перевод в тензоры, нормализация, etc
    - разбиение на train/val/test части
    - создание dataloader'ов- **1 балл**

In [2]:
# Cell 2: FashionMNISTDataModule
class FashionMNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir='./data', batch_size=64):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        
        # Определение преобразований
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ])
        
    def prepare_data(self):
        # Загрузка данных
        torchvision.datasets.FashionMNIST(self.data_dir, train=True, download=True)
        torchvision.datasets.FashionMNIST(self.data_dir, train=False, download=True)
        
    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            fashion_full = torchvision.datasets.FashionMNIST(
                self.data_dir, train=True, transform=self.transform
            )
            # Разделение на train и validation
            self.fashion_train, self.fashion_val = random_split(
                fashion_full, [55000, 5000], generator=torch.Generator().manual_seed(42)
            )
            
        if stage == 'test' or stage is None:
            self.fashion_test = torchvision.datasets.FashionMNIST(
                self.data_dir, train=False, transform=self.transform
            )
    
    def train_dataloader(self):
        return DataLoader(self.fashion_train, batch_size=self.batch_size, shuffle=True)
    
    def val_dataloader(self):
        return DataLoader(self.fashion_val, batch_size=self.batch_size)
    
    def test_dataloader(self):
        return DataLoader(self.fashion_test, batch_size=self.batch_size)

2. **2 балла** Создайте класс модели `FashionMNIST` (наследник `LightningModule`), реализуйте в нем:
    - training_step, validation_step, test_step
    - расчет метрик на валидации и тестировании из TorchMetrics: F1, ROC AUC
    - логирование метрик и функций потерь на каждой эпохе валидации/теста
    - подберите подходящие, на ваш взгляд, optimizer и lr-scheduler, а также их гиперпараметры

In [3]:
# Cell 3: FashionMNIST Model
class FashionMNIST(pl.LightningModule):
    def __init__(self, learning_rate=1e-3):
        super().__init__()
        self.save_hyperparameters()
        
        # Метрики
        self.f1 = MulticlassF1Score(num_classes=10)
        self.auroc = MulticlassAUROC(num_classes=10)
        
        # Определение архитектуры
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        
        # Логирование метрик
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_f1', self.f1(preds, y), prog_bar=True)
        self.log('val_auroc', self.auroc(logits, y), prog_bar=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        
        # Логирование метрик
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_f1', self.f1(preds, y), prog_bar=True)
        self.log('test_auroc', self.auroc(logits, y), prog_bar=True)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val_loss"
            }
        }

3. **1 балл** Обучите модель с помощью trainer'а:
    - добавьте `EarlyStopping`
    - реализуйте визуализацию логов через tensorboard
    - проверьте качество на тестовой части данных
    

Обеспечена воспроизводимость решения: зафиксированы random_state, ноутбук воспроизводится от начала до конца без ошибок - **1 балл**

In [4]:
# Cell 4: Training
# Инициализация логгера и callbacks
logger = TensorBoardLogger("lightning_logs", name="fashion_mnist")
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min'
)

# Инициализация данных и модели
data_module = FashionMNISTDataModule()
model = FashionMNIST()

# Инициализация тренера
trainer = pl.Trainer(
    max_epochs=10,
    callbacks=[early_stopping],
    logger=logger,
    accelerator='auto',
    devices=1
)

# Обучение модели
trainer.fit(model, data_module)

# Тестирование модели
trainer.test(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|█████████████████████████████████████████████████████████████████| 26421880/26421880 [00:04<00:00, 5967200.78it/s]


Extracting ./data\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████████████████████| 29515/29515 [00:00<00:00, 242537.70it/s]


Extracting ./data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|███████████████████████████████████████████████████████████████████| 4422102/4422102 [00:00<00:00, 7730821.52it/s]


Extracting ./data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████████████| 5148/5148 [00:00<?, ?it/s]


Extracting ./data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\FashionMNIST\raw



Missing logger folder: lightning_logs\fashion_mnist
  from pandas.core import (
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type              | Params
-----------------------------------------------
0 | f1       | MulticlassF1Score | 0     
1 | auroc    | MulticlassAUROC   | 0     
2 | conv1    | Conv2d            | 320   
3 | conv2    | Conv2d            | 18.5 K
4 | dropout1 | Dropout2d         | 0     
5 | dropout2 | Dropout2d         | 0     
6 | fc1      | Linear            | 1.2 M 
7 | fc2      | Linear            | 1.3 K 
-----------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.800     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.225783571600914,
  'test_f1': 0.9184894561767578,
  'test_auroc': 0.9941807389259338}]

### logs in tensorboard

In [5]:

%load_ext tensorboard
%tensorboard --logdir lightning_logs