# Домашняя работа по регуляризации и оптимизации

Ниже приводится код модели и функции обучения. Далее предлагается улучшить качество модели путем добавления регуляризаций и настройки оптимизатора.

Оценка будет выставляться по итоговому качеству на тестовом корпусе:

- 2: < 50%
- 3: 50-60%
- 4: 60-75%
- 5: > 75%

In [1]:
### Install the required libraries (comment this line out if they are already installed).
!pip3 install pytorch_lightning torchmetrics PyYAML

Successfully installed lightning-utilities-0.7.1 pytorch_lightning-1.9.4 torchmetrics-0.11.3


In [2]:
import torch
import pytorch_lightning as pl
import yaml
from pytorch_lightning import seed_everything
from torchmetrics import Accuracy
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor, Normalize

# Directory used both for the CIFAR-10 download and as the Lightning root dir.
ROOT = "hwroot"
# YAML file that receives the final accuracy.
ANSWER_FILE = "submission.yaml"
# Number of worker processes handed to every DataLoader.
NUM_WORKERS = 4
# Whether PyTorch can see a CUDA device; only reported here.
USE_CUDA = torch.cuda.is_available()
print(f"USE CUDA: {USE_CUDA}")

USE CUDA: True


In [3]:
# Convert images to tensors, then apply (x - 0.5) / 0.5 to each of the
# three channels.
transform = Compose([
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
# Both splits share the same preprocessing and are downloaded into ROOT
# when missing.
trainset = CIFAR10(root=ROOT, train=True, download=True, transform=transform)
testset = CIFAR10(root=ROOT, train=False, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to hwroot/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting hwroot/cifar-10-python.tar.gz to hwroot
Files already downloaded and verified


In [4]:
class CNN(torch.nn.Sequential):
    """Plain convolutional classifier assembled as one flat ``Sequential``.

    The network is ``num_layers`` conv blocks followed by a classification
    head. The first block maps 3 input channels to 8; every third block
    uses stride 2 and doubles the channel count, all others keep the
    resolution and width unchanged.

    Args:
        num_classes: Number of output logits; also stored as
            ``self.num_classes``.
        num_layers: Number of conv blocks placed before the head.
    """

    def __init__(self, num_classes=10, num_layers=10):
        blocks = []
        channels = 3
        for index in range(num_layers):
            # Blocks 3, 6, 9, ... (1-based) downsample by a factor of two.
            stride = 2 if (index + 1) % 3 == 0 else 1
            if index == 0:
                width = 8
            else:
                # Width doubles exactly when the resolution is halved.
                width = channels * stride
            blocks.append(self.make_layer(channels, width, stride))
            channels = width
        blocks.append(self.make_head(channels, num_classes))
        super().__init__(*blocks)
        self.num_classes = num_classes

    def make_layer(self, in_channels, out_channels, stride):
        """Build one conv block: 3x3 convolution (padding 1) followed by ReLU."""
        conv = torch.nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1
        )
        return torch.nn.Sequential(conv, torch.nn.ReLU())

    def make_head(self, in_channels, out_channels):
        """Build the head: global max pooling, flatten, linear classifier."""
        pool = torch.nn.AdaptiveMaxPool2d((1, 1))
        flatten = torch.nn.Flatten()
        classifier = torch.nn.Linear(in_channels, out_channels)
        return torch.nn.Sequential(pool, flatten, classifier)
    
# Show the layer-by-layer structure of the baseline network.
print(CNN(num_classes=10))

CNN(
  (0): Sequential(
    (0): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (1): Sequential(
    (0): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (3): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (4): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (5): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
  )
  (6): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (7): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (8): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1

In [23]:
class Module(pl.LightningModule):
    """Lightning wrapper that trains ``model`` on ``trainset`` and validates on ``testset``.

    Args:
        model: Network to train; it must expose a ``num_classes`` attribute,
            which is used to configure the accuracy metric.
        batch_size: Batch size used by both the train and validation loaders.
    """

    def __init__(self, model, batch_size=512):
        super().__init__()
        self.batch_size = batch_size
        self.model = model
        self.criterion = torch.nn.CrossEntropyLoss()
        self.metric = Accuracy(task='multiclass', num_classes=model.num_classes)

    def forward(self, images):
        """Return the wrapped model's output for ``images``."""
        return self.model(images)

    def step(self, stage, batch, batch_idx):
        """Run the model on one ``(images, labels)`` batch.

        ``stage`` and ``batch_idx`` are accepted but not used here.

        Returns:
            A dict with the cross-entropy ``"loss"`` and the raw ``"logits"``.
        """
        inputs, targets = batch
        predictions = self(inputs)
        return {
            "loss": self.criterion(predictions, targets),
            "logits": predictions,
        }

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log the learning rate and batch accuracy."""
        outputs = self.step("train", batch, batch_idx)
        # Report the learning rate of the first parameter group of the first optimizer.
        first_optimizer = self.trainer.optimizers[0]
        self.log("lr", first_optimizer.param_groups[0]["lr"], prog_bar=True)
        with torch.no_grad():
            hits = outputs["logits"].argmax(-1) == batch[1]
            self.log("accuracy", hits.float().mean().item(), prog_bar=True)
        return outputs["loss"]

    def validation_step(self, batch, batch_idx):
        """Accumulate accuracy for one validation batch and log loss and metric."""
        outputs = self.step("val", batch, batch_idx)
        predicted_classes = outputs["logits"].argmax(dim=-1)
        self.metric.update(predicted_classes, batch[1])
        self.log("val_loss", outputs["loss"], prog_bar=True)
        # The metric object itself (not a computed value) is handed to the logger.
        self.log("val_acc", self.metric, prog_bar=True)

    def validation_epoch_end(self, outputs):
        """Store the accumulated accuracy in ``final_metric``, which ``train`` reads."""
        self.final_metric = self.metric.compute()
        super().validation_epoch_end(outputs)

    def configure_optimizers(self):
        """Return SGD (lr=0.1) paired with an exponential decay schedule (gamma=0.8)."""
        sgd = torch.optim.SGD(self.parameters(), lr=0.1)
        decay = torch.optim.lr_scheduler.ExponentialLR(sgd, gamma=0.8)
        return [sgd], [decay]

    def train_dataloader(self):
        """Return a shuffled loader over ``trainset`` that drops the last partial batch."""
        return torch.utils.data.DataLoader(
            trainset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=NUM_WORKERS,
        )

    def val_dataloader(self):
        """Return an unshuffled loader over ``testset``."""
        return torch.utils.data.DataLoader(
            testset,
            batch_size=self.batch_size,
            num_workers=NUM_WORKERS,
        )
    

def train(module, epochs=10, dump=None):
    """Fit ``module`` and optionally write its final accuracy to a YAML file.

    Args:
        module: Lightning module to train. It is constructed by the caller,
            so the seeding done here happens after its weights were created.
        epochs: Maximum number of training epochs.
        dump: Path of the YAML answer file; nothing is written when ``None``.
    """
    seed_everything(0)
    trainer = pl.Trainer(
        max_epochs=epochs,
        accelerator="auto",
        default_root_dir=ROOT,
    )
    trainer.fit(module)
    if dump is None:
        return
    with open(dump, "w") as fp:
        # ``final_metric`` is set by the module's ``validation_epoch_end``.
        report = {"tasks": [{"task1": {"answer": module.final_metric.item()}}]}
        yaml.safe_dump(report, fp)

In [6]:
# train(Module(CNN()))

# Ваше решение

In [27]:
class BetterCNN(CNN):
    """``CNN`` variant whose conv blocks add BatchNorm2d and Dropout regularization."""

    def make_layer(self, in_channels, out_channels, stride):
        """Build one block: bias-free 3x3 conv, batch norm, ReLU, dropout (p=0.2)."""
        # The conv bias is disabled; the BatchNorm2d that follows has its own
        # learnable shift by default.
        conv = torch.nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        norm = torch.nn.BatchNorm2d(out_channels)
        return torch.nn.Sequential(
            conv,
            norm,
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
        )

    def make_head(self, in_channels, out_channels):
        """Build the same head as ``CNN``: global max pool, flatten, linear."""
        return super().make_head(in_channels, out_channels)

class BetterModule(Module):
    """``Module`` trained with Adam (AMSGrad) instead of plain SGD.

    The task asks for an optimizer better than SGD and for tuned training
    hyperparameters; the values below are the chosen ones.
    """

    def configure_optimizers(self):
        """Return Adam/AMSGrad with an exponential learning-rate decay."""
        adam = torch.optim.Adam(
            self.parameters(),
            lr=11e-3,
            betas=(0.85, 0.98),
            amsgrad=True,
        )
        decay = torch.optim.lr_scheduler.ExponentialLR(adam, gamma=0.98)
        return [adam], [decay]


# Experiment log for BetterCNN() (settings ------ resulting accuracy):
#   adam, init lr 1e-2, steplr - 2, 0.8
#   adam, 40 epochs, lr=1e-2, betas=(0.8, 0.98), amsgrad=True, steplr - 2, 0.8 ------ 0.723
#   adam, 70 epochs, lr=1e-2, betas=(0.85, 0.94), amsgrad=True, explr - 0.94 ------ 0.77
#   adam, 70 epochs, lr=11e-3, betas=(0.85, 0.98), amsgrad=True, explr - 0.98 ------ 0.781

In [None]:
# Seed before the network is built so its initial weights are reproducible;
# train() re-seeds only after the module already exists.
torch.manual_seed(0)

train(
    BetterModule(BetterCNN()),
    epochs=90,
    dump=ANSWER_FILE,
)

In [26]:
### Use from Google Colab to download result.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, so skip this cell when running elsewhere.
from google.colab import files
# Download the answer file written by train(..., dump=ANSWER_FILE).
files.download(ANSWER_FILE)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>