# Домашнее задание. Классификация изображений

Сегодня вам предстоить помочь телекомпании FOX в обработке их контента. Как вы знаете, сериал "Симпсоны" идет на телеэкранах более 25 лет, и за это время скопилось очень много видеоматериала. Персоонажи менялись вместе с изменяющимися графическими технологиями, и Гомер Симпсон-2018 не очень похож на Гомера Симпсона-1989. В этом задании вам необходимо классифицировать персонажей, проживающих в Спрингфилде. Думаю, нет смысла представлять каждого из них в отдельности.

**Мое имя на в leaderboard на kaggle - Dmitriy_Zharkovskiy_562531453**

In [None]:
!pip install pytorch_lightning
!pip install wandb

In [1]:
import torch
import numpy as np
import pandas as pd
import PIL
import torch
import pickle
import numpy as np
from skimage import io
from tqdm import tqdm, tqdm_notebook
from PIL import Image
from pathlib import Path
import random
import os
from torchvision import transforms
from torchvision import models
import torch.optim as optim
from torch.optim import lr_scheduler
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision.datasets import ImageFolder
import torch.nn as nn
import pytorch_lightning as pl
import wandb
from pytorch_lightning.loggers import WandbLogger
from sklearn.metrics import f1_score
from torchmetrics.classification import MulticlassConfusionMatrix
from matplotlib import colors, pyplot as plt
import warnings


%matplotlib inline
warnings.filterwarnings(action='ignore', category=DeprecationWarning)
!nvidia-smi

Sun Nov 12 19:00:19 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 536.67                 Driver Version: 536.67       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 Ti   WDDM  | 00000000:01:00.0  On |                  N/A |
| 90%   31C    P8              21W / 200W |    357MiB /  8192MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
DATA_MODES = ['train', 'val', 'test']
RESCALE_SIZE = 299
DEVICE =  torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
def seed_everything(seed):
    # Фискирует максимум сидов.
    # Это понадобится, чтобы сравнение оптимизаторов было корректным
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [4]:
class SimpsonsDataset(Dataset):
    """
    Датасет с картинками, который паралельно подгружает их из папок
    производит скалирование и превращение в торчевые тензоры
    """
    def __init__(self, files, mode, transform):
        super().__init__()
        # список файлов для загрузки
        self.files = sorted(files)
        # режим работы
        self.mode = mode
        self.transform = transform

        if self.mode not in DATA_MODES:
            print(f"{self.mode} is not correct; correct modes: {DATA_MODES}")
            raise NameError

        self.len_ = len(self.files)

        self.label_encoder = LabelEncoder()

        if self.mode != 'test':
            self.labels = [path.parent.name for path in self.files]
            self.label_encoder.fit(self.labels)

            with open('label_encoder.pkl', 'wb') as le_dump_file:
                  pickle.dump(self.label_encoder, le_dump_file)

    def __len__(self):
        return self.len_

    def load_sample(self, file):
        image = Image.open(file)
        image.load()
        return image

    def __getitem__(self, index):
        # для преобразования изображений в тензоры PyTorch и нормализации входа
        x = self.load_sample(self.files[index])
        x = self._prepare_sample(x)
        x = self.transform(x)

        if self.mode == 'test':
            return x
        else:
            label = self.labels[index]
            label_id = self.label_encoder.transform([label])
            y = label_id.item()
            return x, y

    def _prepare_sample(self, image):
        image = image.resize((RESCALE_SIZE, RESCALE_SIZE))
        return image

In [6]:
TRAIN_DIR = Path('train/')
TEST_DIR = Path('testset')

In [7]:
train_val_files = sorted(list(TRAIN_DIR.rglob('*.jpg')))
test_files = sorted(list(TEST_DIR.rglob('*.jpg')))

In [8]:
from sklearn.model_selection import train_test_split

train_val_labels = [path.parent.name for path in train_val_files]
train_files, val_files = train_test_split(train_val_files, test_size=0.25, \
                                          stratify=train_val_labels, random_state=123)

In [9]:
train_dataset = SimpsonsDataset(train_files, mode='train', transform=transforms.Compose(
    [
        transforms.ToTensor(),
    ]
))

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=False)

#### Посчитаем среднее и стандартное отклонение по каналам

In [10]:
mean = torch.FloatTensor([0, 0, 0]).to(DEVICE)
std = torch.FloatTensor([0, 0, 0]).to(DEVICE)

for data, _ in tqdm(train_dataloader):
    data = data.to(DEVICE)
    mean += data.mean(dim=(0, 2, 3))
    std += data.std(dim=(0, 2, 3))
mean /= len(train_dataloader)
std /= len(train_dataloader)
mean, std

100%|██████████| 246/246 [01:24<00:00,  2.90it/s]


(tensor([0.4625, 0.4077, 0.3520], device='cuda:0'),
 tensor([0.2503, 0.2285, 0.2603], device='cuda:0'))

In [11]:
class SimpsonsNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = models.densenet161(
            weights=models.DenseNet161_Weights.IMAGENET1K_V1
        )
        self.model.classifier = nn.Linear(2208, 42)

        params_to_update = []
        update_params_name = [
            name for name, _ in self.model.named_parameters()
        ][-22:]

        for name, param in self.model.named_parameters():
            if name in update_params_name:
                param.requires_grad = True
                params_to_update.append(param)
            else:
                param.requires_grad = False
                
        # Set Optimizer
        self.optimizer = optim.AdamW(params=params_to_update, lr=1e-3)      
        
        self.loss_func = torch.nn.CrossEntropyLoss()

        self.targets = torch.Tensor()
        self.preds = torch.Tensor()

    def _forward(self, x):
        x = self.model(x)
        return x

    def forward(self, images, target=None):
        output = self._forward(images)

        if target is not None:
            loss = self.loss_func(output, target)

            self.targets = torch.cat((self.targets, target.cpu()), 0)
            pred = torch.argmax(output, dim=-1)
            self.preds = torch.cat((self.preds, pred.cpu()), 0)
            return loss

        return output

In [12]:
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean.cpu(), std.cpu())
    ]
)

val_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean.cpu(), std.cpu())
    ]
)

In [13]:
train_dataset = SimpsonsDataset(train_files, mode='train', transform=train_transform)
val_dataset = SimpsonsDataset(val_files, mode='val', transform=val_transform)
test_dataset = SimpsonsDataset(test_files, mode='test', transform=val_transform)

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=64)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=128)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=128)

#### Реализую тренировочный цикл с помощью pytorch lightning он намного удобнее и по моим наблюдениям работает намного быстрее

In [14]:
class TrainModule(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.best_loss = None
        self.train_loss = []
        self.val_loss = []

    def forward(self, x):
        result = self.model(x)
        return result

    def configure_optimizers(self):
        optimizer = self.model.optimizer
        lambda_func = lambda epoch: 0.975 ** epoch
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_func)
        return [optimizer], [scheduler]

    def training_step(self, train_batch, batch_idx):
        images, target = train_batch
        loss = self.model(images, target)
        self.log(
            "train_loss", loss, prog_bar=True
        )
        self.train_loss.append(loss.item())
        return loss

    def validation_step(self, val_batch, batch_idx):
        images, target = val_batch
        loss = self.model(images, target)
        self.val_loss.append(loss.item())

        self.log("val_loss", loss, prog_bar=True)
        if self.best_loss is None:
            self.best_loss = loss
        if loss < self.best_loss:
            self.best_loss = loss
            torch.save(self.model, 'autosavemodel.pth')

In [15]:
model = SimpsonsNet().to(DEVICE)

In [17]:
wandb.login()
wandb_logger = WandbLogger(log_model='all', name='densenet161 --denselayer22-- optim.AdamW(params=params_to_update, lr=1e-3)')

[34m[1mwandb[0m: Currently logged in as: [33mgarkovski_dmitri[0m ([33mkuban23_[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [18]:
module = TrainModule(model)

In [19]:
seed_everything(123)

In [20]:
trainer = pl.Trainer(logger=wandb_logger, accelerator="gpu", max_epochs=7)
trainer.fit(module, train_dataloader, val_dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | SimpsonsNet | 26.6 M
--------------------------------------
1.6 M     Trainable params
25.0 M    Non-trainable params
26.6 M    Total params
106.259   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

C:\Users\user\anaconda3\envs\py310\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.
C:\Users\user\anaconda3\envs\py310\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=7` reached.


In [21]:
wandb.finish()

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█████
train_loss,█▅▅▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
val_loss,█▄▂▂▂▁▁

0,1
epoch,6.0
train_loss,0.0062
trainer/global_step,1721.0
val_loss,0.14041


In [22]:
model = model.to(DEVICE)
y_pred = torch.tensor([])
actual_labels = torch.tensor([])
for img, labels in val_dataloader:
    model.eval()
    with torch.no_grad():
        probs = model(img.to(DEVICE))
        preds = np.argmax(probs.cpu(), axis=1)
        y_pred = torch.cat((y_pred, preds))
        actual_labels = torch.cat((actual_labels, labels))

In [23]:
f1 = f1_score(actual_labels, y_pred, average='micro')
print("F1-оценка:", f1)

F1-оценка: 0.965418418035919


In [24]:
test_dataset = SimpsonsDataset(test_files, mode="test", transform=val_transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

y_pred = torch.tensor([])

for img in test_dataloader:
    model.eval()
    with torch.no_grad():
        probs = model(img.to(DEVICE))
        preds = np.argmax(probs.cpu(), axis=1)
        y_pred = torch.cat((y_pred, preds))

In [25]:
preds = train_dataset.label_encoder.inverse_transform(y_pred.numpy().astype(int))
test_filenames = [path.name for path in test_dataset.files]

In [26]:
my_submit = pd.DataFrame({'Id': test_filenames, 'Expected': preds})
my_submit.to_csv('my_model.csv', index=False)

**F1 на kaggle - 0.98724**

**Мое имя на в leaderboard на kaggle - Dmitriy_Zharkovskiy_562531453**