### Part 5. Dogs classification (2+ points)
__Disclaimer__: Это опциональная часть задания. Здесь придется экспериментировать, подбирать оптимальную структуру сети для решения задачи и активно искать подсказки в сети.

Предлагаем вам решить задачу классификации пород собак. Вы можете обучить сеть с нуля или же воспользоваться методом fine-tuning'а. Полезная ссылка на [предобученные модели](https://pytorch.org/docs/stable/torchvision/models.html).

Данные можно скачать [отсюда](https://www.dropbox.com/s/vgqpz2f1lolxmlv/data.zip?dl=0). Датасет представлен 50 классами пород собак, которые можно найти в папке train в соответствующих директориях. При сдаче данной части задания вместе с ноутбуком необходимо отправить .csv-файл с предсказаниями классов тестовой выборки в формате: <имя изображения>,<метка класса> по одному объекту на строку. Ниже приведите код ваших экспериментов и короткий вывод по их результатам.

Будут оцениваться качество классификации (accuracy) на тестовой выборке (2 балла) и проведенные эксперименты (1 балл).
Разбалловка следующая:
* $>=$93% - 2 points
* $>=$84% - 1.5 points
* $>=$70% - 0.75 points

In [7]:
# Mount Google Drive into the Colab VM so the dataset under
# /content/drive is reachable. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.2.0-py3-none-any.whl (805 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/805.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/805.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━[0m [32m634.9/805.2 kB[0m [31m9.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m805.2/805.2 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.9.0-py3-none-any.whl (23 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.9.0 torchmetrics-1.2.0


In [3]:
import torch
from torchvision import transforms
from torchsummary import summary
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics import Accuracy

from matplotlib import pyplot as plt
import numpy as np
import time
from collections import defaultdict
from tqdm.auto import tqdm

import os
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
# --- Data location and loading ---
DATA_PATH = r"drive/MyDrive/Datasets/Dog breeds/data/"
NUM_WORKERS = 2
BATCH_SIZE = 256

# --- Input geometry: every image is resized to SIZE_H x SIZE_W ---
SIZE_H = 96
SIZE_W = 96

# --- Per-channel statistics used to normalize (and later de-normalize) images ---
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]

# --- Task / training settings ---
NUM_CLASSES = 50
EPOCH_NUM = 30
EMBEDDING_SIZE = 128

In [9]:
# Preprocessing applied to every image: resize to the fixed input size,
# convert to a tensor, then normalize each channel with image_mean / image_std.
transformer = transforms.Compose(transforms=[
    transforms.Resize(size=(SIZE_H, SIZE_W)),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_mean, std=image_std),
])

In [10]:
# Labelled training images: ImageFolder takes one sub-directory of
# DATA_PATH/train per class and applies `transformer` to each image.
dataset = torchvision.datasets.ImageFolder(os.path.join(DATA_PATH, 'train'), transform=transformer)

In [11]:
# Sanity check: number of training images and number of classes found.
len(dataset), len(dataset.classes)

(7166, 50)

In [12]:
# Random 80/20 train/validation split of the labelled data.
# NOTE(review): no `generator` is passed, so the split is presumably different
# on every run unless a global seed is set elsewhere -- confirm if
# reproducibility matters.
train_dataset, val_dataset = torch.utils.data.random_split(dataset=dataset, lengths=(0.8, 0.2))

In [13]:
# Sanity check: sizes of the train and validation subsets.
len(train_dataset), len(val_dataset)

(5733, 1433)

In [15]:
# Test images, loaded with the same preprocessing as the training data.
# NOTE(review): ImageFolder assigns a label from the sub-directory name, so the
# labels yielded here are presumably placeholders rather than breeds -- confirm
# against the layout of DATA_PATH/test.
test_dataset = torchvision.datasets.ImageFolder(os.path.join(DATA_PATH, 'test'), transform=transformer)
len(test_dataset)

1503

In [18]:
def plot_from_batch_generator(batch_gen, class_names=None):
    """Show the first images of one batch in a 3x3 grid with their labels.

    Args:
        batch_gen: Iterable yielding ``(data_batch, label_batch)`` pairs; only
            the first pair is consumed. Images are indexed as ``data_batch[i]``
            and transposed from ``[C, H, W]`` to ``[H, W, C]`` for plotting.
        class_names: Optional sequence mapping a label index to a readable
            name. When given, the name is appended to each subplot title.

    If the batch holds fewer images than the grid has cells, the remaining
    subplots are left empty.
    """
    data_batch, label_batch = next(iter(batch_gen))
    grid_size = (3, 3)
    f, axarr = plt.subplots(*grid_size)
    f.set_size_inches(15, 10)

    # Converted once so the de-normalization below is plain array arithmetic.
    std = np.asarray(image_std)
    mean = np.asarray(image_mean)

    # `axarr.flat` walks the grid row by row; zip stops at the shorter input,
    # so a batch smaller than the grid does not raise.
    for ax, image, label in zip(axarr.flat, data_batch, label_batch):
        # tensor -> numpy.ndarray, changing axes order [C, H, W] -> [H, W, C]
        image_ndarray = np.transpose(image.numpy(), [1, 2, 0])

        # undo the normalization and clip to [0, 1] as pyplot.imshow() expects
        src = np.clip(std * image_ndarray + mean, 0, 1)

        label_idx = int(label)
        if class_names is not None:
            sample_title = 'Label = %d (%s)' % (label_idx, class_names[label_idx])
        else:
            sample_title = 'Label = %d' % label_idx

        ax.imshow(src)
        ax.set_title(sample_title)

In [19]:
from IPython.display import clear_output

class Runner():
    """Minimal training / evaluation harness for a classification model.

    The runner owns the model, optimizer, loss and metric, keeps per-epoch
    loss/score history in ``log_dict`` and, when ``checkpoint_path`` is set,
    stores the weights of the model with the best validation score so far.

    Args:
        model: Module called as ``model(X_batch)``; expected to return logits
            with classes along dimension 1.
        opt: Optimizer exposing ``zero_grad()`` and ``step()``.
        device: Device that input and target batches are moved to.
        criterion: Callable ``criterion(logits, targets)`` returning the loss.
        metric: Callable ``metric(predicted_labels, targets)`` returning the score.
        checkpoint_path: File used to store / restore the best weights, or
            ``None`` to disable checkpointing.
    """

    def __init__(self, model, opt, device, criterion, metric, checkpoint_path=None):
        self.model = model
        self.opt = opt
        self.device = device
        self.criterion = criterion
        self.metric = metric
        self.checkpoint_path = checkpoint_path
        self.epoch = 0
        self.train_phase = True
        self.logits = None
        # Set here so validate()/output_log() also work before train() ran.
        self.visualize = False
        self._top_val_score = -1
        self.log_dict = {
            "train_loss": [],
            "val_loss": [],
            "train_score": [],
            "val_score": []}

    def forward(self, X_batch):
        """Return the model output (logits) for ``X_batch``."""
        return self.model(X_batch)

    def _run_batch(self, batch):
        """Run the forward pass for one ``(X, y)`` batch and keep the logits."""
        X_batch, _ = batch
        self.logits = self.forward(X_batch.to(self.device))

    def _run_criterion(self, batch):
        """Return ``(loss, score)`` for the logits of the last forward pass."""
        _, y_batch = batch
        y_batch = y_batch.to(self.device)
        loss = self.criterion(self.logits, y_batch)
        # argmax over logits equals argmax over softmax(logits).
        y_pred = self.logits.argmax(dim=1)
        score = self.metric(y_pred, y_batch)
        return loss, score

    def _run_epoch(self, loader, output_log=True):
        """Iterate once over ``loader`` in the current phase and log the means.

        In the training phase every batch is followed by an optimizer step.
        The mean batch loss and score are appended to the ``train_*`` or
        ``val_*`` entries of ``log_dict`` depending on ``self.train_phase``.
        """
        ep_loss = []
        ep_score = []
        _phase_description = 'Training' if self.train_phase else 'Evaluation'
        for batch in tqdm(loader, desc=_phase_description, leave=False):
            if self.train_phase:
                # Clear first so gradients left over from earlier code cannot
                # leak into this step.
                self.opt.zero_grad()

            with torch.set_grad_enabled(self.train_phase):
                self._run_batch(batch)
                loss, score = self._run_criterion(batch)

            if self.train_phase:
                loss.backward()
                self.opt.step()

            ep_loss.append(loss.item())
            ep_score.append(float(score))

        prefix = 'train' if self.train_phase else 'val'
        self.log_dict[f'{prefix}_loss'].append(np.mean(ep_loss))
        self.log_dict[f'{prefix}_score'].append(np.mean(ep_score))

        if output_log:
            self.output_log()

    def save_checkpoint(self):
        """Store the model weights if the last validation score is a new best.

        Does nothing when checkpointing is disabled or no validation epoch has
        been logged yet. The file contains ``model.state_dict()``.
        """
        if self.checkpoint_path is None or not self.log_dict['val_score']:
            return
        val_score = self.log_dict['val_score'][-1]
        if val_score > self._top_val_score:
            self._top_val_score = val_score
            # Passing the path lets torch open and close the file itself.
            torch.save(self.model.state_dict(), self.checkpoint_path)

    # Original (misspelled) name kept for callers that already use it.
    seve_checkpoint = save_checkpoint

    def load_checkpoint(self):
        """Restore weights from ``checkpoint_path`` if that file exists."""
        if self.checkpoint_path is None or not os.path.isfile(self.checkpoint_path):
            return
        state = torch.load(self.checkpoint_path, map_location=self.device)
        if isinstance(state, torch.nn.Module):
            # Checkpoint written as a whole pickled module instead of a state_dict.
            self.model = state
        else:
            self.model.load_state_dict(state)

    def output_log(self):
        """Print the latest logged metrics and, if enabled, plot the history."""
        if self.visualize:
            clear_output()

        phase = 'Training' if self.train_phase else 'Evaluation'

        print(f'{phase}: ', end='')
        # Either history may still be empty (e.g. first training epoch, or
        # validate() called on a fresh runner), so guard both.
        if self.log_dict["train_loss"]:
            print(f'Train loss: {self.log_dict["train_loss"][-1]}; Train score: {self.log_dict["train_score"][-1]}')
        if self.log_dict["val_loss"]:
            print(f'Val loss: {self.log_dict["val_loss"][-1]}; Val score: {self.log_dict["val_score"][-1]}')

        if self.visualize:
            fig, axes = plt.subplots(1, 2, figsize=(15, 5))

            axes[0].set_title('Loss')
            axes[0].plot(self.log_dict["train_loss"], label='train')
            axes[0].plot(self.log_dict["val_loss"], label='validate')
            axes[0].legend()

            axes[1].set_title('Accuracy')
            axes[1].plot(self.log_dict["train_score"], label='train')
            axes[1].plot(self.log_dict["val_score"], label='validate')
            axes[1].legend()

            plt.show()

    def train(self, train_loader, val_loader, n_epochs, visualize=True, model=None, opt=None, criterion=None, metric=None):
        """Train for ``n_epochs`` epochs, validating after each one.

        Args:
            train_loader: Iterable of ``(X, y)`` training batches.
            val_loader: Iterable of ``(X, y)`` validation batches.
            n_epochs: Number of epochs to run in this call.
            visualize: Clear the cell output and plot the history after each phase.
            model, opt, criterion, metric: Optional replacements for the
                objects passed to the constructor.
        """
        self.visualize = visualize
        # `is not None` rather than `or`: a module with a zero `__len__`
        # (e.g. an empty Sequential) is falsy and would be ignored.
        if opt is not None:
            self.opt = opt
        if model is not None:
            self.model = model
        if criterion is not None:
            self.criterion = criterion
        if metric is not None:
            self.metric = metric

        for _epoch in range(n_epochs):
            start_time = time.time()
            self.epoch += 1
            print(f"epoch {self.epoch:3d}/{n_epochs:3d} started")

            # validate() leaves the model in eval mode, so train mode has to
            # be re-enabled at the start of every epoch.
            self.train_phase = True
            self.model.train(True)
            self._run_epoch(train_loader)

            print(f"epoch {self.epoch:3d}/{n_epochs:3d} took {time.time() - start_time:.2f}s")

            self.validate(val_loader)
            self.save_checkpoint()

    @torch.no_grad()
    def validate(self, loader, model=None):
        """Evaluate on ``loader`` and append the result to the ``val_*`` logs.

        Args:
            loader: Iterable of ``(X, y)`` batches.
            model: Optional model to evaluate; replaces ``self.model`` when given.
        """
        if model is not None:
            self.model = model
        self.train_phase = False
        self.model.train(self.train_phase)
        self._run_epoch(loader)

    @torch.no_grad()
    def predict(self, X=None, loader=None):
        """Predict class indices with the best checkpoint (if one exists).

        Args:
            X: A single batch tensor. Takes precedence over ``loader``.
            loader: Iterable of batches; each batch is either an input tensor
                or a ``(X, ...)`` tuple/list whose first item is the input.

        Returns:
            A tensor of predicted labels when ``X`` is given, otherwise a flat
            list of ints covering every sample of ``loader``.

        Raises:
            ValueError: If neither ``X`` nor ``loader`` is provided.
        """
        if X is None and loader is None:
            raise ValueError("predict() needs either `X` or `loader`")

        self.load_checkpoint()
        self.train_phase = False
        self.model.train(self.train_phase)

        if X is not None:
            self.logits = self.forward(X.to(self.device))
            return self.logits.argmax(dim=1)

        pred_label = []
        for batch in tqdm(loader, desc='Test', leave=False):
            # Dataset-backed loaders yield (X, y); plain tensor loaders yield X.
            X_batch = batch[0] if isinstance(batch, (list, tuple)) else batch
            self.logits = self.forward(X_batch.to(self.device))
            pred_label.extend(self.logits.argmax(dim=1).tolist())
        return pred_label


In [None]:
# Your experiments here