# Project description

В данном задании вам предстоит осуществить путешествие в мир Спрингфилда,
где вы сможете познакомиться со всеми любимыми персонажами Симпсонов.

Основным заданием будет обучить классификатор на основе сверточных сетей,
чтобы научиться отличать всех жителей Спрингфилда.
# Dataset description
Обучающая и тестовая выборка состоят из отрывков из мультсериала Симпсоны.
Каждая картинка представлена в формате jpg с необходимой меткой — названием
персонажа, изображенного на ней. Тест был поделен на приватную и публичную
часть в соотношении 95/5.

В тренировочном датасете примерно по 1000 картинок на каждый класс,
но они отличаются размером.

Метки классов представлены в виде названий папок, в которых лежат картинки.

# Table of contents:
1. [__Data preparation__](#data_preparation)
2. [__Training models__](#training_models)
    * [__Data augmentation__](#data_augmentation)
    * [__Models__](#models)
        * [_AlexNet_](#alexnet)
        * [_VGG19_](#vgg19_bn)
        * [_ResNet152_](#resnet152)
        * [_SqueezeNet1_1_](#squeezenet1_1)
        * [_DenseNet201_](#densenet201)
        * [_Inception_v3_](#inception_v3)

# <a name='data_preparation'>1. Data preparation</a>

In [None]:
# download data from here
# https://www.kaggle.com/c/journey-springfield/data
import os.path
import sys
if 'google' in sys.modules:
    from google.colab import drive
    drive.mount('/content/drive')
    !mkdir Data
    if not os.path.exists('Data/train'):
        !cp drive/My\ Drive/Colab/Stepik/Kaggle/journey-springfield.zip Data
        !unzip -q -n Data/journey-springfield.zip -d Data
        !rm Data/journey-springfield.zip

In [None]:
# Fetch the model definition and the training helpers from the author's
# other repository, unless the files already sit next to the notebook:
# https://github.com/AllexFrolov/MobileNet_v3-PyTorch
if not os.path.isfile('MobileNet_v3.py'):
    !wget -q https://raw.githubusercontent.com/AllexFrolov/MobileNet_v3-PyTorch/master/MobileNet_v3.py
if not os.path.isfile('functions.py'):
    !wget -q https://raw.githubusercontent.com/AllexFrolov/MobileNet_v3-PyTorch/master/functions.py

# -------- for debugging ----------
# Re-import both modules so that edits to the downloaded files are picked up
# without restarting the kernel.
import functions
import MobileNet_v3
from importlib import reload
functions = reload(functions)
MobileNet_v3 = reload(MobileNet_v3)
# --------------------------------
# train / accuracy are the helpers referenced by the training cells below
from functions import train, accuracy

In [None]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Labelled training images; class names come from the sub-folder names
# (exposed as dataset.classes).
dataset = ImageFolder('Data/train/simpsons_dataset')

In [None]:
# Preview a 3x3 grid of randomly chosen images with human-readable titles
np.random.seed(42)

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 8),
                       sharey=True, sharex=True)

for fig_x in ax.flatten():
    random_characters = np.random.choice(len(dataset), 1)[0]
    im, label = dataset[random_characters]
    # Folder names look like "some_character"; turn them into "Some Character"
    img_label = " ".join(word.capitalize()
                         for word in dataset.classes[label].split('_'))
    # Square 224x224 preview (the previous 224x244 was a typo that
    # stretched every image vertically)
    im = im.resize((224, 224))
    fig_x.imshow(im)
    fig_x.set_title(img_label)
    fig_x.grid(False)

In [None]:
# Create custom DataLoader
class MyDataLoader:
    """Minimal batch loader over an indexable dataset restricted to ``indices``.

    ``data[i]`` must return an ``(X, y)`` pair.  Each ``X`` is passed through
    ``transformer`` and the results are stacked into one batch tensor.

    The loader supports ``len()``, random access to a batch via
    ``loader[batch_number]`` and the iterator protocol.  When ``shuffle`` is
    true the sample order is reshuffled at the start of every pass made with
    ``for ... in loader`` / ``next(loader)``.

    Args:
        data: indexable dataset returning ``(X, y)`` pairs.
        indices: positions in ``data`` served by this loader.  The sequence is
            copied, so shuffling never reorders the caller's object.
        batch_size: number of samples per batch (the last one may be smaller).
        transformer: callable applied to every ``X``; defaults to
            ``transforms.ToTensor()``.
        shuffle: reshuffle the sample order before every pass.

    Raises:
        TypeError: if ``shuffle`` is not a bool or ``batch_size`` is not an int.
        ValueError: if ``batch_size`` is smaller than 1.
    """

    def __init__(self, data, indices: list, batch_size: int, transformer=None, shuffle=False):
        if not isinstance(shuffle, bool):
            raise TypeError(f'shuffle should be bool type, not {type(shuffle)}')
        if isinstance(batch_size, bool) or not isinstance(batch_size, int):
            raise TypeError(
                f'batch_size should be type int, not {type(batch_size)}')
        if batch_size < 1:
            raise ValueError(f'batch_size should be positive, not {batch_size}')

        self.shuffle = shuffle
        self.batch_size = batch_size
        # Private copy: np.random.shuffle works in place and must not touch
        # the array/list owned by the caller.
        self.indices = list(indices)
        self.data = data
        self.data_len = len(self.indices)
        self.len_ = int(np.ceil(self.data_len / batch_size))
        # Position of the next batch handed out by __next__
        self._cursor = 0

        self.transformer = transformer
        if transformer is None:
            self.transformer = transforms.ToTensor()

    def __len__(self):
        """Return the number of batches in one pass."""
        return self.len_

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X_batch, y_batch)``.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``.
        """
        if not 0 <= index < self.len_:
            raise IndexError(
                f'batch index {index} is out of range for {self.len_} batches')
        start_index = index * self.batch_size
        end_index = min(self.data_len, start_index + self.batch_size)
        X_batch = []
        y_batch = []
        for batch_index in self.indices[start_index:end_index]:
            X, y = self.data[batch_index]
            X_batch.append(self.transformer(X))
            y_batch.append(y)
        # torch.stack also handles a single-sample batch (adds the batch axis).
        # NOTE(review): torch.Tensor(...) yields float labels, kept for
        # compatibility; presumably the training code casts them - confirm.
        return torch.stack(X_batch), torch.Tensor(y_batch)

    def __iter__(self):
        """Start a fresh pass over the data and return the loader itself."""
        self._cursor = 0
        return self

    def __next__(self):
        """Return the next batch of the current pass.

        Raises:
            StopIteration: when the pass is exhausted; the loader is then
                rewound so the following call starts a new pass.
        """
        if self._cursor >= self.len_:
            self._cursor = 0
            raise StopIteration
        # Reshuffle once, right before the first batch of a pass
        if self._cursor == 0 and self.shuffle:
            np.random.shuffle(self.indices)
        batch = self[self._cursor]
        self._cursor += 1
        return batch

In [None]:
# Split sample positions into train / validation / test subsets.
# 25% is held out for testing; the remaining 75% is split 75/25 again,
# i.e. roughly 56% train, 19% validation, 25% test of the whole dataset.
# A fixed random_state makes the split reproducible regardless of which
# cells were executed before this one.
train_val_indices, test_indices = train_test_split(np.arange(len(dataset)),
                                                   train_size=0.75,
                                                   random_state=42)

train_indices, val_indices = train_test_split(train_val_indices,
                                              train_size=0.75,
                                              random_state=42)

# <a name='training_models'>2. Training models</a>

## <a name='data_augmentation'>Data augmentation</a>


In [None]:
IM_SIZE = (224, 224)
batch_size = 64

# Training pipeline: resize, light augmentation (rotation up to 15 degrees,
# brightness/contrast jitter), then tensor conversion and normalisation.
train_transformer = transforms.Compose([
    transforms.Resize(IM_SIZE),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.5, contrast=0.5),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Evaluation pipeline: same preprocessing without the random augmentation
val_transformer = transforms.Compose([
    transforms.Resize(IM_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Create data loaders
train_loader = MyDataLoader(dataset, train_indices, batch_size,
                            train_transformer, True)
val_loader = MyDataLoader(dataset, val_indices, batch_size, val_transformer)
# The test loader needs the evaluation preprocessing as well: without it the
# images keep their different sizes (and cannot be stacked into a batch) and
# are not normalised the way the models were trained.
test_loader = MyDataLoader(dataset, test_indices, batch_size, val_transformer)

In [None]:
# Number of output units every classifier head below must have
num_classes = len(dataset.classes)

## <a name='models'>Models</a>

In [None]:
# Training history per model, keyed by model name (filled by the training cells)
models_history = {}

### <a name='alexnet'>AlexNet</a>

In [None]:
# AlexNet with pretrained weights from torchvision
alexnet = models.alexnet(pretrained=True)

# Only the classifier part stays trainable; everything else is frozen
for name, param in alexnet.named_parameters():
    if 'classifier' in name:
        continue
    param.requires_grad_(False)

# Swap the final layer for a fresh one with one output per class
in_dim = alexnet.classifier[-1].in_features
classifier = nn.Linear(in_features=in_dim, out_features=num_classes)
alexnet.classifier[-1] = classifier
alexnet = alexnet.to(DEVICE)

In [None]:
# Optimiser settings for AlexNet
lr = 1e-3
WEIGHT_DECAY = 1e-4
optimizer = torch.optim.Adam(alexnet.parameters(),
                             lr=lr,
                             weight_decay=WEIGHT_DECAY)

# Scheduler settings: halve the learning rate when the monitored metric has
# not improved by more than THRESHOLD for more than PATIENCE epochs.
FACTOR = 0.5
THRESHOLD = 0.01
PATIENCE = 1

loss_func = nn.CrossEntropyLoss().to(DEVICE)
# Keyword arguments on purpose: the position of `verbose` / `threshold` in the
# signature differs between PyTorch releases, so positional passing can
# silently bind the values to the wrong parameters.
# NOTE: `verbose` is deprecated in recent PyTorch; drop it if it is rejected.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=FACTOR, patience=PATIENCE,
    verbose=True, threshold=THRESHOLD
)

In [None]:
%%time
# Number of epochs passed to train() by the (disabled) calls in this notebook
epoch_count=15
# Training call is disabled; uncomment to re-run. The log recorded below was
# captured from an earlier run.
# history, best_param = \
#         train(alexnet, train_loader, loss_func, optimizer, epoch_count,
#               accuracy, val_loader, scheduler)
# models_history['alexnet'] = history

```
[ 1/15]|Mode: T |Loss: 2.047 |Metric: 0.4601 |: 100%|██████████| 184/184 [01:30<00:00, 2.03it/s]
[ 1/15]|Mode: V |Loss: 1.24 |Metric: 0.6675 |: 100%|██████████| 62/62 [00:20<00:00, 3.09it/s]
[ 2/15]|Mode: T |Loss: 1.425 |Metric: 0.6125 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 2/15]|Mode: V |Loss: 1.073 |Metric: 0.7159 |: 100%|██████████| 62/62 [00:20<00:00, 3.02it/s]
[ 3/15]|Mode: T |Loss: 1.289 |Metric: 0.6497 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 3/15]|Mode: V |Loss: 1.008 |Metric: 0.726 |: 100%|██████████| 62/62 [00:20<00:00, 3.09it/s]
[ 4/15]|Mode: T |Loss: 1.226 |Metric: 0.6698 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 4/15]|Mode: V |Loss: 1.035 |Metric: 0.726 |: 100%|██████████| 62/62 [00:20<00:00, 3.08it/s]
[ 5/15]|Mode: T |Loss: 1.135 |Metric: 0.6956 |: 100%|██████████| 184/184 [01:30<00:00, 2.03it/s]
[ 5/15]|Mode: V |Loss: 0.9263 |Metric: 0.761 |: 100%|██████████| 62/62 [00:20<00:00, 3.10it/s]
[ 6/15]|Mode: T |Loss: 1.108 |Metric: 0.7023 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 6/15]|Mode: V |Loss: 0.9521 |Metric: 0.7555 |: 100%|██████████| 62/62 [00:20<00:00, 3.09it/s]
[ 7/15]|Mode: T |Loss: 1.077 |Metric: 0.7075 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 7/15]|Mode: V |Loss: 0.923 |Metric: 0.7588 |: 100%|██████████| 62/62 [00:20<00:00, 3.06it/s]
0%| | 0/184 [00:00<?, ?it/s]Epoch 7: reducing learning rate of group 0 to 5.0000e-04.
[ 8/15]|Mode: T |Loss: 0.8428 |Metric: 0.7636 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 8/15]|Mode: V |Loss: 0.8032 |Metric: 0.7923 |: 100%|██████████| 62/62 [00:20<00:00, 3.09it/s]
[ 9/15]|Mode: T |Loss: 0.7782 |Metric: 0.7846 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 9/15]|Mode: V |Loss: 0.7646 |Metric: 0.8019 |: 100%|██████████| 62/62 [00:19<00:00, 3.10it/s]
[ 10/15]|Mode: T |Loss: 0.7226 |Metric: 0.7914 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 10/15]|Mode: V |Loss: 0.7645 |Metric: 0.7971 |: 100%|██████████| 62/62 [00:20<00:00, 3.07it/s]
[ 11/15]|Mode: T |Loss: 0.6709 |Metric: 0.8067 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 11/15]|Mode: V |Loss: 0.7901 |Metric: 0.793 |: 100%|██████████| 62/62 [00:20<00:00, 3.08it/s]
0%| | 0/184 [00:00<?, ?it/s]Epoch 11: reducing learning rate of group 0 to 2.5000e-04.
[ 12/15]|Mode: T |Loss: 0.5974 |Metric: 0.8283 |: 100%|██████████| 184/184 [01:29<00:00, 2.05it/s]
[ 12/15]|Mode: V |Loss: 0.7141 |Metric: 0.8104 |: 100%|██████████| 62/62 [00:20<00:00, 3.09it/s]
[ 13/15]|Mode: T |Loss: 0.5437 |Metric: 0.8437 |: 100%|██████████| 184/184 [01:29<00:00, 2.05it/s]
[ 13/15]|Mode: V |Loss: 0.6848 |Metric: 0.8245 |: 100%|██████████| 62/62 [00:19<00:00, 3.11it/s]
[ 14/15]|Mode: T |Loss: 0.5234 |Metric: 0.8495 |: 100%|██████████| 184/184 [01:30<00:00, 2.04it/s]
[ 14/15]|Mode: V |Loss: 0.696 |Metric: 0.8228 |: 100%|██████████| 62/62 [00:19<00:00, 3.11it/s]
[ 15/15]|Mode: T |Loss: 0.4899 |Metric: 0.8564 |: 100%|██████████| 184/184 [01:29<00:00, 2.05it/s]
[ 15/15]|Mode: V |Loss: 0.6747 |Metric: 0.8228 |: 100%|██████████| 62/62 [00:20<00:00, 3.08it/s]
Epoch 15: reducing learning rate of group 0 to 1.2500e-04.
CPU times: user 25min 54s, sys: 1min 36s, total: 27min 31s
Wall time: 27min 34s
```

### <a name='vgg19_bn'>VGG19</a>

In [None]:
# VGG19 (batch-norm variant) with pretrained weights from torchvision
vgg19_bn = models.vgg19_bn(pretrained=True)

# Keep only the classifier trainable, except its first layer (classifier.0),
# which is frozen together with the rest of the network
for name, param in vgg19_bn.named_parameters():
    if 'classifier' in name and 'classifier.0' not in name:
        continue
    param.requires_grad_(False)

# Swap the final layer for a fresh one with one output per class
in_dim = vgg19_bn.classifier[-1].in_features
classifier = nn.Linear(in_features=in_dim, out_features=num_classes)
vgg19_bn.classifier[-1] = classifier
vgg19_bn = vgg19_bn.to(DEVICE)

In [None]:
# Optimiser settings for VGG19
lr = 1e-3
WEIGHT_DECAY = 1e-5
optimizer = torch.optim.Adam(vgg19_bn.parameters(),
                             lr=lr,
                             weight_decay=WEIGHT_DECAY)

# Scheduler settings: halve the learning rate when the monitored metric has
# not improved by more than THRESHOLD for more than PATIENCE epochs.
FACTOR = 0.5
THRESHOLD = 0.01
PATIENCE = 1

loss_func = nn.CrossEntropyLoss().to(DEVICE)
# Keyword arguments on purpose: the position of `verbose` / `threshold` in the
# signature differs between PyTorch releases, so positional passing can
# silently bind the values to the wrong parameters.
# NOTE: `verbose` is deprecated in recent PyTorch; drop it if it is rejected.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=FACTOR, patience=PATIENCE,
    verbose=True, threshold=THRESHOLD
)

In [None]:
%%time
# Training call is disabled; uncomment to re-run. The log recorded below was
# captured from an earlier run.
# epoch_count=15
# history, best_param = \
#         train(vgg19_bn, train_loader, loss_func, optimizer, epoch_count,
#               accuracy, val_loader, scheduler)
# models_history['vgg19_bn'] = history

```
[  1/15]|Mode: T |Loss:   2.025 |Metric:  0.4566 |: 100%|██████████| 184/184 [02:07<00:00,  1.44it/s]
[  1/15]|Mode: V |Loss:   1.476 |Metric:  0.5906 |: 100%|██████████| 62/62 [00:33<00:00,  1.87it/s]
[  2/15]|Mode: T |Loss:   1.532 |Metric:  0.5767 |: 100%|██████████| 184/184 [02:06<00:00,  1.45it/s]
[  2/15]|Mode: V |Loss:   1.313 |Metric:  0.6362 |: 100%|██████████| 62/62 [00:33<00:00,  1.88it/s]
[  3/15]|Mode: T |Loss:   1.395 |Metric:  0.6075 |: 100%|██████████| 184/184 [02:07<00:00,  1.45it/s]
[  3/15]|Mode: V |Loss:   1.204 |Metric:  0.6609 |: 100%|██████████| 62/62 [00:32<00:00,  1.90it/s]
[  4/15]|Mode: T |Loss:   1.328 |Metric:  0.6203 |: 100%|██████████| 184/184 [02:05<00:00,  1.47it/s]
[  4/15]|Mode: V |Loss:   1.153 |Metric:  0.6876 |: 100%|██████████| 62/62 [00:32<00:00,  1.91it/s]
[  5/15]|Mode: T |Loss:   1.266 |Metric:   0.635 |: 100%|██████████| 184/184 [02:05<00:00,  1.46it/s]
[  5/15]|Mode: V |Loss:    1.12 |Metric:  0.6914 |: 100%|██████████| 62/62 [00:32<00:00,  1.91it/s]
[  6/15]|Mode: T |Loss:   1.196 |Metric:  0.6542 |: 100%|██████████| 184/184 [02:04<00:00,  1.47it/s]
[  6/15]|Mode: V |Loss:   1.066 |Metric:  0.7003 |: 100%|██████████| 62/62 [00:32<00:00,  1.91it/s]
[  7/15]|Mode: T |Loss:   1.158 |Metric:  0.6655 |: 100%|██████████| 184/184 [02:04<00:00,  1.47it/s]
[  7/15]|Mode: V |Loss:   1.083 |Metric:  0.7048 |: 100%|██████████| 62/62 [00:32<00:00,  1.89it/s]
[  8/15]|Mode: T |Loss:   1.115 |Metric:  0.6774 |: 100%|██████████| 184/184 [02:06<00:00,  1.46it/s]
[  8/15]|Mode: V |Loss:   1.032 |Metric:  0.7159 |: 100%|██████████| 62/62 [00:32<00:00,  1.89it/s]
[  9/15]|Mode: T |Loss:   1.075 |Metric:  0.6843 |: 100%|██████████| 184/184 [02:06<00:00,  1.45it/s]
[  9/15]|Mode: V |Loss:   1.043 |Metric:   0.706 |: 100%|██████████| 62/62 [00:32<00:00,  1.89it/s]
[ 10/15]|Mode: T |Loss:   1.044 |Metric:  0.6941 |: 100%|██████████| 184/184 [02:05<00:00,  1.47it/s]
[ 10/15]|Mode: V |Loss:   1.025 |Metric:  0.7176 |: 100%|██████████| 62/62 [00:32<00:00,  1.92it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    10: reducing learning rate of group 0 to 5.0000e-04.
[ 11/15]|Mode: T |Loss:  0.8987 |Metric:  0.7294 |: 100%|██████████| 184/184 [02:06<00:00,  1.45it/s]
[ 11/15]|Mode: V |Loss:   0.936 |Metric:  0.7416 |: 100%|██████████| 62/62 [00:32<00:00,  1.89it/s]
[ 12/15]|Mode: T |Loss:  0.8583 |Metric:  0.7444 |: 100%|██████████| 184/184 [02:05<00:00,  1.46it/s]
[ 12/15]|Mode: V |Loss:  0.9478 |Metric:  0.7391 |: 100%|██████████| 62/62 [00:32<00:00,  1.90it/s]
[ 13/15]|Mode: T |Loss:  0.8258 |Metric:  0.7512 |: 100%|██████████| 184/184 [02:05<00:00,  1.46it/s]
[ 13/15]|Mode: V |Loss:  0.9169 |Metric:  0.7452 |: 100%|██████████| 62/62 [00:32<00:00,  1.90it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    13: reducing learning rate of group 0 to 2.5000e-04.
[ 14/15]|Mode: T |Loss:  0.7682 |Metric:   0.767 |: 100%|██████████| 184/184 [02:05<00:00,  1.46it/s]
[ 14/15]|Mode: V |Loss:   0.891 |Metric:   0.758 |: 100%|██████████| 62/62 [00:32<00:00,  1.90it/s]
[ 15/15]|Mode: T |Loss:  0.7331 |Metric:  0.7778 |: 100%|██████████| 184/184 [02:06<00:00,  1.46it/s]
[ 15/15]|Mode: V |Loss:  0.8824 |Metric:   0.759 |: 100%|██████████| 62/62 [00:32<00:00,  1.90it/s]CPU times: user 33min 56s, sys: 5min 44s, total: 39min 41s
Wall time: 39min 43s
```

### <a name='resnet152'>ResNet152</a>

In [None]:
# ResNet152 with pretrained weights from torchvision
resnet152 = models.resnet152(pretrained=True)

# Freeze everything except the last residual stage (layer4)
for name, param in resnet152.named_parameters():
    if 'layer4' in name:
        continue
    param.requires_grad_(False)

# New head: heavy dropout followed by a linear layer with one output per class
in_dim = resnet152.fc.in_features
classifier = nn.Sequential(nn.Dropout(p=0.8),
                           nn.Linear(in_dim, num_classes))
resnet152.fc = classifier
resnet152 = resnet152.to(DEVICE)

In [None]:
# Optimiser settings for ResNet152
lr = 1e-2
WEIGHT_DECAY = 1e-5
optimizer = torch.optim.Adam(resnet152.parameters(),
                             lr=lr,
                             weight_decay=WEIGHT_DECAY)

# Scheduler settings: halve the learning rate when the monitored metric has
# not improved by more than THRESHOLD for more than PATIENCE epochs.
FACTOR = 0.5
THRESHOLD = 0.01
PATIENCE = 1

loss_func = nn.CrossEntropyLoss().to(DEVICE)
# Keyword arguments on purpose: the position of `verbose` / `threshold` in the
# signature differs between PyTorch releases, so positional passing can
# silently bind the values to the wrong parameters.
# NOTE: `verbose` is deprecated in recent PyTorch; drop it if it is rejected.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=FACTOR, patience=PATIENCE,
    verbose=True, threshold=THRESHOLD
)

In [None]:
%%time
# Training call is disabled; uncomment to re-run. The log recorded below was
# captured from an earlier run.
# epoch_count=15
# history, best_param = \
#         train(resnet152, train_loader, loss_func, optimizer, epoch_count,
#               accuracy, val_loader, scheduler)

# models_history['resnet152'] = history

```
[  1/15]|Mode: T |Loss:   1.845 |Metric:  0.5468 |: 100%|██████████| 184/184 [02:18<00:00,  1.33it/s]
[  1/15]|Mode: V |Loss:  0.8647 |Metric:  0.8008 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[  2/15]|Mode: T |Loss:  0.6823 |Metric:  0.8295 |: 100%|██████████| 184/184 [02:17<00:00,  1.33it/s]
[  2/15]|Mode: V |Loss:  0.4661 |Metric:  0.8808 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[  3/15]|Mode: T |Loss:  0.4814 |Metric:  0.8803 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[  3/15]|Mode: V |Loss:  0.4322 |Metric:  0.8918 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[  4/15]|Mode: T |Loss:  0.4143 |Metric:  0.8957 |: 100%|██████████| 184/184 [02:17<00:00,  1.33it/s]
[  4/15]|Mode: V |Loss:  0.3416 |Metric:  0.9138 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[  5/15]|Mode: T |Loss:  0.3477 |Metric:  0.9116 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[  5/15]|Mode: V |Loss:  0.3426 |Metric:   0.916 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[  6/15]|Mode: T |Loss:  0.3621 |Metric:   0.913 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[  6/15]|Mode: V |Loss:   0.263 |Metric:  0.9332 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[  7/15]|Mode: T |Loss:  0.2838 |Metric:  0.9286 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[  7/15]|Mode: V |Loss:  0.2604 |Metric:  0.9362 |: 100%|██████████| 62/62 [00:34<00:00,  1.80it/s]
[  8/15]|Mode: T |Loss:  0.2557 |Metric:   0.936 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[  8/15]|Mode: V |Loss:  0.2437 |Metric:  0.9473 |: 100%|██████████| 62/62 [00:34<00:00,  1.80it/s]
[  9/15]|Mode: T |Loss:   0.245 |Metric:  0.9365 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[  9/15]|Mode: V |Loss:  0.3097 |Metric:  0.9249 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
[ 10/15]|Mode: T |Loss:  0.2217 |Metric:  0.9428 |: 100%|██████████| 184/184 [02:17<00:00,  1.34it/s]
[ 10/15]|Mode: V |Loss:  0.2561 |Metric:  0.9382 |: 100%|██████████| 62/62 [00:34<00:00,  1.79it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    10: reducing learning rate of group 0 to 5.0000e-03.
[ 11/15]|Mode: T |Loss:  0.1448 |Metric:  0.9642 |: 100%|██████████| 184/184 [02:15<00:00,  1.35it/s]
[ 11/15]|Mode: V |Loss:   0.228 |Metric:  0.9478 |: 100%|██████████| 62/62 [00:34<00:00,  1.80it/s]
[ 12/15]|Mode: T |Loss:   0.123 |Metric:  0.9661 |: 100%|██████████| 184/184 [02:15<00:00,  1.35it/s]
[ 12/15]|Mode: V |Loss:  0.2034 |Metric:  0.9549 |: 100%|██████████| 62/62 [00:34<00:00,  1.81it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    12: reducing learning rate of group 0 to 2.5000e-03.
[ 13/15]|Mode: T |Loss:  0.08531 |Metric:  0.9783 |: 100%|██████████| 184/184 [02:15<00:00,  1.36it/s]
[ 13/15]|Mode: V |Loss:  0.1788 |Metric:  0.9629 |: 100%|██████████| 62/62 [00:34<00:00,  1.81it/s]
[ 14/15]|Mode: T |Loss:  0.06895 |Metric:  0.9807 |: 100%|██████████| 184/184 [02:15<00:00,  1.36it/s]
[ 14/15]|Mode: V |Loss:  0.1722 |Metric:  0.9645 |: 100%|██████████| 62/62 [00:34<00:00,  1.81it/s]
[ 15/15]|Mode: T |Loss:  0.05891 |Metric:  0.9844 |: 100%|██████████| 184/184 [02:15<00:00,  1.36it/s]
[ 15/15]|Mode: V |Loss:  0.1697 |Metric:  0.9645 |: 100%|██████████| 62/62 [00:34<00:00,  1.81it/s]Epoch    15: reducing learning rate of group 0 to 1.2500e-03.
CPU times: user 36min 50s, sys: 6min, total: 42min 51s
Wall time: 42min 53s
```

### <a name='squeezenet1_1'>SqueezeNet1_1</a>

In [None]:
# SqueezeNet 1.1 with pretrained weights from torchvision
squeezenet1_1 = models.squeezenet1_1(pretrained=True)

# freeze parameters
# (this loop used to iterate over `alexnet` by mistake, which left every
# SqueezeNet weight trainable; the log recorded below predates this fix)
for name, param in squeezenet1_1.named_parameters():
    if 'classifier' not in name:
        param.requires_grad = False

# replace last layer: SqueezeNet classifies with a 1x1 convolution
in_dim = squeezenet1_1.classifier[1].in_channels
classifier = nn.Conv2d(in_dim, num_classes, 1)

squeezenet1_1.classifier[1] = classifier
# Keep the model's own class count in sync with the new head; some
# torchvision versions use this attribute when reshaping the output.
squeezenet1_1.num_classes = num_classes
squeezenet1_1 = squeezenet1_1.to(DEVICE)

In [None]:
# Optimiser settings for SqueezeNet 1.1
lr = 1e-3
WEIGHT_DECAY = 1e-4
optimizer = torch.optim.Adam(squeezenet1_1.parameters(),
                             lr=lr,
                             weight_decay=WEIGHT_DECAY)

# Scheduler settings: halve the learning rate when the monitored metric has
# not improved by more than THRESHOLD for more than PATIENCE epochs.
FACTOR = 0.5
THRESHOLD = 0.01
PATIENCE = 1

loss_func = nn.CrossEntropyLoss().to(DEVICE)
# Keyword arguments on purpose: the position of `verbose` / `threshold` in the
# signature differs between PyTorch releases, so positional passing can
# silently bind the values to the wrong parameters.
# NOTE: `verbose` is deprecated in recent PyTorch; drop it if it is rejected.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=FACTOR, patience=PATIENCE,
    verbose=True, threshold=THRESHOLD
)

In [None]:
%%time
# Training call is disabled; uncomment to re-run. The log recorded below was
# captured from an earlier run.
# epoch_count=15
# history, best_param = \
#         train(squeezenet1_1, train_loader, loss_func, optimizer, epoch_count,
#               accuracy, val_loader, scheduler)
# models_history['squeezenet1_1'] = history

```
[  1/15]|Mode: T |Loss:   3.164 |Metric:  0.1054 |: 100%|██████████| 184/184 [01:39<00:00,  1.86it/s]
[  1/15]|Mode: V |Loss:   2.945 |Metric:  0.1349 |: 100%|██████████| 62/62 [00:21<00:00,  2.87it/s]
[  2/15]|Mode: T |Loss:   2.807 |Metric:  0.1977 |: 100%|██████████| 184/184 [01:39<00:00,  1.85it/s]
[  2/15]|Mode: V |Loss:   2.406 |Metric:  0.3199 |: 100%|██████████| 62/62 [00:21<00:00,  2.84it/s]
[  3/15]|Mode: T |Loss:   2.152 |Metric:  0.3976 |: 100%|██████████| 184/184 [01:40<00:00,  1.83it/s]
[  3/15]|Mode: V |Loss:   1.624 |Metric:  0.5564 |: 100%|██████████| 62/62 [00:21<00:00,  2.82it/s]
[  4/15]|Mode: T |Loss:   1.491 |Metric:  0.5963 |: 100%|██████████| 184/184 [01:40<00:00,  1.83it/s]
[  4/15]|Mode: V |Loss:   1.143 |Metric:  0.7061 |: 100%|██████████| 62/62 [00:21<00:00,  2.84it/s]
[  5/15]|Mode: T |Loss:    1.18 |Metric:  0.6995 |: 100%|██████████| 184/184 [01:40<00:00,  1.83it/s]
[  5/15]|Mode: V |Loss:  0.9879 |Metric:  0.7416 |: 100%|██████████| 62/62 [00:21<00:00,  2.83it/s]
[  6/15]|Mode: T |Loss:  0.9631 |Metric:  0.7522 |: 100%|██████████| 184/184 [01:40<00:00,  1.82it/s]
[  6/15]|Mode: V |Loss:  0.8767 |Metric:  0.7643 |: 100%|██████████| 62/62 [00:21<00:00,  2.83it/s]
[  7/15]|Mode: T |Loss:  0.8277 |Metric:  0.7839 |: 100%|██████████| 184/184 [01:39<00:00,  1.85it/s]
[  7/15]|Mode: V |Loss:  0.7745 |Metric:  0.7923 |: 100%|██████████| 62/62 [00:21<00:00,  2.88it/s]
[  8/15]|Mode: T |Loss:  0.7085 |Metric:  0.8126 |: 100%|██████████| 184/184 [01:38<00:00,  1.87it/s]
[  8/15]|Mode: V |Loss:  0.7208 |Metric:  0.8099 |: 100%|██████████| 62/62 [00:21<00:00,  2.88it/s]
[  9/15]|Mode: T |Loss:  0.6178 |Metric:  0.8418 |: 100%|██████████| 184/184 [01:38<00:00,  1.87it/s]
[  9/15]|Mode: V |Loss:  0.6497 |Metric:  0.8293 |: 100%|██████████| 62/62 [00:21<00:00,  2.93it/s]
[ 10/15]|Mode: T |Loss:  0.5561 |Metric:  0.8522 |: 100%|██████████| 184/184 [01:37<00:00,  1.89it/s]
[ 10/15]|Mode: V |Loss:  0.6294 |Metric:  0.8349 |: 100%|██████████| 62/62 [00:21<00:00,  2.91it/s]
[ 11/15]|Mode: T |Loss:  0.5469 |Metric:  0.8555 |: 100%|██████████| 184/184 [01:37<00:00,  1.88it/s]
[ 11/15]|Mode: V |Loss:  0.5804 |Metric:  0.8505 |: 100%|██████████| 62/62 [00:21<00:00,  2.91it/s]
[ 12/15]|Mode: T |Loss:  0.4714 |Metric:  0.8733 |: 100%|██████████| 184/184 [01:38<00:00,  1.87it/s]
[ 12/15]|Mode: V |Loss:  0.5544 |Metric:  0.8485 |: 100%|██████████| 62/62 [00:21<00:00,  2.86it/s]
[ 13/15]|Mode: T |Loss:  0.4445 |Metric:  0.8812 |: 100%|██████████| 184/184 [01:39<00:00,  1.84it/s]
[ 13/15]|Mode: V |Loss:  0.4989 |Metric:  0.8689 |: 100%|██████████| 62/62 [00:21<00:00,  2.84it/s]
[ 14/15]|Mode: T |Loss:  0.4184 |Metric:  0.8884 |: 100%|██████████| 184/184 [01:39<00:00,  1.85it/s]
[ 14/15]|Mode: V |Loss:  0.5035 |Metric:  0.8659 |: 100%|██████████| 62/62 [00:21<00:00,  2.91it/s]
[ 15/15]|Mode: T |Loss:  0.3925 |Metric:  0.8965 |: 100%|██████████| 184/184 [01:37<00:00,  1.88it/s]
[ 15/15]|Mode: V |Loss:  0.8555 |Metric:  0.8215 |: 100%|██████████| 62/62 [00:21<00:00,  2.92it/s]Epoch    15: reducing learning rate of group 0 to 5.0000e-04.
CPU times: user 28min 45s, sys: 1min 24s, total: 30min 10s
Wall time: 30min 11s
```

### <a name='densenet201'>DenseNet201</a>

In [None]:
# DenseNet201 with pretrained weights from torchvision
densenet201 = models.densenet201(pretrained=True)

# Freeze everything except the classifier, the last dense layer of the last
# dense block and the final normalisation layer
for name, param in densenet201.named_parameters():
    if any(key in name
           for key in ('classifier', 'denseblock4.denselayer32', 'norm5')):
        continue
    param.requires_grad_(False)

# New head: dropout followed by a linear layer with one output per class
in_dim = densenet201.classifier.in_features
classifier = nn.Sequential(nn.Dropout(p=0.5),
                           nn.Linear(in_dim, num_classes))
densenet201.classifier = classifier
densenet201 = densenet201.to(DEVICE)

In [None]:
# Optimiser settings for DenseNet201
lr = 1e-2
WEIGHT_DECAY = 1e-7
optimizer = torch.optim.Adam(densenet201.parameters(),
                             lr=lr,
                             weight_decay=WEIGHT_DECAY)

# Scheduler settings: halve the learning rate when the monitored metric has
# not improved by more than THRESHOLD for more than PATIENCE epochs.
FACTOR = 0.5
THRESHOLD = 0.01
PATIENCE = 1

loss_func = nn.CrossEntropyLoss().to(DEVICE)
# Keyword arguments on purpose: the position of `verbose` / `threshold` in the
# signature differs between PyTorch releases, so positional passing can
# silently bind the values to the wrong parameters.
# NOTE: `verbose` is deprecated in recent PyTorch; drop it if it is rejected.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=FACTOR, patience=PATIENCE,
    verbose=True, threshold=THRESHOLD
)

In [None]:
%%time
# Training call is disabled; uncomment to re-run. The log recorded below was
# captured from an earlier run.
# epoch_count=15
# history, best_param = \
#         train(densenet201, train_loader, loss_func, optimizer, epoch_count,
#               accuracy, val_loader, scheduler)
# models_history['densenet201'] = history

```
[  1/15]|Mode: T |Loss:   1.755 |Metric:  0.5918 |: 100%|██████████| 184/184 [02:25<00:00,  1.26it/s]
[  1/15]|Mode: V |Loss:  0.7758 |Metric:  0.8082 |: 100%|██████████| 62/62 [00:38<00:00,  1.63it/s]
[  2/15]|Mode: T |Loss:  0.9301 |Metric:  0.7577 |: 100%|██████████| 184/184 [02:23<00:00,  1.28it/s]
[  2/15]|Mode: V |Loss:   0.583 |Metric:  0.8536 |: 100%|██████████| 62/62 [00:38<00:00,  1.63it/s]
[  3/15]|Mode: T |Loss:  0.7293 |Metric:  0.8019 |: 100%|██████████| 184/184 [02:23<00:00,  1.28it/s]
[  3/15]|Mode: V |Loss:  0.5473 |Metric:  0.8558 |: 100%|██████████| 62/62 [00:37<00:00,  1.64it/s]
[  4/15]|Mode: T |Loss:  0.6154 |Metric:  0.8307 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[  4/15]|Mode: V |Loss:  0.4634 |Metric:  0.8768 |: 100%|██████████| 62/62 [00:37<00:00,  1.64it/s]
[  5/15]|Mode: T |Loss:  0.5433 |Metric:  0.8464 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[  5/15]|Mode: V |Loss:  0.3971 |Metric:  0.8911 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
[  6/15]|Mode: T |Loss:   0.494 |Metric:  0.8621 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[  6/15]|Mode: V |Loss:  0.3864 |Metric:  0.8946 |: 100%|██████████| 62/62 [00:37<00:00,  1.64it/s]
[  7/15]|Mode: T |Loss:  0.4465 |Metric:  0.8761 |: 100%|██████████| 184/184 [02:22<00:00,  1.29it/s]
[  7/15]|Mode: V |Loss:  0.4074 |Metric:  0.8954 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch     7: reducing learning rate of group 0 to 5.0000e-03.
[  8/15]|Mode: T |Loss:  0.3808 |Metric:  0.8932 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[  8/15]|Mode: V |Loss:  0.3517 |Metric:  0.9029 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
[  9/15]|Mode: T |Loss:  0.3474 |Metric:  0.8988 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[  9/15]|Mode: V |Loss:  0.3509 |Metric:  0.9045 |: 100%|██████████| 62/62 [00:37<00:00,  1.65it/s]
[ 10/15]|Mode: T |Loss:  0.3248 |Metric:  0.9061 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[ 10/15]|Mode: V |Loss:  0.3293 |Metric:  0.9135 |: 100%|██████████| 62/62 [00:37<00:00,  1.65it/s]
[ 11/15]|Mode: T |Loss:  0.3134 |Metric:   0.914 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[ 11/15]|Mode: V |Loss:  0.3427 |Metric:  0.9115 |: 100%|██████████| 62/62 [00:37<00:00,  1.65it/s]
[ 12/15]|Mode: T |Loss:  0.2915 |Metric:  0.9143 |: 100%|██████████| 184/184 [02:20<00:00,  1.31it/s]
[ 12/15]|Mode: V |Loss:  0.3398 |Metric:  0.9145 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    12: reducing learning rate of group 0 to 2.5000e-03.
[ 13/15]|Mode: T |Loss:  0.2697 |Metric:  0.9227 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[ 13/15]|Mode: V |Loss:  0.3055 |Metric:  0.9234 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
[ 14/15]|Mode: T |Loss:  0.2587 |Metric:  0.9257 |: 100%|██████████| 184/184 [02:21<00:00,  1.30it/s]
[ 14/15]|Mode: V |Loss:  0.3003 |Metric:  0.9254 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
[ 15/15]|Mode: T |Loss:  0.2435 |Metric:  0.9289 |: 100%|██████████| 184/184 [02:20<00:00,  1.31it/s]
[ 15/15]|Mode: V |Loss:  0.3118 |Metric:  0.9224 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]Epoch    15: reducing learning rate of group 0 to 1.2500e-03.
CPU times: user 36min 41s, sys: 8min 8s, total: 44min 49s
Wall time: 44min 54s
```

### <a name='inception_v3'>Inception_v3</a>

In [None]:
# This section feeds the network 299x299 images, so it gets its own
# transformers and loaders.
IM_SIZE = (299, 299)
batch_size = 64

# Training pipeline: resize, light augmentation (rotation up to 15 degrees,
# brightness/contrast jitter), then tensor conversion and normalisation.
train_transformer = transforms.Compose([
    transforms.Resize(IM_SIZE),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.5, contrast=0.5),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Evaluation pipeline: same preprocessing without the random augmentation
val_transformer = transforms.Compose([
    transforms.Resize(IM_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Create data loaders
train_loader_inc = MyDataLoader(dataset, train_indices, batch_size,
                                train_transformer, True)
val_loader_inc = MyDataLoader(dataset, val_indices, batch_size, val_transformer)
# The test loader needs the evaluation preprocessing as well: without it the
# images keep their different sizes (and cannot be stacked into a batch) and
# are not normalised the way the model was trained.
test_loader_inc = MyDataLoader(dataset, test_indices, batch_size,
                               val_transformer)

In [None]:
# Inception v3 with pretrained weights, built without the auxiliary classifier
inception_v3 = models.inception_v3(pretrained=True, aux_logits=False)

# Freeze every parameter whose name mentions neither 'avgpool' nor 'Mixed_7c'
for name, param in inception_v3.named_parameters():
    if 'avgpool' in name or 'Mixed_7c' in name:
        continue
    param.requires_grad_(False)

# Swap the final layer for a fresh one with one output per class and raise
# the dropout probability of the model's own dropout layer
in_dim = inception_v3.fc.in_features
classifier = nn.Linear(in_features=in_dim, out_features=num_classes)
inception_v3.fc = classifier
inception_v3.dropout.p = 0.8
inception_v3 = inception_v3.to(DEVICE)

In [None]:
# Optimiser settings for Inception v3
lr = 1e-2
WEIGHT_DECAY = 1e-6
optimizer = torch.optim.Adam(inception_v3.parameters(),
                             lr=lr,
                             weight_decay=WEIGHT_DECAY)

# Scheduler settings: halve the learning rate when the monitored metric has
# not improved by more than THRESHOLD for more than PATIENCE epochs.
FACTOR = 0.5
THRESHOLD = 0.01
PATIENCE = 1

loss_func = nn.CrossEntropyLoss().to(DEVICE)
# Keyword arguments on purpose: the position of `verbose` / `threshold` in the
# signature differs between PyTorch releases, so positional passing can
# silently bind the values to the wrong parameters.
# NOTE: `verbose` is deprecated in recent PyTorch; drop it if it is rejected.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=FACTOR, patience=PATIENCE,
    verbose=True, threshold=THRESHOLD
)

In [None]:
# Training call is disabled; uncomment to re-run. The log recorded below was
# captured from an earlier run.
# NOTE(review): the call now references the 299x299 loaders created for this
# section (train_loader_inc / val_loader_inc); it previously named the 224x224
# train_loader / val_loader.
# %%time
# epoch_count=15
# history, best_param = \
#         train(inception_v3, train_loader_inc, loss_func, optimizer, epoch_count,
#               accuracy, val_loader_inc, scheduler)
# models_history['inception_v3'] = history

```
[  1/15]|Mode: T |Loss:   1.735 |Metric:  0.5925 |: 100%|██████████| 184/184 [02:37<00:00,  1.17it/s]
[  1/15]|Mode: V |Loss:  0.8714 |Metric:  0.7792 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
[  2/15]|Mode: T |Loss:  0.8511 |Metric:  0.7871 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[  2/15]|Mode: V |Loss:  0.6562 |Metric:  0.8339 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
[  3/15]|Mode: T |Loss:  0.6538 |Metric:  0.8338 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[  3/15]|Mode: V |Loss:  0.6274 |Metric:  0.8493 |: 100%|██████████| 62/62 [00:37<00:00,  1.67it/s]
[  4/15]|Mode: T |Loss:  0.5366 |Metric:  0.8621 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[  4/15]|Mode: V |Loss:  0.5603 |Metric:  0.8636 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
[  5/15]|Mode: T |Loss:  0.4934 |Metric:  0.8746 |: 100%|██████████| 184/184 [02:40<00:00,  1.15it/s]
[  5/15]|Mode: V |Loss:  0.5153 |Metric:  0.8714 |: 100%|██████████| 62/62 [00:38<00:00,  1.60it/s]
[  6/15]|Mode: T |Loss:  0.4141 |Metric:  0.8932 |: 100%|██████████| 184/184 [02:41<00:00,  1.14it/s]
[  6/15]|Mode: V |Loss:   0.542 |Metric:  0.8661 |: 100%|██████████| 62/62 [00:37<00:00,  1.65it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch     6: reducing learning rate of group 0 to 5.0000e-03.
[  7/15]|Mode: T |Loss:  0.3023 |Metric:  0.9197 |: 100%|██████████| 184/184 [02:38<00:00,  1.16it/s]
[  7/15]|Mode: V |Loss:  0.4365 |Metric:  0.8954 |: 100%|██████████| 62/62 [00:37<00:00,  1.66it/s]
[  8/15]|Mode: T |Loss:  0.2467 |Metric:  0.9335 |: 100%|██████████| 184/184 [02:39<00:00,  1.16it/s]
[  8/15]|Mode: V |Loss:  0.4432 |Metric:  0.8987 |: 100%|██████████| 62/62 [00:37<00:00,  1.67it/s]
[  9/15]|Mode: T |Loss:  0.2188 |Metric:    0.94 |: 100%|██████████| 184/184 [02:37<00:00,  1.17it/s]
[  9/15]|Mode: V |Loss:  0.4151 |Metric:  0.9022 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch     9: reducing learning rate of group 0 to 2.5000e-03.
[ 10/15]|Mode: T |Loss:  0.1814 |Metric:  0.9502 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[ 10/15]|Mode: V |Loss:  0.3915 |Metric:   0.912 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
[ 11/15]|Mode: T |Loss:  0.1431 |Metric:  0.9597 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[ 11/15]|Mode: V |Loss:   0.396 |Metric:  0.9077 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
[ 12/15]|Mode: T |Loss:  0.1315 |Metric:  0.9651 |: 100%|██████████| 184/184 [02:36<00:00,  1.18it/s]
[ 12/15]|Mode: V |Loss:   0.397 |Metric:  0.9102 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    12: reducing learning rate of group 0 to 1.2500e-03.
[ 13/15]|Mode: T |Loss:  0.1162 |Metric:  0.9663 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[ 13/15]|Mode: V |Loss:  0.3885 |Metric:  0.9118 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
[ 14/15]|Mode: T |Loss:  0.1008 |Metric:  0.9719 |: 100%|██████████| 184/184 [02:36<00:00,  1.17it/s]
[ 14/15]|Mode: V |Loss:  0.3898 |Metric:   0.912 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]
  0%|          | 0/184 [00:00<?, ?it/s]Epoch    14: reducing learning rate of group 0 to 6.2500e-04.
[ 15/15]|Mode: T |Loss:  0.09568 |Metric:  0.9727 |: 100%|██████████| 184/184 [02:36<00:00,  1.18it/s]
[ 15/15]|Mode: V |Loss:  0.3889 |Metric:   0.913 |: 100%|██████████| 62/62 [00:36<00:00,  1.69it/s]CPU times: user 42min 3s, sys: 6min 30s, total: 48min 33s
Wall time: 48min 39s
```