## Обучите CNN на CIFAR-100 через дообучение ImageNet Resnet-50.

In [1]:
import torch
import numpy as np
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch import nn
from torch import optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from torchsummary import summary


In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
def train_valid_split(Xt):
    X_train, X_test = train_test_split(Xt, test_size=0.05, random_state=13)
    return X_train, X_test


## Подача датасета

In [4]:
dataset = torchvision.datasets.CIFAR100(root='data/', train=True, download=True)


class MyOwnCifar(torch.utils.data.Dataset):

    def __init__(self, init_dataset, transform=None):
        self._base_dataset = init_dataset
        self.transform = transform

    def __len__(self):
        return len(self._base_dataset)

    def __getitem__(self, idx):
        img = self._base_dataset[idx][0]
        if self.transform is not None:
            img = self.transform(img)
        return img, self._base_dataset[idx][1]

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:04<00:00, 41973791.63it/s]


Extracting data/cifar-100-python.tar.gz to data/


In [5]:
trans_actions = transforms.Compose([torchvision.transforms.Resize(256),
                                    transforms.RandomCrop(224, padding=4),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])])
#mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
valid_transforms = transforms.Compose([transforms.ToTensor(),
                                       transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                         std=[0.229, 0.224, 0.225])])

train_dataset, valid_dataset = train_valid_split(dataset)

train_dataset = MyOwnCifar(train_dataset, trans_actions)
valid_dataset = MyOwnCifar(valid_dataset, valid_transforms)

train_loader = torch.utils.data.DataLoader(train_dataset,
                          batch_size=30,
                          shuffle=True,
                          num_workers=1)

valid_loader = torch.utils.data.DataLoader(valid_dataset,
                          batch_size=30,
                          shuffle=False,
                          num_workers=1)

## Импорт модели и ее модификации для класификации в 100 класов

In [6]:
from torchvision import models

In [7]:
ef1 = models.efficientnet_b1(pretrained=True)
ef1_2 = torch.nn.Sequential(*(list(ef1.children())[:-1]))
ef1_2.fc = nn.Sequential(nn.Flatten(), nn.Linear(1280, 100))
ef1_2 = ef1_2.to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 59.0MB/s]


In [8]:
summary(ef1_2, input_size=(3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12         [-1, 32, 112, 112]               0
           Conv2d-13         [-1, 16, 112, 112]             512
      BatchNorm2d-14         [-1, 16, 1

## Обучени

In [9]:
optimizer = torch.optim.Adam(ef1_2.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [10]:
num_epochs = 10
ef1_2.train()

for epoch in range(num_epochs):
    running_loss, running_items, running_right = 0.0, 0.0, 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data[0].to(device), data[1].to(device)

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = ef1_2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)
        running_right += (labels == torch.max(outputs, 1)[1]).sum()

        # выводим статистику о процессе обучения
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            ef1_2.eval()

            print(f'Epoch [{epoch + 1}/{num_epochs}]. ' \
                  f'Step [{i + 1}/{len(train_loader)}]. ' \
                  f'Loss: {running_loss / running_items:.3f}. ' \
                  f'Acc: {(running_right / running_items)*100:.3f}', end='. ')
            running_loss, running_items, running_right = 0.0, 0.0, 0.0

            test_running_right, test_running_total = 0.0, 0.0
            for i, data in enumerate(valid_loader):

                test_outputs = ef1_2(data[0].to(device))
                test_running_total += len(data[1])
                test_running_right += (data[1].to(device) == torch.max(test_outputs, 1)[1]).sum()

            print(f'Test acc: {(test_running_right / test_running_total)*100:.3f}')

        ef1_2.train()

print('Training is finished!')

Epoch [1/10]. Step [1/1584]. Loss: 0.153. Acc: 3.333. Test acc: 1.480
Epoch [1/10]. Step [301/1584]. Loss: 0.147. Acc: 3.767. Test acc: 0.960
Epoch [1/10]. Step [601/1584]. Loss: 0.131. Acc: 8.167. Test acc: 1.120
Epoch [1/10]. Step [901/1584]. Loss: 0.123. Acc: 10.856. Test acc: 1.120
Epoch [1/10]. Step [1201/1584]. Loss: 0.116. Acc: 14.422. Test acc: 0.440
Epoch [1/10]. Step [1501/1584]. Loss: 0.111. Acc: 17.467. Test acc: 1.080
Epoch [2/10]. Step [1/1584]. Loss: 0.107. Acc: 23.333. Test acc: 1.080
Epoch [2/10]. Step [301/1584]. Loss: 0.105. Acc: 20.833. Test acc: 1.480
Epoch [2/10]. Step [601/1584]. Loss: 0.104. Acc: 21.689. Test acc: 1.080
Epoch [2/10]. Step [901/1584]. Loss: 0.100. Acc: 23.400. Test acc: 0.760
Epoch [2/10]. Step [1201/1584]. Loss: 0.097. Acc: 25.000. Test acc: 1.480
Epoch [2/10]. Step [1501/1584]. Loss: 0.095. Acc: 27.578. Test acc: 1.120
Epoch [3/10]. Step [1/1584]. Loss: 0.093. Acc: 33.333. Test acc: 1.800
Epoch [3/10]. Step [301/1584]. Loss: 0.092. Acc: 28.511.

## Финальная точность

In [11]:
ef1_2.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in valid_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = ef1_2(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('The final Test Accuracy of the model: {} %'.format((correct / total) * 100))



The final Test Accuracy of the model: 1.16 %


## Выводы

Модель resnet34, resnet50 крайне медлено начинали обучение и обычно просто застревали.  Выбраная модель Eficient net b1 с претренированными весами от imagenet показывает более быстрою динамику обучения за счет наличия более лучшей точки входа для обучения часть весов хранящихся в Eficient net b1 более близки к различным локальным минимумам. Однако данная сеть избыточна для задачи в 100 класов с разрешением 32 на 32. Так же сеть слишком склонна к переобучению на данной задаче. В сравнении с сетью созданой в ручную CCN_5, Eficient net b1 быстрее обучается на тренировочной выборке но уровень переобучения сети гороздо выше за счет большей сложности мождели

