## Домашнее задание №1 по блоку CV
Преподаватель: Наталья Баданина

    Возьмите датасет https://www.kaggle.com/ajayrana/hymenoptera-data
    Реализуйте сверточную нейронную сеть с использованием tf.keras или keras. Используйте сверточные, пуллинговые и полносвязанные слои. Обучите на train выборке в течении 10 эпох, оцените качество на отложенной выборке
    Добавьте в предыдущую архитектуру слои BatchNorm. Обучите на train выборке в течении 10 эпох, оцените качество на отложенной выборке
    Создайте модель ResNet 50 (https://www.tensorflow.org/api_docs/python/tf/keras/applications/ResNet50), инициализированную случайными весами, и обучите ее на train выборке в течении 10 эпох, оцените качество на отложенной выборке
    Создайте модель ResNet 50, инициализированную весами ImageNet, и обучите ее на train выборке в течении 10 эпох, оцените качество на отложенной выборке
    Результат пришлите в виде Jupyter Notebook на github’е или расшаренного Google Colab-блокнота


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import gc
import copy
import PIL
from catalyst.utils import set_global_seed, prepare_cudnn

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BS=16
os.listdir('hymenoptera_data/hymenoptera_data/train')
SEED = 2021

np.random.seed(SEED)
set_global_seed(SEED)
prepare_cudnn()

In [3]:
image_size = 224
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
valid_transforms = transforms.Compose([
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
data_dir = 'hymenoptera_data/hymenoptera_data'
train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_transforms)
valid_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transform=valid_transforms)
     
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BS, shuffle=True, num_workers=4)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=BS, shuffle=False, num_workers=4)


### Используем сверточные, пуллинговые и полносвязанные слои

In [4]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 3 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 8, 3)
        self.conv2 = nn.Conv2d(8, 16, 3)
        self.fc1 = nn.Linear(16*54*54, 128)  # 6*6 from image dimension
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 16*54*54)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net().to(device)
print(net)

Net(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=46656, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)


In [5]:
batch_x, batch_y = next(iter(train_dataloader))
out = net(batch_x.to(device))

In [6]:
m = nn.Softmax(dim=1)
output = m(out)
output[:,0].shape

torch.Size([16])

In [7]:
loaders = {'train': train_dataloader,
          'val': valid_dataloader}

dataset_sizes = {'train': len(train_dataset),
                 'val': len(valid_dataset)}

In [8]:
def train_model(model, criterion, optimizer, scheduler, our_dataloader, num_epochs=10):
    
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')

        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  
            else:
                model.eval()   

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in our_dataloader[phase]:
                optimizer.zero_grad()
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                # forward
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc

        print()
    print(f'Best val Acc: {best_acc:4f}')


In [9]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.AdamW(net.parameters(), lr=0.0005)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.75)
    
model_ft = train_model(net, 
                       criterion, 
                       optimizer_ft, 
                       exp_lr_scheduler,
                       loaders,
                       num_epochs=10)


Epoch 0/9



Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate



train Loss: 0.8174 Acc: 0.4672
val Loss: 0.7106 Acc: 0.5425

Epoch 1/9
train Loss: 0.7097 Acc: 0.5000
val Loss: 0.6925 Acc: 0.5163

Epoch 2/9
train Loss: 0.6803 Acc: 0.6270
val Loss: 0.6854 Acc: 0.5425

Epoch 3/9
train Loss: 0.6768 Acc: 0.5410
val Loss: 0.6765 Acc: 0.5556

Epoch 4/9
train Loss: 0.6423 Acc: 0.6393
val Loss: 0.6691 Acc: 0.6275

Epoch 5/9
train Loss: 0.6117 Acc: 0.6557
val Loss: 0.6608 Acc: 0.5882

Epoch 6/9
train Loss: 0.5847 Acc: 0.6762
val Loss: 0.7342 Acc: 0.5490

Epoch 7/9
train Loss: 0.5809 Acc: 0.6803
val Loss: 0.6921 Acc: 0.5490

Epoch 8/9
train Loss: 0.5625 Acc: 0.7049
val Loss: 0.7187 Acc: 0.5882

Epoch 9/9
train Loss: 0.5534 Acc: 0.7172
val Loss: 0.6614 Acc: 0.6340

Best val Acc: 0.633987


### Добавляем в предыдущую архитектуру слои BatchNorm

In [10]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 3 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 8, 3)
        self.conv2 = nn.Conv2d(8, 16, 3)
        self.fc1 = nn.Linear(16*54*54, 128)  # 6*6 from image dimension
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 2)
        self.bn0 = nn.BatchNorm2d(3)
        self.bn1 = nn.BatchNorm2d(8)
        self.bn2 = nn.BatchNorm2d(16)
        self.bn3 = nn.BatchNorm1d(64)

    def forward(self, x):

        x = self.bn0(x)
        x = self.bn1(self.conv1(x))
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        x = self.conv2(x)
        x = self.bn2(x)

        x = F.max_pool2d(F.relu(x), 2)

        #x = self.bn2(x)
        x = x.view(-1, 16*54*54)
        x = F.relu(self.fc1(x))
        x = F.relu(self.bn3(self.fc2(x)))
        x = self.fc3(x)
        return x


net = Net().to(device)
net

Net(
  (conv1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=46656, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
  (bn0): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [11]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.AdamW(net.parameters(), lr=0.0005)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.75)
    
model_ft = train_model(net, 
                       criterion, 
                       optimizer_ft, 
                       exp_lr_scheduler,
                       loaders,
                       num_epochs=10)

Epoch 0/9
train Loss: 0.7106 Acc: 0.5656
val Loss: 0.6635 Acc: 0.5948

Epoch 1/9
train Loss: 0.6345 Acc: 0.6393
val Loss: 0.6442 Acc: 0.6405

Epoch 2/9
train Loss: 0.6229 Acc: 0.6352
val Loss: 0.6553 Acc: 0.6405

Epoch 3/9
train Loss: 0.6094 Acc: 0.6680
val Loss: 0.6717 Acc: 0.6209

Epoch 4/9
train Loss: 0.6081 Acc: 0.6230
val Loss: 0.6223 Acc: 0.6601

Epoch 5/9
train Loss: 0.5885 Acc: 0.6639
val Loss: 0.6134 Acc: 0.6601

Epoch 6/9
train Loss: 0.5863 Acc: 0.6926
val Loss: 0.6138 Acc: 0.6863

Epoch 7/9
train Loss: 0.5863 Acc: 0.7131
val Loss: 0.6995 Acc: 0.6144

Epoch 8/9
train Loss: 0.5738 Acc: 0.6557
val Loss: 0.6156 Acc: 0.7124

Epoch 9/9
train Loss: 0.5779 Acc: 0.7008
val Loss: 0.5847 Acc: 0.7059

Best val Acc: 0.712418


In [12]:
gc.collect()
torch.cuda.empty_cache()

### Создаем модель ResNet 50, инициализированную весами ImageNet

In [13]:
net = models.resnet50(pretrained=True)
in_features = net.fc.in_features
net.fc = nn.Linear(in_features, 2)
net.to(device);

In [14]:
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.AdamW(net.parameters(), lr=0.0005)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.75)
    
model_ft = train_model(net, 
                       criterion, 
                       optimizer_ft, 
                       exp_lr_scheduler,
                       loaders,
                       num_epochs=10)

Epoch 0/9
train Loss: 0.6949 Acc: 0.7336
val Loss: 8.3338 Acc: 0.4575

Epoch 1/9
train Loss: 0.5405 Acc: 0.7377
val Loss: 2.3833 Acc: 0.6209

Epoch 2/9
train Loss: 0.3751 Acc: 0.8361
val Loss: 0.4191 Acc: 0.9020

Epoch 3/9
train Loss: 0.3266 Acc: 0.8730
val Loss: 0.3990 Acc: 0.8497

Epoch 4/9
train Loss: 0.2974 Acc: 0.8770
val Loss: 0.3760 Acc: 0.8954

Epoch 5/9
train Loss: 0.2057 Acc: 0.9180
val Loss: 0.5597 Acc: 0.8627

Epoch 6/9
train Loss: 0.2250 Acc: 0.9098
val Loss: 0.6904 Acc: 0.8235

Epoch 7/9
train Loss: 0.1753 Acc: 0.9303
val Loss: 0.6547 Acc: 0.8235

Epoch 8/9
train Loss: 0.2299 Acc: 0.8975
val Loss: 0.8677 Acc: 0.7778

Epoch 9/9
train Loss: 0.2092 Acc: 0.9262
val Loss: 0.4680 Acc: 0.8627

Best val Acc: 0.901961


Точночть простой модели  - 0.633987

Точночть простой модели со слоями BatchNorm - 0.712418

Точность модели предобученной ResNet 50, инициализированной весами ImageNet - 0.901961