<h1>Transfer Learning<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Data-preparation" data-toc-modified-id="Data-preparation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Data preparation</a></span></li><li><span><a href="#Models-training" data-toc-modified-id="Models-training-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Models training</a></span><ul class="toc-item"><li><span><a href="#Option-1-(model-not-learned-and-change-only-last-layer)" data-toc-modified-id="Option-1-(model-not-learned-and-change-only-last-layer)-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Option 1 (model not learned and change only last layer)</a></span></li><li><span><a href="#Option-2-(model-not-learned-and-change-all-classifier)" data-toc-modified-id="Option-2-(model-not-learned-and-change-all-classifier)-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Option 2 (model not learned and change all classifier)</a></span></li></ul></li><li><span><a href="#Models-training-with-Transfer-Learning" data-toc-modified-id="Models-training-with-Transfer-Learning-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Models training with Transfer Learning</a></span><ul class="toc-item"><li><span><a href="#Option-1-(model-pre-trained,-no-frozen-pre-trained-parameters,-change-last-layer)" data-toc-modified-id="Option-1-(model-pre-trained,-no-frozen-pre-trained-parameters,-change-last-layer)-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Option 1 (model pre-trained, no frozen pre-trained parameters, change last layer)</a></span></li><li><span><a href="#Option-2-(model-pre-trained,-no-frozen-pre-trained-parameters,-change-all-classifier)" data-toc-modified-id="Option-2-(model-pre-trained,-no-frozen-pre-trained-parameters,-change-all-classifier)-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Option 2 (model pre-trained, no frozen pre-trained parameters, change all classifier)</a></span></li><li><span><a href="#Option-3-(model-pre-trained,-frozen-pre-trained-parameters,-change-all-classifier)" data-toc-modified-id="Option-3-(model-pre-trained,-frozen-pre-trained-parameters,-change-all-classifier)-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Option 3 (model pre-trained, frozen pre-trained parameters, change all classifier)</a></span></li><li><span><a href="#Finetuning" data-toc-modified-id="Finetuning-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>Finetuning</a></span></li></ul></li></ul></div>

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms, datasets
import torchvision.models as models

from tqdm import tqdm
import time

import requests
import os
import zipfile

In [2]:
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
device

'mps'

### Data preparation

In [3]:
DATA_URL = 'https://download.pytorch.org/tutorial/hymenoptera_data.zip'
DATA_PATH = os.path.join('.', 'TRANSFER')
FILE_NAME = os.path.join(DATA_PATH, 'hymenoptera_data.zip')

In [4]:
FILE_NAME 

'./TRANSFER/hymenoptera_data.zip'

In [5]:
if not os.path.isfile(FILE_NAME):
    print('Downloading the data...')
    os.makedirs('TRANSFER', exist_ok=True)
    with requests.get(DATA_URL) as req:
        with open(FILE_NAME, 'wb') as f:
            f.write(req.content)
    if 200 <= req.status_code < 300:
        print('Download complited')
    else:
        print('Download failed!!!')
else:
    print(FILE_NAME, 'already exists, skipping download')
    
with zipfile.ZipFile(FILE_NAME, 'r') as zip_ref:
    print('Unzipping process...')
    zip_ref.extractall('TRANSFER')

DATA_PATH = os.path.join(DATA_PATH, 'hymenoptera_data.zip')

./TRANSFER/hymenoptera_data.zip already exists, skipping download
Unzipping process...


In [6]:
DATA_PATH

'./TRANSFER/hymenoptera_data.zip'

In [7]:
!wget https://download.pytorch.org/tutorial/hymenoptera_data.zip
!uzip hymenoptera_data.zip

zsh:1: command not found: wget
zsh:1: command not found: uzip


In [7]:
transforms_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [8]:
transforms_val = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [9]:
train_data = datasets.ImageFolder(
    '/Users/alexeyfilichkin/Desktop/PyTorch/MNIST/TRANSFER/hymenoptera_data/train',
    transform=transforms_train)

val_data = datasets.ImageFolder(
    '/Users/alexeyfilichkin/Desktop/PyTorch/MNIST/TRANSFER/hymenoptera_data/val',
    transform=transforms_val)

In [10]:
class_name = train_data.classes
class_name

['ants', 'bees']

In [11]:
print(len(train_data))
print(len(val_data))

244
153


In [12]:
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16)

### Models training

#### Option 1 (model not learned and change only last layer)

In [30]:
model_1 = models.vgg11()
model_1

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [31]:
model_1.classifier[6] = nn.Linear(4096, 2)
model_1

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [32]:
model_1 = model_1.to(device)

loss_model_1 = nn.CrossEntropyLoss()
opt_1 = torch.optim.Adam(model_1.parameters(), lr=0.001)
lr_scheduler_1 = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_1, patience=5)

In [33]:
EPOCHS = 10
train_loss = []
train_acc = []
val_loss = []
val_acc = []
lr_list = []
best_loss = None
count = 0

In [34]:
start = time.time()

for epoch in range(1, EPOCHS+1):
    
    model_1.train()
    running_train_loss = []
    true_answer = 0
    
    train_loop = tqdm(train_loader, leave=False)
    for x, targets in train_loop:
        x = x.to(device)
        targets = targets.reshape(-1).to(torch.int32)
        targets = torch.eye(2)[targets].to(device)
        
        pred = model_1(x)
        loss = loss_model_1(pred, targets)
        
        opt_1.zero_grad()
        loss.backward()
        
        opt_1.step()
        
        running_train_loss.append(loss.item())
        mean_train_loss = sum(running_train_loss) / len(running_train_loss)
        
        true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
        train_loop.set_description(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f}')
    
    running_train_acc = true_answer / len(train_data)
    
    train_loss.append(mean_train_loss)
    train_acc.append(running_train_acc)
    
    model_1.eval()
    with torch.no_grad():
        running_val_loss = []
        true_answer = 0
        
        for x, targets in val_loader:
#            x = x.reshape(-1, 64*64).to(device)
            x = x.to(device)
            targets = targets.reshape(-1).to(torch.int32)
            targets = torch.eye(2)[targets].to(device)
            
            pred = model_1(x)
            loss = loss_model_1(pred, targets)
            
            running_val_loss.append(loss.item())
            mean_val_loss = sum(running_val_loss) / len(running_val_loss)
            
            true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            
        running_val_acc = true_answer / len(val_data)
        
        val_loss.append(mean_val_loss)
        val_acc.append(running_val_acc)
        
        
    
    lr_scheduler_1.step(mean_val_loss)
    lr = lr_scheduler_1._last_lr[0]
    lr_list.append(lr)
    
#    if epoch == 1 or epoch % 5 == 0:
    print(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f},'
          f'train_acc={running_train_acc:.4f}, val_loss={mean_val_loss:.4f},'
          f'val_acc={running_val_acc:.4f}, lr={lr:.4f}'
          )
        
#    if best_loss == None:
#        best_loss = mean_val_loss
        
#    if mean_val_loss < best_loss:
#        best_loss = mean_val_loss
        
#        checkpoint = {
#            'state_model': model_reg.state_dict(),
#            'state_opt': opt_reg.state_dict(),
#            'state_lr_scheduler': lr_scheduler.state_dict(),
#            'loss': {
#                'train_loss': train_loss,
#                'val_loss': val_loss,
#                'best_loss': best_loss
#            },
#           'metric': {
#                'train_acc': train_acc,
#                'val_acc': val_acc
#            },
#            'lr': lr_list,
#            'epoch': {
#                'EPOCHS': EPOCHS,
#                'save_epoch': epoch
#            }
#        }
        
#        torch.save(checkpoint,
#                   f'/Users/alexeyfilichkin/Desktop/PyTorch/MNIST/mnist/model_saved/model_state_dict_epoch_{epoch}.pt')
#        print(f'On epoch {epoch},'
#              f' model was saved with validation los function data: {mean_val_loss:.4f}', end='\n\n')
    
#    if earlystopping(mean_val_loss):
#        print(f'\33[31mTraining was stopped on {epoch+1} epoch\033[0m')
#        break

time_model = time.time() - start
print(f'Time of model training {EPOCHS} epochs: {time_model // 60:.0f}m {time_model % 60:.0f}s')

                                                                                

Epoch [1 / 10], train_loss=1.5851,train_acc=0.5000, val_loss=0.6896,val_acc=0.5425, lr=0.0010


                                                                                

Epoch [2 / 10], train_loss=0.6937,train_acc=0.4795, val_loss=0.6943,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [3 / 10], train_loss=0.6949,train_acc=0.5041, val_loss=0.6975,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [4 / 10], train_loss=0.6930,train_acc=0.5041, val_loss=0.6953,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [5 / 10], train_loss=0.6925,train_acc=0.5041, val_loss=0.6947,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [6 / 10], train_loss=0.6952,train_acc=0.4877, val_loss=0.6964,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [7 / 10], train_loss=0.6943,train_acc=0.5000, val_loss=0.6950,val_acc=0.4575, lr=0.0001


                                                                                

Epoch [8 / 10], train_loss=0.6928,train_acc=0.4959, val_loss=0.6946,val_acc=0.4575, lr=0.0001


                                                                                

Epoch [9 / 10], train_loss=0.6935,train_acc=0.4918, val_loss=0.6948,val_acc=0.4575, lr=0.0001


                                                                                

Epoch [10 / 10], train_loss=0.6934,train_acc=0.5000, val_loss=0.6948,val_acc=0.4575, lr=0.0001
Time of model training 10 epochs: 0m 59s


#### Option 2 (model not learned and change all classifier)

In [35]:
model_2 = models.vgg11()
model_2.classifier = nn.Linear(512*7*7, 2)
model_2

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [36]:
model_2 = model_2.to(device)

loss_model_2 = nn.CrossEntropyLoss()
opt_2 = torch.optim.Adam(model_2.parameters(), lr=0.001)
lr_scheduler_2 = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_2, patience=5)

In [37]:
EPOCHS = 10
train_loss = []
train_acc = []
val_loss = []
val_acc = []
lr_list = []
best_loss = None
count = 0

In [38]:
start = time.time()

for epoch in range(1, EPOCHS+1):
    
    model_2.train()
    running_train_loss = []
    true_answer = 0
    
    train_loop = tqdm(train_loader, leave=False)
    for x, targets in train_loop:
        x = x.to(device)
        targets = targets.reshape(-1).to(torch.int32)
        targets = torch.eye(2)[targets].to(device)
        
        pred = model_2(x)
        loss = loss_model_2(pred, targets)
        
        opt_2.zero_grad()
        loss.backward()
        
        opt_2.step()
        
        running_train_loss.append(loss.item())
        mean_train_loss = sum(running_train_loss) / len(running_train_loss)
        
        true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
        train_loop.set_description(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f}')
    
    running_train_acc = true_answer / len(train_data)
    
    train_loss.append(mean_train_loss)
    train_acc.append(running_train_acc)
    
    model_2.eval()
    with torch.no_grad():
        running_val_loss = []
        true_answer = 0
        
        for x, targets in val_loader:
            x = x.to(device)
            targets = targets.reshape(-1).to(torch.int32)
            targets = torch.eye(2)[targets].to(device)
            
            pred = model_2(x)
            loss = loss_model_2(pred, targets)
            
            running_val_loss.append(loss.item())
            mean_val_loss = sum(running_val_loss) / len(running_val_loss)
            
            true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            
        running_val_acc = true_answer / len(val_data)
        
        val_loss.append(mean_val_loss)
        val_acc.append(running_val_acc)
        
        
    
    lr_scheduler_2.step(mean_val_loss)
    lr = lr_scheduler_2._last_lr[0]
    lr_list.append(lr)
    
#    if epoch == 1 or epoch % 5 == 0:
    print(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f},'
          f'train_acc={running_train_acc:.4f}, val_loss={mean_val_loss:.4f},'
          f'val_acc={running_val_acc:.4f}, lr={lr:.4f}'
          )
time_model = time.time() - start
print(f'Time of model training {EPOCHS} epochs: {time_model // 60:.0f}m {time_model % 60:.0f}s')

                                                                                

Epoch [1 / 10], train_loss=0.9716,train_acc=0.5123, val_loss=0.6830,val_acc=0.5817, lr=0.0010


                                                                                

Epoch [2 / 10], train_loss=0.6600,train_acc=0.6066, val_loss=0.7218,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [3 / 10], train_loss=0.7017,train_acc=0.5041, val_loss=0.6970,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [4 / 10], train_loss=0.6981,train_acc=0.4344, val_loss=0.6935,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [5 / 10], train_loss=0.6932,train_acc=0.5041, val_loss=0.6940,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [6 / 10], train_loss=0.6937,train_acc=0.5041, val_loss=0.6960,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [7 / 10], train_loss=0.6938,train_acc=0.5041, val_loss=0.6959,val_acc=0.4575, lr=0.0001


                                                                                

Epoch [8 / 10], train_loss=0.6937,train_acc=0.5041, val_loss=0.6958,val_acc=0.4575, lr=0.0001


                                                                                

Epoch [9 / 10], train_loss=0.6927,train_acc=0.5041, val_loss=0.6956,val_acc=0.4575, lr=0.0001


                                                                                

Epoch [10 / 10], train_loss=0.6932,train_acc=0.5041, val_loss=0.6956,val_acc=0.4575, lr=0.0001
Time of model training 10 epochs: 0m 45s


### Models training with Transfer Learning

#### Option 1 (model pre-trained, no frozen pre-trained parameters, change last layer)

In [39]:
model_3 = models.vgg11(weights='DEFAULT')
model_3.classifier[6] = nn.Linear(4096, 2)
model_3

Downloading: "https://download.pytorch.org/models/vgg11-8a719046.pth" to /Users/alexeyfilichkin/.cache/torch/hub/checkpoints/vgg11-8a719046.pth
100%|████████████████████████████████████████| 507M/507M [00:27<00:00, 19.5MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [41]:
model_3 = model_3.to(device)

loss_model_3 = nn.CrossEntropyLoss()
opt_3 = torch.optim.Adam(model_3.parameters(), lr=0.001)
lr_scheduler_3 = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_3, patience=5)

In [42]:
EPOCHS = 10
train_loss = []
train_acc = []
val_loss = []
val_acc = []
lr_list = []
best_loss = None
count = 0

In [43]:
start = time.time()

for epoch in range(1, EPOCHS+1):
    
    model_3.train()
    running_train_loss = []
    true_answer = 0
    
    train_loop = tqdm(train_loader, leave=False)
    for x, targets in train_loop:
        x = x.to(device)
        targets = targets.reshape(-1).to(torch.int32)
        targets = torch.eye(2)[targets].to(device)
        
        pred = model_3(x)
        loss = loss_model_3(pred, targets)
        
        opt_3.zero_grad()
        loss.backward()
        
        opt_3.step()
        
        running_train_loss.append(loss.item())
        mean_train_loss = sum(running_train_loss) / len(running_train_loss)
        
        true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
        train_loop.set_description(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f}')
    
    running_train_acc = true_answer / len(train_data)
    
    train_loss.append(mean_train_loss)
    train_acc.append(running_train_acc)
    
    model_3.eval()
    with torch.no_grad():
        running_val_loss = []
        true_answer = 0
        
        for x, targets in val_loader:
            x = x.to(device)
            targets = targets.reshape(-1).to(torch.int32)
            targets = torch.eye(2)[targets].to(device)
            
            pred = model_3(x)
            loss = loss_model_3(pred, targets)
            
            running_val_loss.append(loss.item())
            mean_val_loss = sum(running_val_loss) / len(running_val_loss)
            
            true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            
        running_val_acc = true_answer / len(val_data)
        
        val_loss.append(mean_val_loss)
        val_acc.append(running_val_acc)
        
        
    
    lr_scheduler_3.step(mean_val_loss)
    lr = lr_scheduler_3._last_lr[0]
    lr_list.append(lr)
    
#    if epoch == 1 or epoch % 5 == 0:
    print(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f},'
          f'train_acc={running_train_acc:.4f}, val_loss={mean_val_loss:.4f},'
          f'val_acc={running_val_acc:.4f}, lr={lr:.4f}'
          )
time_model = time.time() - start
print(f'Time of model training {EPOCHS} epochs: {time_model // 60:.0f}m {time_model % 60:.0f}s')

                                                                                

Epoch [1 / 10], train_loss=1.2145,train_acc=0.4836, val_loss=0.7439,val_acc=0.5425, lr=0.0010


                                                                                

Epoch [2 / 10], train_loss=0.8365,train_acc=0.5574, val_loss=0.6344,val_acc=0.6405, lr=0.0010


                                                                                

Epoch [3 / 10], train_loss=0.9568,train_acc=0.5738, val_loss=0.7445,val_acc=0.3987, lr=0.0010


                                                                                

Epoch [4 / 10], train_loss=0.7177,train_acc=0.4754, val_loss=0.7004,val_acc=0.4771, lr=0.0010


                                                                                

Epoch [5 / 10], train_loss=0.7035,train_acc=0.4508, val_loss=0.7053,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [6 / 10], train_loss=0.7107,train_acc=0.5123, val_loss=0.7010,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [7 / 10], train_loss=0.6936,train_acc=0.4713, val_loss=0.6661,val_acc=0.5752, lr=0.0010


                                                                                

Epoch [8 / 10], train_loss=0.6708,train_acc=0.6107, val_loss=0.6262,val_acc=0.6209, lr=0.0010


                                                                                

Epoch [9 / 10], train_loss=0.6315,train_acc=0.6639, val_loss=0.6642,val_acc=0.6667, lr=0.0010


                                                                                

Epoch [10 / 10], train_loss=0.8155,train_acc=0.6352, val_loss=0.6615,val_acc=0.5817, lr=0.0010
Time of model training 10 epochs: 0m 58s


#### Option 2 (model pre-trained, no frozen pre-trained parameters, change all classifier)

In [44]:
model_4 = models.vgg11(weights='DEFAULT')
model_4.classifier = nn.Linear(512*7*7, 2)
model_4

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [45]:
model_4 = model_4.to(device)

loss_model_4 = nn.CrossEntropyLoss()
opt_4 = torch.optim.Adam(model_4.parameters(), lr=0.001)
lr_scheduler_4 = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_4, patience=5)

In [46]:
EPOCHS = 10
train_loss = []
train_acc = []
val_loss = []
val_acc = []
lr_list = []
best_loss = None
count = 0

In [47]:
start = time.time()

for epoch in range(1, EPOCHS+1):
    
    model_4.train()
    running_train_loss = []
    true_answer = 0
    
    train_loop = tqdm(train_loader, leave=False)
    for x, targets in train_loop:
        x = x.to(device)
        targets = targets.reshape(-1).to(torch.int32)
        targets = torch.eye(2)[targets].to(device)
        
        pred = model_4(x)
        loss = loss_model_4(pred, targets)
        
        opt_4.zero_grad()
        loss.backward()
        
        opt_4.step()
        
        running_train_loss.append(loss.item())
        mean_train_loss = sum(running_train_loss) / len(running_train_loss)
        
        true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
        train_loop.set_description(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f}')
    
    running_train_acc = true_answer / len(train_data)
    
    train_loss.append(mean_train_loss)
    train_acc.append(running_train_acc)
    
    model_4.eval()
    with torch.no_grad():
        running_val_loss = []
        true_answer = 0
        
        for x, targets in val_loader:
            x = x.to(device)
            targets = targets.reshape(-1).to(torch.int32)
            targets = torch.eye(2)[targets].to(device)
            
            pred = model_4(x)
            loss = loss_model_4(pred, targets)
            
            running_val_loss.append(loss.item())
            mean_val_loss = sum(running_val_loss) / len(running_val_loss)
            
            true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            
        running_val_acc = true_answer / len(val_data)
        
        val_loss.append(mean_val_loss)
        val_acc.append(running_val_acc)
        
        
    
    lr_scheduler_4.step(mean_val_loss)
    lr = lr_scheduler_4._last_lr[0]
    lr_list.append(lr)
    
#    if epoch == 1 or epoch % 5 == 0:
    print(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f},'
          f'train_acc={running_train_acc:.4f}, val_loss={mean_val_loss:.4f},'
          f'val_acc={running_val_acc:.4f}, lr={lr:.4f}'
          )
time_model = time.time() - start
print(f'Time of model training {EPOCHS} epochs: {time_model // 60:.0f}m {time_model % 60:.0f}s')

                                                                                

Epoch [1 / 10], train_loss=1.0428,train_acc=0.4098, val_loss=0.7025,val_acc=0.4902, lr=0.0010


                                                                                

Epoch [2 / 10], train_loss=0.6954,train_acc=0.5451, val_loss=0.6927,val_acc=0.5359, lr=0.0010


                                                                                

Epoch [3 / 10], train_loss=0.6954,train_acc=0.4795, val_loss=0.6796,val_acc=0.5425, lr=0.0010


                                                                                

Epoch [4 / 10], train_loss=0.6897,train_acc=0.4959, val_loss=0.6843,val_acc=0.5425, lr=0.0010


                                                                                

Epoch [5 / 10], train_loss=0.6838,train_acc=0.5287, val_loss=0.6509,val_acc=0.5425, lr=0.0010


                                                                                

Epoch [6 / 10], train_loss=0.6674,train_acc=0.5615, val_loss=0.6326,val_acc=0.5425, lr=0.0010


                                                                                

Epoch [7 / 10], train_loss=0.6764,train_acc=0.5328, val_loss=0.6415,val_acc=0.6078, lr=0.0010


                                                                                

Epoch [8 / 10], train_loss=0.6751,train_acc=0.5861, val_loss=0.6470,val_acc=0.6601, lr=0.0010


                                                                                

Epoch [9 / 10], train_loss=0.6392,train_acc=0.5943, val_loss=1.4628,val_acc=0.4575, lr=0.0010


                                                                                

Epoch [10 / 10], train_loss=0.7054,train_acc=0.5779, val_loss=0.6248,val_acc=0.5621, lr=0.0010
Time of model training 10 epochs: 0m 44s


#### Option 3 (model pre-trained, frozen pre-trained parameters, change all classifier)

In [48]:
model_5 = models.vgg11(weights='DEFAULT')

for param in model_5.parameters():
    param.requires_grad = False

model_5.classifier = nn.Linear(512*7*7, 2)
model_5

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [49]:
for name, layer in model_5.named_modules():
    if isinstance(layer, (nn.Conv2d, nn.Linear)):
        print(name)
        for i, param in enumerate(layer.parameters()):
            if i == 0:
                print(f'    weights.requires_grad = {param.requires_grad}')
            else:
                print(f'    bias.requires_grad = {param.requires_grad}', end='\n\n')

features.0
    weights.requires_grad = False
    bias.requires_grad = False

features.3
    weights.requires_grad = False
    bias.requires_grad = False

features.6
    weights.requires_grad = False
    bias.requires_grad = False

features.8
    weights.requires_grad = False
    bias.requires_grad = False

features.11
    weights.requires_grad = False
    bias.requires_grad = False

features.13
    weights.requires_grad = False
    bias.requires_grad = False

features.16
    weights.requires_grad = False
    bias.requires_grad = False

features.18
    weights.requires_grad = False
    bias.requires_grad = False

classifier
    weights.requires_grad = True
    bias.requires_grad = True



In [50]:
model_5 = model_5.to(device)

loss_model_5 = nn.CrossEntropyLoss()
opt_5 = torch.optim.Adam(model_5.classifier.parameters(), lr=0.001)
lr_scheduler_5 = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_5, patience=5)

In [51]:
EPOCHS = 10
train_loss = []
train_acc = []
val_loss = []
val_acc = []
lr_list = []
best_loss = None
count = 0

In [52]:
start = time.time()

for epoch in range(1, EPOCHS+1):
    
    model_5.train()
    running_train_loss = []
    true_answer = 0
    
    train_loop = tqdm(train_loader, leave=False)
    for x, targets in train_loop:
        x = x.to(device)
        targets = targets.reshape(-1).to(torch.int32)
        targets = torch.eye(2)[targets].to(device)
        
        pred = model_5(x)
        loss = loss_model_5(pred, targets)
        
        opt_5.zero_grad()
        loss.backward()
        
        opt_5.step()
        
        running_train_loss.append(loss.item())
        mean_train_loss = sum(running_train_loss) / len(running_train_loss)
        
        true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
        train_loop.set_description(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f}')
    
    running_train_acc = true_answer / len(train_data)
    
    train_loss.append(mean_train_loss)
    train_acc.append(running_train_acc)
    
    model_5.eval()
    with torch.no_grad():
        running_val_loss = []
        true_answer = 0
        
        for x, targets in val_loader:
            x = x.to(device)
            targets = targets.reshape(-1).to(torch.int32)
            targets = torch.eye(2)[targets].to(device)
            
            pred = model_5(x)
            loss = loss_model_5(pred, targets)
            
            running_val_loss.append(loss.item())
            mean_val_loss = sum(running_val_loss) / len(running_val_loss)
            
            true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            
        running_val_acc = true_answer / len(val_data)
        
        val_loss.append(mean_val_loss)
        val_acc.append(running_val_acc)
        
        
    
    lr_scheduler_5.step(mean_val_loss)
    lr = lr_scheduler_5._last_lr[0]
    lr_list.append(lr)
    
#    if epoch == 1 or epoch % 5 == 0:
    print(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f},'
          f'train_acc={running_train_acc:.4f}, val_loss={mean_val_loss:.4f},'
          f'val_acc={running_val_acc:.4f}, lr={lr:.4f}'
          )
time_model = time.time() - start
print(f'Time of model training {EPOCHS} epochs: {time_model // 60:.0f}m {time_model % 60:.0f}s')

                                                                                

Epoch [1 / 10], train_loss=0.5447,train_acc=0.8197, val_loss=0.5968,val_acc=0.9150, lr=0.0010


                                                                                

Epoch [2 / 10], train_loss=0.2827,train_acc=0.9303, val_loss=0.6538,val_acc=0.9216, lr=0.0010


                                                                                

Epoch [3 / 10], train_loss=0.2485,train_acc=0.9549, val_loss=0.5857,val_acc=0.9085, lr=0.0010


                                                                                

Epoch [4 / 10], train_loss=0.2861,train_acc=0.9139, val_loss=1.1071,val_acc=0.8954, lr=0.0010


                                                                                

Epoch [5 / 10], train_loss=0.1329,train_acc=0.9467, val_loss=0.7886,val_acc=0.9216, lr=0.0010


                                                                                

Epoch [6 / 10], train_loss=0.0462,train_acc=0.9877, val_loss=0.6676,val_acc=0.9281, lr=0.0010


                                                                                

Epoch [7 / 10], train_loss=0.1124,train_acc=0.9836, val_loss=0.8134,val_acc=0.9216, lr=0.0010


                                                                                

Epoch [8 / 10], train_loss=0.0265,train_acc=0.9877, val_loss=0.7006,val_acc=0.9150, lr=0.0010


                                                                                

Epoch [9 / 10], train_loss=0.0211,train_acc=0.9959, val_loss=0.7063,val_acc=0.9281, lr=0.0001


                                                                                

Epoch [10 / 10], train_loss=0.0244,train_acc=0.9795, val_loss=0.7154,val_acc=0.9150, lr=0.0001
Time of model training 10 epochs: 0m 25s


####  Finetuning

In [53]:
model_5.features[13:]

Sequential(
  (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (14): ReLU(inplace=True)
  (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (19): ReLU(inplace=True)
  (20): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [54]:
for param in model_5.features[13:].parameters():
    param.requires_grad = True

In [55]:
for name, layer in model_5.named_modules():
    if isinstance(layer, (nn.Conv2d, nn.Linear)):
        print(name)
        for i, param in enumerate(layer.parameters()):
            if i == 0:
                print(f'    weights.requires_grad = {param.requires_grad}')
            else:
                print(f'    bias.requires_grad = {param.requires_grad}', end='\n\n')

features.0
    weights.requires_grad = False
    bias.requires_grad = False

features.3
    weights.requires_grad = False
    bias.requires_grad = False

features.6
    weights.requires_grad = False
    bias.requires_grad = False

features.8
    weights.requires_grad = False
    bias.requires_grad = False

features.11
    weights.requires_grad = False
    bias.requires_grad = False

features.13
    weights.requires_grad = True
    bias.requires_grad = True

features.16
    weights.requires_grad = True
    bias.requires_grad = True

features.18
    weights.requires_grad = True
    bias.requires_grad = True

classifier
    weights.requires_grad = True
    bias.requires_grad = True



In [56]:
model_5 = model_5.to(device)

loss_model_5 = nn.CrossEntropyLoss()
opt_5 = torch.optim.Adam(
    [
        {'params': model_5.features[13:].parameters(), 'lr': 0.000001},
        {'params': model_5.classifier.parameters()},
    ],
    lr=0.0001
)
lr_scheduler_5 = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_5, patience=5)

In [57]:
start = time.time()

for epoch in range(1, EPOCHS+1):
    
    model_5.train()
    running_train_loss = []
    true_answer = 0
    
    train_loop = tqdm(train_loader, leave=False)
    for x, targets in train_loop:
        x = x.to(device)
        targets = targets.reshape(-1).to(torch.int32)
        targets = torch.eye(2)[targets].to(device)
        
        pred = model_5(x)
        loss = loss_model_5(pred, targets)
        
        opt_5.zero_grad()
        loss.backward()
        
        opt_5.step()
        
        running_train_loss.append(loss.item())
        mean_train_loss = sum(running_train_loss) / len(running_train_loss)
        
        true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
        train_loop.set_description(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f}')
    
    running_train_acc = true_answer / len(train_data)
    
    train_loss.append(mean_train_loss)
    train_acc.append(running_train_acc)
    
    model_5.eval()
    with torch.no_grad():
        running_val_loss = []
        true_answer = 0
        
        for x, targets in val_loader:
            x = x.to(device)
            targets = targets.reshape(-1).to(torch.int32)
            targets = torch.eye(2)[targets].to(device)
            
            pred = model_5(x)
            loss = loss_model_5(pred, targets)
            
            running_val_loss.append(loss.item())
            mean_val_loss = sum(running_val_loss) / len(running_val_loss)
            
            true_answer += (pred.argmax(dim=1) == targets.argmax(dim=1)).sum().item()
            
        running_val_acc = true_answer / len(val_data)
        
        val_loss.append(mean_val_loss)
        val_acc.append(running_val_acc)
        
        
    
    lr_scheduler_5.step(mean_val_loss)
    lr = lr_scheduler_5._last_lr[0]
    lr_list.append(lr)
    
#    if epoch == 1 or epoch % 5 == 0:
    print(f'Epoch [{epoch} / {EPOCHS}], train_loss={mean_train_loss:.4f},'
          f'train_acc={running_train_acc:.4f}, val_loss={mean_val_loss:.4f},'
          f'val_acc={running_val_acc:.4f}, lr={lr:.4f}'
          )
time_model = time.time() - start
print(f'Time of model training {EPOCHS} epochs: {time_model // 60:.0f}m {time_model % 60:.0f}s')

                                                                                

Epoch [1 / 10], train_loss=0.0485,train_acc=0.9836, val_loss=0.7463,val_acc=0.9216, lr=0.0000


                                                                                

Epoch [2 / 10], train_loss=0.0209,train_acc=0.9918, val_loss=0.6431,val_acc=0.9216, lr=0.0000


                                                                                

Epoch [3 / 10], train_loss=0.0034,train_acc=1.0000, val_loss=0.6834,val_acc=0.9412, lr=0.0000


                                                                                

Epoch [4 / 10], train_loss=0.0056,train_acc=1.0000, val_loss=0.6421,val_acc=0.9281, lr=0.0000


                                                                                

Epoch [5 / 10], train_loss=0.0088,train_acc=1.0000, val_loss=0.7654,val_acc=0.9150, lr=0.0000


                                                                                

Epoch [6 / 10], train_loss=0.0040,train_acc=0.9959, val_loss=0.6987,val_acc=0.9281, lr=0.0000


                                                                                

Epoch [7 / 10], train_loss=0.0029,train_acc=1.0000, val_loss=0.7642,val_acc=0.9346, lr=0.0000


                                                                                

Epoch [8 / 10], train_loss=0.0084,train_acc=0.9959, val_loss=0.8380,val_acc=0.9216, lr=0.0000


                                                                                

Epoch [9 / 10], train_loss=0.0062,train_acc=0.9959, val_loss=0.6270,val_acc=0.9477, lr=0.0000


                                                                                

Epoch [10 / 10], train_loss=0.0019,train_acc=1.0000, val_loss=0.7194,val_acc=0.9412, lr=0.0000
Time of model training 10 epochs: 0m 30s
