In [1]:
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
import torch.nn.functional as F

import matplotlib
import matplotlib.pyplot as plt

import torchvision
from torchvision.datasets import ImageFolder
from torchvision import transforms

import numpy

from IPython.display import Image
from tqdm.notebook import tqdm
from math import *


In [2]:
# Set dataset and model

# Dataset
dataset = torchvision.datasets.Flowers102

from torchvision.models import resnet152
model = resnet152(pretrained=True)

model_filename = "flowers102-resnet152"

warmup_epoch = 10
num_epoch = 30

lr_warmup = 5e-3
lr_base = 5e-2
lr_min = 5e-3

device = "cuda:3" if torch.cuda.is_available() else "cpu"

In [3]:
device

'cuda:3'

In [4]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(224),
    transforms.CenterCrop(224)
])
train_set = torchvision.datasets.Flowers102("./data", split= "train", download = True, transform=transform)
test_set = torchvision.datasets.Flowers102("./data", split= "test", download = True, transform=transform)
val_set = torchvision.datasets.Flowers102("./data", split= "val", download = True, transform=transform)
train_loader = DataLoader(train_set, batch_size=512, shuffle=True)
test_loader = DataLoader(test_set, batch_size=512, shuffle=True)
val_loader = DataLoader(val_set, batch_size=512, shuffle=True)

In [5]:
in_features = model.fc.in_features
in_features

2048

In [6]:
model.fc = nn.Linear(in_features, 102) # set in_features and out_features
model.train()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
# Freeze layers
for name, param in model.named_parameters():
    if ('fc' in name) or ('layer4' in name): # or ('layer3' in name)
        print("Unfreeze " + name)
        param.requires_grad = True
    else:
        print("Freeze " + name)
        param.requires_grad = False


Freeze conv1.weight
Freeze bn1.weight
Freeze bn1.bias
Freeze layer1.0.conv1.weight
Freeze layer1.0.bn1.weight
Freeze layer1.0.bn1.bias
Freeze layer1.0.conv2.weight
Freeze layer1.0.bn2.weight
Freeze layer1.0.bn2.bias
Freeze layer1.0.conv3.weight
Freeze layer1.0.bn3.weight
Freeze layer1.0.bn3.bias
Freeze layer1.0.downsample.0.weight
Freeze layer1.0.downsample.1.weight
Freeze layer1.0.downsample.1.bias
Freeze layer1.1.conv1.weight
Freeze layer1.1.bn1.weight
Freeze layer1.1.bn1.bias
Freeze layer1.1.conv2.weight
Freeze layer1.1.bn2.weight
Freeze layer1.1.bn2.bias
Freeze layer1.1.conv3.weight
Freeze layer1.1.bn3.weight
Freeze layer1.1.bn3.bias
Freeze layer1.2.conv1.weight
Freeze layer1.2.bn1.weight
Freeze layer1.2.bn1.bias
Freeze layer1.2.conv2.weight
Freeze layer1.2.bn2.weight
Freeze layer1.2.bn2.bias
Freeze layer1.2.conv3.weight
Freeze layer1.2.bn3.weight
Freeze layer1.2.bn3.bias
Freeze layer2.0.conv1.weight
Freeze layer2.0.bn1.weight
Freeze layer2.0.bn1.bias
Freeze layer2.0.conv2.weight
F

In [8]:
# Reading whole dataloader into memory can improve the speed of training
train_loader = list(train_loader)
test_loader = list(test_loader)

In [9]:
train_set

Dataset Flowers102
    Number of datapoints: 1020
    Root location: ./data
    split=train
    StandardTransform
Transform: Compose(
               ToTensor()
               Resize(size=224, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
           )

In [10]:
loss_function = nn.CrossEntropyLoss()

In [11]:
lr = lr_base
def adjust_learning_rate(optimizer, current_epoch, max_epoch, lr_min=lr_min, lr_max=lr_base, warmup=True):
    if current_epoch < warmup_epoch:
        lr = lr_max * (current_epoch+1) / (warmup_epoch+1)
    else:
        lr = lr_min + (lr_max-lr_min)*(1 + cos(pi * (current_epoch - warmup_epoch) / (max_epoch - warmup_epoch))) / 2
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    print("Learning rate is set to "+str(lr))

optimiser = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), 
                      lr=lr,
                      momentum=0.9) #only optimse non-frozen layers
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [12]:
results = []

for epoch in range(warmup_epoch+num_epoch):
    running_loss = 0
    train_acc = 0
    
    adjust_learning_rate(optimizer=optimiser,
                        current_epoch=epoch,
                        max_epoch=warmup_epoch+num_epoch)
    with tqdm(train_loader, desc='Train(epoch'+str(epoch)+')') as t:
        total = 0
        correct = 0
        for data in t:
            model.train()
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimiser.zero_grad()

            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            running_loss += loss.item()
            loss.backward()
            optimiser.step()
            
            pred = torch.argmax(F.softmax(outputs), dim=1)
            total += len(labels)
            correct += sum(pred == labels)
            
        train_acc = (100.0 * correct) / total

        t.set_postfix(running_loss=running_loss,
                      runing_acc=train_acc)
            
    print("epoch %d/%d:(tr)loss=%.4f" % (epoch, warmup_epoch+num_epoch, running_loss))
    print("epoch %d/%d:(tr)acc=%.4f%%" % (epoch, warmup_epoch+num_epoch, train_acc))
    
    test_running_loss = 0
    test_acc = 0
    
    if epoch%10==9:
        
        with tqdm(test_loader, desc='test'+str(epoch)) as t:
            with torch.no_grad():
                total = 0
                correct = 0
                for data in t:
                    model.eval()
                    inputs, labels = data
                    inputs, labels = inputs.to(device), labels.to(device)

                    outputs = model(inputs)

                    loss = loss_function(outputs, labels)
                    test_running_loss += loss.item()

                    pred = torch.argmax(F.softmax(outputs), dim=1)
                    total += len(labels)
                    correct += sum(pred == labels)
                test_acc = (100.0 * correct) / total

                t.set_postfix(running_loss=test_running_loss,
                              runing_acc=test_acc)

        print("epoch %d/%d:(te)loss=%.4f" % (epoch, warmup_epoch+num_epoch, test_running_loss))
        print("epoch %d/%d:(te)acc=%.4f%%" % (epoch, warmup_epoch+num_epoch, test_acc))
          
    results.append({'running_loss':running_loss,
                   'train_acc':train_acc,
                   'test_running_loss':test_running_loss,
                   'test_acc':test_acc})


Learning rate is set to 0.004545454545454546


Train(epoch0):   0%|          | 0/2 [00:00<?, ?it/s]



epoch 0/40:(tr)loss=9.3665
epoch 0/40:(tr)acc=0.7843%
Learning rate is set to 0.009090909090909092


Train(epoch1):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 1/40:(tr)loss=9.2811
epoch 1/40:(tr)acc=1.4706%
Learning rate is set to 0.01363636363636364


Train(epoch2):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 2/40:(tr)loss=9.0229
epoch 2/40:(tr)acc=3.9216%
Learning rate is set to 0.018181818181818184


Train(epoch3):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 3/40:(tr)loss=8.5549
epoch 3/40:(tr)acc=22.6471%
Learning rate is set to 0.022727272727272728


Train(epoch4):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 4/40:(tr)loss=7.8621
epoch 4/40:(tr)acc=50.0000%
Learning rate is set to 0.02727272727272728


Train(epoch5):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 5/40:(tr)loss=6.8968
epoch 5/40:(tr)acc=70.1961%
Learning rate is set to 0.03181818181818182


Train(epoch6):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 6/40:(tr)loss=5.6184
epoch 6/40:(tr)acc=87.0588%
Learning rate is set to 0.03636363636363637


Train(epoch7):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 7/40:(tr)loss=4.1769
epoch 7/40:(tr)acc=93.7255%
Learning rate is set to 0.04090909090909091


Train(epoch8):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 8/40:(tr)loss=2.8440
epoch 8/40:(tr)acc=96.2745%
Learning rate is set to 0.045454545454545456


Train(epoch9):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 9/40:(tr)loss=1.7427
epoch 9/40:(tr)acc=97.2549%


test9:   0%|          | 0/13 [00:00<?, ?it/s]



epoch 9/40:(te)loss=19.1483
epoch 9/40:(te)acc=75.6708%
Learning rate is set to 0.05


Train(epoch10):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 10/40:(tr)loss=0.9684
epoch 10/40:(tr)acc=98.0392%
Learning rate is set to 0.04987674264578615


Train(epoch11):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 11/40:(tr)loss=0.5130
epoch 11/40:(tr)acc=99.2157%
Learning rate is set to 0.049508321016510634


Train(epoch12):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 12/40:(tr)loss=0.2843
epoch 12/40:(tr)acc=99.5098%
Learning rate is set to 0.04889877161664096


Train(epoch13):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 13/40:(tr)loss=0.1662
epoch 13/40:(tr)acc=99.8039%
Learning rate is set to 0.04805477279695852


Train(epoch14):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 14/40:(tr)loss=0.0992
epoch 14/40:(tr)acc=100.0000%
Learning rate is set to 0.04698557158514988


Train(epoch15):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 15/40:(tr)loss=0.0608
epoch 15/40:(tr)acc=100.0000%
Learning rate is set to 0.04570288237343632


Train(epoch16):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 16/40:(tr)loss=0.0396
epoch 16/40:(tr)acc=100.0000%
Learning rate is set to 0.04422075857324137


Train(epoch17):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 17/40:(tr)loss=0.0278
epoch 17/40:(tr)acc=100.0000%
Learning rate is set to 0.04255543864307431


Train(epoch18):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 18/40:(tr)loss=0.0209
epoch 18/40:(tr)acc=100.0000%
Learning rate is set to 0.040725168176580645


Train(epoch19):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 19/40:(tr)loss=0.0165
epoch 19/40:(tr)acc=100.0000%


test19:   0%|          | 0/13 [00:00<?, ?it/s]

epoch 19/40:(te)loss=6.8384
epoch 19/40:(te)acc=87.7541%
Learning rate is set to 0.03875


Train(epoch20):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 20/40:(tr)loss=0.0136
epoch 20/40:(tr)acc=100.0000%
Learning rate is set to 0.03665157446920551


Train(epoch21):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 21/40:(tr)loss=0.0116
epoch 21/40:(tr)acc=100.0000%
Learning rate is set to 0.03445288237343632


Train(epoch22):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 22/40:(tr)loss=0.0102
epoch 22/40:(tr)acc=100.0000%
Learning rate is set to 0.03217801304339958


Train(epoch23):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 23/40:(tr)loss=0.0091
epoch 23/40:(tr)acc=100.0000%
Learning rate is set to 0.02985189042352221


Train(epoch24):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 24/40:(tr)loss=0.0084
epoch 24/40:(tr)acc=100.0000%
Learning rate is set to 0.027500000000000007


Train(epoch25):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 25/40:(tr)loss=0.0078
epoch 25/40:(tr)acc=100.0000%
Learning rate is set to 0.025148109576477805


Train(epoch26):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 26/40:(tr)loss=0.0074
epoch 26/40:(tr)acc=100.0000%
Learning rate is set to 0.022821986956600416


Train(epoch27):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 27/40:(tr)loss=0.0071
epoch 27/40:(tr)acc=100.0000%
Learning rate is set to 0.020547117626563687


Train(epoch28):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 28/40:(tr)loss=0.0068
epoch 28/40:(tr)acc=100.0000%
Learning rate is set to 0.0183484255307945


Train(epoch29):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 29/40:(tr)loss=0.0066
epoch 29/40:(tr)acc=100.0000%


test29:   0%|          | 0/13 [00:00<?, ?it/s]

epoch 29/40:(te)loss=6.1674
epoch 29/40:(te)acc=88.9576%
Learning rate is set to 0.016250000000000007


Train(epoch30):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 30/40:(tr)loss=0.0065
epoch 30/40:(tr)acc=100.0000%
Learning rate is set to 0.014274831823419359


Train(epoch31):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 31/40:(tr)loss=0.0064
epoch 31/40:(tr)acc=100.0000%
Learning rate is set to 0.012444561356925697


Train(epoch32):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 32/40:(tr)loss=0.0063
epoch 32/40:(tr)acc=100.0000%
Learning rate is set to 0.010779241426758636


Train(epoch33):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 33/40:(tr)loss=0.0062
epoch 33/40:(tr)acc=100.0000%
Learning rate is set to 0.009297117626563685


Train(epoch34):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 34/40:(tr)loss=0.0061
epoch 34/40:(tr)acc=100.0000%
Learning rate is set to 0.00801442841485013


Train(epoch35):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 35/40:(tr)loss=0.0061
epoch 35/40:(tr)acc=100.0000%
Learning rate is set to 0.006945227203041479


Train(epoch36):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 36/40:(tr)loss=0.0060
epoch 36/40:(tr)acc=100.0000%
Learning rate is set to 0.006101228383359046


Train(epoch37):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 37/40:(tr)loss=0.0060
epoch 37/40:(tr)acc=100.0000%
Learning rate is set to 0.005491678983489372


Train(epoch38):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 38/40:(tr)loss=0.0060
epoch 38/40:(tr)acc=100.0000%
Learning rate is set to 0.005123257354213848


Train(epoch39):   0%|          | 0/2 [00:00<?, ?it/s]

epoch 39/40:(tr)loss=0.0060
epoch 39/40:(tr)acc=100.0000%


test39:   0%|          | 0/13 [00:00<?, ?it/s]

epoch 39/40:(te)loss=6.1485
epoch 39/40:(te)acc=89.0226%


In [13]:
results

[{'running_loss': 9.36653470993042,
  'train_acc': tensor(0.7843, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 9.281102657318115,
  'train_acc': tensor(1.4706, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 9.02288007736206,
  'train_acc': tensor(3.9216, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 8.554941654205322,
  'train_acc': tensor(22.6471, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 7.862131357192993,
  'train_acc': tensor(50.0000, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 6.896833896636963,
  'train_acc': tensor(70.1961, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 5.618419647216797,
  'train_acc': tensor(87.0588, device='cuda:3'),
  'test_running_loss': 0,
  'test_acc': 0},
 {'running_loss': 4.1769222021102905,
  'train_acc': tensor(93.7255, device='cuda:3'),
  'test_

In [14]:
torch.save(model.state_dict(), "model/" + model_filename)

In [15]:
train_loss_list = []
train_acc_list = []
for result in results:
    train_loss_list.append(result["running_loss"])
    train_acc_list.append(torch.Tensor.cpu(result["train_acc"]))

In [None]:
test_loss_list = []
test_acc_list = []
for result in results:
    test_loss_list.append(result["test_running_loss"])
    test_acc_list.append(torch.Tensor.cpu(result["test_acc"]))

In [None]:
val_loss_list = []
val_acc_list = []
for result in results:
    val_loss_list.append(result["val_loss"])
    val_acc_list.append(result["val_acc"])

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(10,4))

ax[0].plot(train_loss_list)
ax[0].plot(test_loss_list)
ax[0].legend(['train','test'])
ax[0].grid()
ax[0].set_title("Loss")

ax[1].plot(train_acc_list)
ax[1].plot(test_acc_list)
ax[1].legend(['train','test'])
ax[1].grid()
ax[1].set_title("Accuracy")
plt.show()