In [1]:
import torch
import torchvision
from torchvision import datasets, transforms, models
import torch.nn.functional as F
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR, ExponentialLR

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy.special import binom

import cv2
import os
import math
import time
import datetime
import warnings

In [2]:
# Configuration

# Allow up to 200 characters per column when pandas displays a DataFrame.
pd.set_option('display.max_colwidth', 200)

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Directory that the `filename` column of the tables below is joined onto
# when an image is loaded.
image_path = "CUB_200_2011/images"

# Train / test tables. They are expected to provide the `Label` and
# `filename` columns that the dataset class reads.
train, test = (pd.read_csv(csv_file) for csv_file in ('PATH/train.csv', 'PATH/test.csv'))

In [3]:
class CUB200(Dataset):
    """Dataset serving ``(image, label)`` pairs described by a DataFrame.

    Each row of ``dataframe`` must provide a ``filename`` column (image path
    relative to the image root) and a ``Label`` column.

    Args:
        dataframe: table with ``filename`` and ``Label`` columns.
        transform: optional callable applied to the PIL image before it is
            returned.
        root: directory the file names are joined onto. When ``None`` (the
            default) the module-level ``image_path`` is used.
    """

    def __init__(self, dataframe, transform=None, root=None):
        self.df = dataframe
        self.transform = transform
        self.root = root

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``label`` is the value of
            the ``Label`` column for that row.

        Raises:
            FileNotFoundError: if the image file does not exist.
            ValueError: if the file exists but OpenCV cannot decode it.
        """
        label = self.df.Label.values[idx]
        filename = self.df.filename.values[idx]

        # Resolve the root lazily so the global default is read at access time.
        root = image_path if self.root is None else self.root
        p_path = os.path.join(root, filename)

        # cv2.imread signals failure by returning None instead of raising,
        # so check explicitly and report the offending path.
        if not os.path.isfile(p_path):
            raise FileNotFoundError("Image file not found: {}".format(p_path))
        image = cv2.imread(p_path)
        if image is None:
            raise ValueError("Could not decode image file: {}".format(p_path))

        # OpenCV loads images as BGR; convert to RGB before building the PIL image.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transforms.ToPILImage()(image)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [4]:
# Per-channel normalisation statistics.
# NOTE(review): presumably the ImageNet statistics expected by the pretrained
# torchvision backbone - confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Loader settings shared by the train and test loaders.
BATCH_SIZE = 16
NUM_WORKERS = 4

# Training: resize to 600x600, take a random 448x448 crop, flip at random.
# The crop already yields 448x448, so the former Resize((448, 448)) that
# followed it was a no-op and has been dropped.
train_transform = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.RandomCrop((448, 448)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Evaluation: deterministic centre crop, no augmentation.
test_transform = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.CenterCrop((448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

trainset = CUB200(train, transform=train_transform)
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

testset = CUB200(test, transform=test_transform)
test_loader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [5]:
class ArcMargin(nn.Module):
    """Cosine classifier with an additive angular margin on the target class.

    Args:
        in_feat: size of each input feature vector.
        out_feat: number of classes (rows of the weight matrix).
        s: scale applied to the cosine logits.
        m1: angular margin, in radians, added to the target-class angle.
    """

    def __init__(self, in_feat, out_feat, s=30.0, m1=0.50):
        super(ArcMargin, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.s = s
        self.m1 = m1
        # Allocate uninitialised storage; Xavier init fills it right below.
        self.weight = Parameter(torch.empty(out_feat, in_feat))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, input, label):
        """Compute plain and margin-penalised logits.

        Args:
            input: feature batch; rows are L2-normalised before use.
            label: class index per row (any shape that flattens to one
                index per row; cast to ``long`` internally).

        Returns:
            ``(s * cosine, output)``: the scaled cosine logits without any
            margin, and the scaled logits with the margin applied to the
            entry selected by ``label`` in each row.
        """
        #---------------------------- Margin Additional -----------------------------
        cos_m = math.cos(self.m1)
        sin_m = math.sin(self.m1)
        # cosine > th  <=>  theta + m1 < pi, i.e. cos(theta + m1) is still
        # monotonic in theta; otherwise the linear fallback `cosine - mm` is used.
        th = math.cos(math.pi - self.m1)
        mm = math.sin(math.pi - self.m1) * self.m1
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp guards against tiny negative values from rounding before sqrt.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        # cos(theta + m1) via the angle-addition identity.
        phi = cosine * cos_m - sine * sin_m
        phi = torch.where(cosine > th, phi, cosine - mm)
        # --------------------------- convert label to one-hot ---------------------------
        # Build the mask on the same device/dtype as the logits instead of
        # relying on a notebook-global `device`.
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # Margin-penalised value on the target entry, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s

        return self.s * cosine, output

In [6]:
class BaseModel(nn.Module):
    """ResNet feature extractor followed by global max pooling.

    Args:
        model_name: ``'resnet50'`` or ``'resnet101'``.
        pretrained: forwarded to the torchvision model constructor.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """

    def __init__(self, model_name, pretrained=True):
        super().__init__()
        if model_name == 'resnet50':
            net = models.resnet50(pretrained=pretrained)
        elif model_name == 'resnet101':
            net = models.resnet101(pretrained=pretrained)
        else:
            # Fail here rather than with "NoneType is not callable" in forward().
            raise ValueError(
                "Unsupported model_name {!r}; expected 'resnet50' or 'resnet101'".format(model_name)
            )

        # Keep everything except the last two child modules (for torchvision
        # ResNets: the global average pool and the fc classifier), so the
        # backbone outputs a spatial feature map.
        self.backbone = nn.Sequential(*list(net.children())[:-2])
        self.mpool = nn.AdaptiveMaxPool2d((1, 1))
        # Not used by forward(); kept so the attribute stays available.
        self.apool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        """Return max-pooled backbone features flattened to ``(batch, -1)``."""
        feat = self.backbone(x)
        out = self.mpool(feat)
        out = out.view(out.size(0), -1)
        return out
    
class Dense(nn.Module):
    """Embedding head: BatchNorm1d -> Linear -> ELU.

    Args:
        in_features: width of the incoming feature vectors (default 2048).
        out_features: width of the produced embedding (default 600).
    """

    def __init__(self, in_features: int = 2048, out_features: int = 600):
        # Zero-argument super() does not look up the global name `Dense`,
        # so construction keeps working even if that name is later rebound.
        super().__init__()

        self.bn = nn.BatchNorm1d(in_features)
        self.fc = nn.Linear(in_features, out_features)

    def forward(self, inputs: torch.Tensor):
        """Normalise, project and apply ELU to a ``(batch, in_features)`` input."""
        out = self.fc(self.bn(inputs))
        # Functional ELU avoids building a new nn.ELU module on every call.
        return nn.functional.elu(out, inplace=True)

In [7]:
# Hyper-parameters. `lr` is the step size used for the backbone optimizer,
# `base_lr` the one used for the two heads.
num_classes = 200
weight_decay = 1e-4
lr = 0.0001
base_lr = 0.01

# Modules are created in this order: backbone, dense head, margin head.
Model = BaseModel('resnet50', pretrained=True).to(device)
# NOTE(review): this rebinds the name `Dense` from the class to an instance,
# so the cell cannot be re-run without re-running the class definition first.
Dense = Dense().to(device)
Arcface = ArcMargin(600, num_classes, s=64, m1=0.5).to(device) # 78.62
criterion = nn.CrossEntropyLoss()

# optimizer
# Both optimizers share everything except the parameters and the step size.
sgd_common = dict(momentum=0.9, weight_decay=weight_decay, nesterov=True)

base_opt = SGD(Model.parameters(), lr=lr, **sgd_common)

head_groups = [{'params': module.parameters()} for module in (Dense, Arcface)]
optimizer = SGD(head_groups, lr=base_lr, **sgd_common)

# optimization scheduler
# Each schedule multiplies its optimizer's learning rate by 0.1 at the given epochs.
basesc = MultiStepLR(base_opt, milestones=[60], gamma=0.1)
scheduler = MultiStepLR(optimizer, milestones=[45, 75], gamma=0.1)

In [8]:
epochs = 100
save_model_path = 'Checkpoint'
steps = 0
running_loss = 0

print('Start fine-tuning...')
best_acc = 0.
best_epoch = None

for epoch in range(1, epochs+1):

    # ------------------------------ training pass ------------------------------
    start_time = time.time()
    for inputs, labels in train_loader:
        steps += 1

        # Move input and label tensors to the default device
        inputs, labels = inputs.to(device), labels.to(device)

        base_opt.zero_grad()
        optimizer.zero_grad()

        output = Model(inputs)
        output = Dense(output)
        # Train on the margin-penalised logits (second return value).
        _, output = Arcface(output, labels)
        loss = criterion(output, labels)

        loss.backward()
        optimizer.step()
        base_opt.step()

        # Accumulated over the epoch and reset below; not printed by this cell.
        running_loss += loss.item()
    stop_time = time.time()
    print('Epoch {}/{} and used time: {:.2f} sec.'.format(epoch, epochs, stop_time - start_time))

    # ------------------------------ evaluation pass ------------------------------
    epoch_acc = {}
    Model.eval(), Arcface.eval(), Dense.eval()
    try:
        for name, loader in [("train", train_loader), ("test", test_loader)]:
            _loss = 0
            correct = 0
            total = 0

            with torch.no_grad():
                for imgs, labels in loader:
                    imgs, labels = imgs.to(device), labels.to(device)

                    output = Model(imgs)
                    output = Dense(output)
                    cosine, output = Arcface(output, labels)
                    # Loss is reported on the margin-penalised logits ...
                    loss = criterion(output, labels)
                    _loss += loss.item()

                    # ... while predictions use the plain cosine logits.
                    # Softmax is monotonic, so argmax on the logits is enough.
                    predicted = cosine.argmax(dim=1)

                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())
            _acc = 100 * correct  / total
            _loss = _loss / len(loader)
            epoch_acc[name] = _acc

            print('{} loss: {:.4f}    {} accuracy: {:.4f}'.format(name, _loss, name, _acc))
        print()
    finally:
        # Restore train mode even if evaluation is interrupted, so a re-run
        # of this cell does not silently train with the modules in eval mode.
        Model.train(), Arcface.train(), Dense.train()

    running_loss = 0
    scheduler.step()
    basesc.step()

    # Track the best epoch by accuracy on the test split (made explicit here
    # instead of relying on the value left over from the last loop iteration).
    # NOTE(review): selecting on the test split makes `best_acc` optimistic;
    # consider a held-out validation split.
    if epoch_acc["test"] > best_acc:
        best_acc = epoch_acc["test"]
        best_epoch = epoch

print('After the training, the end of the epoch {}, the highest accuracy is {:.2f}'.format(best_epoch, best_acc))

Start fine-tuning...
Epoch 1/100 and used time: 84.10 sec.
train loss: 22.3279    train accuracy: 74.5412
test loss: 25.3362    test accuracy: 65.8440

Epoch 2/100 and used time: 84.11 sec.
train loss: 13.9210    train accuracy: 87.1205
test loss: 18.3453    test accuracy: 78.0635

Epoch 3/100 and used time: 83.99 sec.
train loss: 9.7285    train accuracy: 91.9419
test loss: 15.4732    test accuracy: 80.9631

Epoch 4/100 and used time: 87.22 sec.
train loss: 7.0617    train accuracy: 94.8115
test loss: 13.9987    test accuracy: 82.8788

Epoch 5/100 and used time: 86.78 sec.
train loss: 5.7878    train accuracy: 95.7958
test loss: 13.9552    test accuracy: 82.6890

Epoch 6/100 and used time: 85.86 sec.
train loss: 4.3695    train accuracy: 96.8135
test loss: 13.1024    test accuracy: 83.2068

Epoch 7/100 and used time: 86.07 sec.
train loss: 3.4300    train accuracy: 97.5475
test loss: 12.8134    test accuracy: 83.4484

Epoch 8/100 and used time: 85.80 sec.
train loss: 2.3551    train a