In [1]:
import torch
import torchvision
from torchvision import datasets, transforms, models
import torch.nn.functional as F
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR, ExponentialLR

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy.special import binom

import cv2
import os
import math
import time
import datetime
import warnings

# from efficientnet_pytorch import EfficientNet

In [2]:
# Configuration

pd.set_option('display.max_colwidth', 200)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# files path
train_path = "../input/stanford-cars-dataset/cars_train/cars_train"
test_path = "../input/stanford-cars-dataset/cars_test/cars_test"

csv_path = '../input/stanford-cars-csv-annotation'

csv_train = 'train.csv'
csv_test = 'test.csv'

In [3]:
train = pd.read_csv(os.path.join(csv_path, csv_train))
test = pd.read_csv(os.path.join(csv_path, csv_test))

train.head()

Unnamed: 0,bbox_x1,bbox_y1,bbox_x2,bbox_y2,Category,filename,class_name,Labels
0,39,116,569,375,14,00001.jpg,Audi TTS Coupe 2012,13
1,36,116,868,587,3,00002.jpg,Acura TL Sedan 2012,2
2,85,109,601,381,91,00003.jpg,Dodge Dakota Club Cab 2007,90
3,621,393,1484,1096,134,00004.jpg,Hyundai Sonata Hybrid Sedan 2012,133
4,14,36,133,99,106,00005.jpg,Ford F-450 Super Duty Crew Cab 2012,105


In [4]:
class CARS(Dataset):
    
    def __init__(self, dataframe, train=True, transform=None):
        self.df = dataframe
        self.train = train
        self.transform = transform
    
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        
        label = self.df.Labels.values[idx]
        filename = self.df.filename.values[idx]
        
        if self.train:
            image_path = train_path
        else:
            image_path = test_path
        
        p_path = os.path.join(image_path, filename)
        
        image = cv2.imread(p_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transforms.ToPILImage()(image)
        
        if self.transform is not None:
            image = self.transform(image)
        
        return image, label

In [5]:
mean = (0.4708, 0.4602, 0.4550)
sd = (0.2626, 0.2618, 0.2667)
train_transform = transforms.Compose([transforms.Resize((448, 448)),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor()                                    
])

test_transform = transforms.Compose([transforms.Resize((448, 448)),
                                     transforms.ToTensor()
])


trainset = CARS(train, train=True, transform=train_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=train_batch, shuffle=True, num_workers=4)

testset = CARS(test, train=False, transform=test_transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=test_batch, shuffle=False, num_workers=4)

In [7]:
class ArcMarginProduct(nn.Module):
    r"""Implement of large margin arc distance: :
        Args:
            in_features: size of each input sample
            out_features: size of each output sample
            s: norm of input feature
            m: margin

            cos(theta + m)
        """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s
        # print(output)

        return cosine, output
    
class CfeatureExtraction(nn.Module):

    def __init__(self):
        super(CfeatureExtraction, self).__init__()
        xfeatures = models.densenet161(pretrained=True)
        self.xfeatures = nn.Sequential(*list(xfeatures.features)[ : ])
        
        # if self.classifier.bias is not None:
        #     nn.init.constant_(self.classifier.bias.data, val=0)
        
    """ Training CNN with Arcface loss """
    def forward(self, inputs: torch.Tensor):
        
        features = self.xfeatures(inputs)               # extract features from pretrained base
        features = F.avg_pool2d(features, kernel_size=features.size()[2: ])
        features = features.view(features.size()[0], -1)
        return features

class fconnected(nn.Module):

    def __init__(self):
        super(fconnected, self).__init__()
        self.fc = nn.Linear(2208, 588)
        self.bnorm = nn.BatchNorm1d(588)

        nn.init.kaiming_normal_(self.fc.weight.data)

    def forward(self, inputs: torch.Tensor):

        logit = self.fc(inputs)
        logit = self.bnorm(logit)
        return logit

# models = Net(num_classes = num_classes).to(device)
# models(torch.randn(5, 3, 448, 448).to(device)).shape

In [8]:
tune_lr = 0.0001
base_lr = 0.001
weight_decay = 5e-4
num_classes = 196

F_Extraction = CfeatureExtraction().to(device)
# numel_list = [p.numel() for p in F_Extraction.parameters()]
# print(sum(numel_list))
F_Connected = fconnected().to(device)
Arcface = ArcMarginProduct(588, num_classes, s=64, m=0.5).to(device)

criterion = nn.CrossEntropyLoss()

Extraction_opt = torch.optim.SGD(F_Extraction.parameters(), lr=tune_lr, momentum=0.9, weight_decay=weight_decay, nesterov=True)
Extraction_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(Extraction_opt, mode='max', factor=0.1, patience=4, min_lr=1e-7, verbose=True)

fullycon_opt = torch.optim.SGD(F_Connected.parameters(), lr=base_lr, momentum=0.9, nesterov=True)
fullycon_scheduler = torch.optim.lr_scheduler.MultiStepLR(fullycon_opt, milestones=[30,50], gamma=0.1)

Arcface_opt = torch.optim.SGD(Arcface.parameters(), lr=base_lr, momentum=0.9, nesterov=True)
Arcface_scheduler = torch.optim.lr_scheduler.MultiStepLR(Arcface_opt, milestones=[30,50], gamma=0.1)

Downloading: "https://download.pytorch.org/models/densenet161-8d451a50.pth" to /root/.cache/torch/checkpoints/densenet161-8d451a50.pth


HBox(children=(FloatProgress(value=0.0, max=115730790.0), HTML(value='')))




In [None]:
epochs = 60
save_model_path = 'Checkpoint'
steps = 0
running_loss = 0

print('Start fine-tuning...')
best_acc = 0.
best_epoch = None
end_patient = 0

for epoch in range(epochs):
    
    start_time = time.time()
    for idx, data in enumerate(train_loader):
        steps += 1
        
        # Move input and label tensors to the default device
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        
        Extraction_opt.zero_grad()
        fullycon_opt.zero_grad()
        Arcface_opt.zero_grad()
        
        feature = F_Extraction(inputs)
        feature = F_Connected(feature)
        _, output = Arcface(feature, labels)
        loss = criterion(output, labels)

        loss.backward()
        Extraction_opt.step()
        fullycon_opt.step()
        Arcface_opt.step()

        running_loss += loss.item()
    stop_time = time.time()
    print('Epoch {}/{} and used time: {:.4f} sec.'.format(epoch+1, epochs, stop_time - start_time))
    
    F_Extraction.eval(), F_Connected.eval(), Arcface.eval()
    for name, loader in [("train", train_loader), ("test", test_loader)]:
        _acc = 0
        _loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for data in loader:
                
                imgs, labels = data
                imgs, labels = imgs.to(device), labels.to(device)
                
                feature = F_Extraction(imgs)
                feature = F_Connected(feature)
                logit, output = Arcface(feature, labels)
                loss = criterion(output, labels)
                _loss += loss.item()
                
                result = F.softmax(logit, dim=1)
                _, predicted = torch.max(result, dim=1)
                
                total += labels.shape[0]
                correct += int((predicted == labels).sum())
            _acc = 100 * correct  / total
            _loss = _loss / len(loader)
            
        print('{} loss: {:.6f}    {} accuracy: {:.4f}'.format(name, _loss, name, _acc))
    print()
    

    running_loss = 0
    F_Extraction.train(), F_Connected.train(), Arcface.train()
    Extraction_scheduler.step(_acc)
    fullycon_scheduler.step(_acc)
    Arcface_scheduler.step(_acc)
    
    if _acc > best_acc:
#         model_file = os.path.join(save_model_path, 'resnet34_CUB_200_fine_tuning_epoch_{}_acc_{}.pth'.format(best_epoch, best_acc))
        
#         if os.path.isfile(model_file):
#             os.remove(os.path.join(save_model_path, 'resnet34_CUB_200_fine_tuning_epoch_{}_acc_{}.pth'.format(best_epoch, best_acc)))
        
        end_patient = 0
        best_acc = _acc
        best_epoch = epoch + 1
#         print('The accuracy is improved, save model')
#         torch.save(model.state_dict(), os.path.join(save_model_path,'resnet34_CUB_200_fine_tuning_epoch_{}_acc_{}.pth'.format(best_epoch, best_acc)))
        
    else:
        end_patient += 1

    if end_patient >= 10 and epoch > 50:
        break
print('After the training, the end of the epoch {}, the highest accuracy is {:.2f}'.format(best_epoch, best_acc))

Start fine-tuning...
Epoch 1/60 and used time: 434.0897 sec.
train loss: 22.368288    train accuracy: 83.8040
test loss: 25.304214    test accuracy: 74.2072





Epoch 2/60 and used time: 432.6561 sec.
train loss: 14.360408    train accuracy: 95.5550
test loss: 19.047729    test accuracy: 86.0092

Epoch 3/60 and used time: 433.2686 sec.
train loss: 9.606579    train accuracy: 97.7775
test loss: 15.426206    test accuracy: 89.2426

Epoch 4/60 and used time: 434.0155 sec.
train loss: 6.672475    train accuracy: 98.2932
test loss: 13.157254    test accuracy: 90.8718

Epoch 5/60 and used time: 435.9317 sec.
train loss: 4.555559    train accuracy: 98.8581
test loss: 11.665681    test accuracy: 91.8667

Epoch 6/60 and used time: 433.6046 sec.
train loss: 2.950754    train accuracy: 99.2878
test loss: 10.422315    test accuracy: 92.5009

Epoch 7/60 and used time: 434.5586 sec.
train loss: 1.873774    train accuracy: 99.3861
test loss: 9.656757    test accuracy: 92.9362

Epoch 8/60 and used time: 435.6669 sec.
train loss: 1.146389    train accuracy: 99.5948
test loss: 9.107306    test accuracy: 93.5829

Epoch 9/60 and used time: 436.2263 sec.
train los