# Load data

In [19]:
import PIL.Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms, models
import time
import copy

In [3]:
# Display the installed PyTorch version so the results can be tied to it.
torch.__version__

'1.6.0'

In [4]:
# Display whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

True

In [5]:
# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [6]:
# torch.set_default_tensor_type(torch.cuda.FloatTensor)
# Turn on cuDNN's benchmark mode (it auto-selects convolution algorithms;
# see the torch.backends.cudnn documentation for the trade-offs).
torch.backends.cudnn.benchmark=True

## Get CSVs

In [7]:
# Training annotations: the file name and level labels for each image.
train_csv_path = '../data/csvs/train.csv'
train_df = pd.read_csv(train_csv_path)
# test_df = pd.read_csv('../data/csvs/test.csv')

In [8]:
# Preview the first rows; the columns used later are 'file' and the 'lvl_*' labels.
train_df.head(5)

Unnamed: 0.1,Unnamed: 0,file,lvl_three,lvl_one,lvl_two
0,0,1_220_F_83683073_O4yJOnarzTjKXuUBAgkAifmiC8d0I...,1,0,3
1,1,20_220_F_5292725_818KTy3xv82nEkNolcs2m37MOV86s...,20,1,5
2,2,20_220_F_47187567_lwYwc9UQtBK5Be6v4P7HNsCc4Hhr...,20,1,5
3,3,1_220_F_38932828_Osns7NBWCq8AhJonYpQArrToDLLhT...,1,0,3
4,4,1_220_F_97168737_y0VWy7kLMby9BO6lHDfpyfNpW9o0S...,1,0,3


# Split data

Scikit-learn makes stratified splitting straightforward. The helper function below splits the data into training and validation DataFrames (file name plus label) that can be passed directly to a PyTorch dataset class.

In [9]:
def train_val_split(files, target, test_size, stratify=True, random_state=None):
    """Split file names and labels into training and validation frames.

    Args:
        files (pd.Series): File names, one entry per sample.
        target (pd.Series): Labels aligned with ``files``.
        test_size (float or int): Size of the validation part, forwarded
            unchanged to ``train_test_split``.
        stratify (bool): If True, stratify the split on ``target``; if False,
            split without stratification. Default: True
        random_state (int, optional): Seed forwarded to ``train_test_split``.
            The default ``None`` keeps the previous unseeded behaviour; pass
            an int to get the same split on every run.

    Returns:
        tuple: ``(train_split, val_split)``. Each is the column-wise
        concatenation of the file part and the matching target part, so the
        file column comes first and the label column second.
    """
    # One call covers both modes: stratify=None is train_test_split's
    # "do not stratify" value.
    X_train, X_val, y_train, y_val = train_test_split(
        files,
        target,
        test_size=test_size,
        stratify=target if stratify else None,
        random_state=random_state,
    )
    train_split = pd.concat([X_train, y_train], axis=1)
    val_split = pd.concat([X_val, y_val], axis=1)
    return train_split, val_split

In [10]:
# Hold out 10% of the rows for validation, using the level-two label as target.
# (Level-one variant: target=train_df['lvl_one'].)
train_split, val_split = train_val_split(
    files=train_df['file'],
    target=train_df['lvl_two'],
    test_size=0.1,
)

# Utils

In [11]:
# import numpy as np
# import torch

class EarlyStopping:
    """Signal that training should stop once validation loss stops improving.

    Developed by: https://github.com/Bjarten/early-stopping-pytorch

    This is a trimmed version: it only tracks the best loss seen so far and
    sets ``early_stop``; it does not write a checkpoint itself.
    """
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How many consecutive non-improving calls are tolerated
                            before ``early_stop`` is set.
                            Default: 7
            verbose (bool): Stored on the instance but not read by this class;
                            kept so existing constructor calls keep working.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Stored on the instance but not read by this class
                            (no checkpoint is written here).
                            Default: 'checkpoint.pt'
            trace_func (function): Called with the counter message on every
                            non-improving call.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one validation loss and update ``counter`` / ``early_stop``.

        Args:
            val_loss (float): Validation loss of the epoch that just finished.
            model: Not used; accepted so existing call sites keep working.
        """
        # NaN compares False against everything, so without this guard a NaN
        # loss would be stored as the new best and no later epoch could ever
        # count as worse -- early stopping would be silently switched off.
        if np.isnan(float(val_loss)):
            improved = False
        else:
            score = -val_loss
            improved = self.best_score is None or score >= self.best_score + self.delta

        if improved:
            self.best_score = score
            self.val_loss_min = val_loss
            self.counter = 0
            return

        self.counter += 1
        self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

In [12]:
def weighted_sample(df, col):
    """Return one inverse-class-frequency weight per row of ``df``.

    Args:
        df (pd.DataFrame): Frame holding the label column.
        col (str): Name of the label column.

    Returns:
        torch.Tensor: 1-D float tensor of length ``len(df)``; entry ``i`` is
        ``1 / (number of rows sharing row i's label)``.
    """
    labels = df[col]
    # Look each row's class count up by label value. This avoids two problems
    # with indexing a per-class weight tensor by the label itself: it needs
    # labels to be exactly 0..K-1 with every class present, and it relied on
    # the 'index' column name from value_counts().reset_index(), which
    # pandas 2.0 renamed.
    per_row_counts = labels.map(labels.value_counts())
    return 1. / torch.tensor(per_row_counts.tolist(), dtype=torch.float)

In [13]:
def train_model(model, dataloader, criterion, optimizer, save_path, num_epochs=25):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Every epoch runs a 'train' pass followed by a 'val' pass. Whenever the
    validation loss reaches a new minimum the model is checkpointed to
    ``save_path`` via ``save_model``. Training ends after ``num_epochs`` epochs
    or as soon as ``EarlyStopping`` (patience 7) sets ``early_stop``. Batches
    are moved to the notebook-level ``device``.

    Args:
        model (nn.Module): Model to train; updated in place.
        dataloader (dict): Maps 'train' and 'val' to an iterable that yields
            ``(inputs, labels)`` batches.
        criterion (callable): ``criterion(outputs, labels)`` returning a scalar
            loss tensor. The running loss multiplies it by the batch size, so
            it is assumed to be a per-batch mean.
        optimizer (torch.optim.Optimizer): Optimizer stepping ``model``'s parameters.
        save_path (str): File the best-validation-loss checkpoint is written to.
        num_epochs (int): Maximum number of epochs. Default: 25

    Returns:
        tuple: ``(model, train_loss_history, train_acc_history,
        val_loss_history, val_acc_history)``. Loss entries are Python floats;
        accuracy entries are 0-dim double tensors. ``model`` carries the
        weights of the best checkpoint written during this run, or its final
        weights if no checkpoint was written.
    """
    since = time.time()

    train_loss_history = []
    train_acc_history = []
    val_loss_history = []
    val_acc_history = []

    best_loss = np.inf
    # Only restore from save_path if *this* run wrote it; otherwise torch.load
    # would fail or silently pick up a stale checkpoint from an earlier run.
    checkpoint_written = False

    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=7, verbose=True)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Count the samples actually drawn: a sampler may yield a different
            # number of items than len(loader.dataset), which would skew the
            # reported loss and accuracy.
            samples_seen = 0

            for inputs, labels in dataloader[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # track gradients only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels)
                samples_seen += batch_size

            epoch_loss = running_loss / samples_seen
            epoch_acc = running_corrects.double() / samples_seen

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_loss_history.append(epoch_loss)
            else:
                # checkpoint on a new best validation loss
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    print('saving model')
                    save_model(save_path, epoch, model, optimizer, epoch_loss, epoch_acc)
                    checkpoint_written = True
                val_acc_history.append(epoch_acc)
                val_loss_history.append(epoch_loss)
                early_stopping(epoch_loss, model)

        # Check once per epoch, after both phases: a break inside the phase loop
        # would only leave that loop, so the next epoch's header would be
        # printed before the stop is noticed.
        if early_stopping.early_stop:
            print("Early stopping")
            break

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    if checkpoint_written:
        # load best model weights; 'acc' is the accuracy of the epoch with the
        # lowest validation loss, which is what the checkpoint stores.
        checkpoint = torch.load(save_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        print('Best val Acc: {:4f}'.format(checkpoint['acc']))
        print('Best val Loss: {:4f}'.format(checkpoint['loss']))
    else:
        print('No checkpoint was written during this run; returning the model as-is')

    return model, train_loss_history, train_acc_history, val_loss_history, val_acc_history

In [14]:
def save_model(path, epoch, model, optimizer, loss, acc):
    """Write a training checkpoint to ``path`` with ``torch.save``.

    The checkpoint is a dict with the keys 'epoch', 'model_state_dict',
    'optimizer_state_dict', 'loss' and 'acc'.

    Args:
        path (str): Destination file.
        epoch (int): Epoch index stored under 'epoch'.
        model: Object whose ``state_dict()`` is stored under 'model_state_dict'.
        optimizer: Object whose ``state_dict()`` is stored under 'optimizer_state_dict'.
        loss: Value stored under 'loss'.
        acc: Value stored under 'acc'.
    """
    checkpoint = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=loss,
        acc=acc,
    )
    torch.save(checkpoint, path)

# Dataset & Dataloaders

In [15]:
class ImgDataset(Dataset):
    """Image dataset backed by a DataFrame of file names and labels."""

    def __init__(self, df, root_dir, percent_sample=None, transform=None):
        """
        Args:
            df (pd.DataFrame): Annotations; column 0 holds the image file name
                (joined onto ``root_dir``) and column 1 the label.
            root_dir (string): Directory with all the images.
            percent_sample (float, optional): Fraction in (0, 1] used to cap
                the length reported by ``__len__``. ``None`` (or 0) reports
                every row.
            transform (callable, optional): Optional transform to be applied
                on a sample.

        Raises:
            ValueError: If ``percent_sample`` is set and not in (0, 1].
        """
        # Validate once, up front, with a real exception: the previous asserts
        # ran on every len() call, vanish under ``python -O``, and their message
        # ("must be >= 0") contradicted the check (> 0).
        if percent_sample and not 0.0 < percent_sample <= 1.0:
            raise ValueError('Percentage to sample must be > 0 and <= 1.')
        self.img_df=df
        self.root_dir=root_dir
        self.transform=transform
        self.percent_sample=percent_sample

    def __len__(self):
        """Return the number of rows, scaled down by ``percent_sample`` if set."""
        if self.percent_sample:
            return int(np.floor(len(self.img_df) * self.percent_sample))
        return len(self.img_df)

    def __getitem__(self, idx):
        """Load row ``idx`` and return ``(image, label)``.

        The image is opened from ``root_dir``/file name, converted to RGB and
        passed through ``transform`` when one was given.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.img_df.iloc[idx, 0])
        # Some images are greyscale; converting guarantees 3 channels, which the
        # transforms expect. convert() returns a separate image, so the file
        # handle held by Image.open can be closed right away instead of being
        # left to the garbage collector.
        with PIL.Image.open(img_name) as img:
            X = img.convert('RGB')
        y = self.img_df.iloc[idx, 1]

        if self.transform:
            X = self.transform(X)

        return X, y

In [16]:
# OG means/sds from imagenet, shared by the training and validation pipelines.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training: resize, then a random resized 224 crop as augmentation.
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Validation: resize, then a fixed centre 224 crop.
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

In [74]:
# Both splits read their images from the same directory; only the annotation
# frame and the transform pipeline differ.
image_root = '../data/images/train/train'

train_dataset = ImgDataset(train_split, image_root, percent_sample=1, transform=train_transforms)
val_dataset = ImgDataset(val_split, image_root, percent_sample=1, transform=val_transforms)

In [75]:
# One weight per training row, then draw as many samples per epoch as there
# are weights.
samples_weights = weighted_sample(train_split, 'lvl_two')
weighted_sampler = WeightedRandomSampler(samples_weights, len(samples_weights))

In [77]:
# Settings common to both loaders; only the training loader uses the weighted sampler.
loader_kwargs = dict(batch_size=128, pin_memory=True, num_workers=4)

train_loader = DataLoader(train_dataset, sampler=weighted_sampler, **loader_kwargs)
val_loader = DataLoader(val_dataset, **loader_kwargs)

In [78]:
# Phase name -> loader, in the shape train_model indexes it.
loaders_dict = dict(train=train_loader, val=val_loader)

# Training

In [79]:
num_epochs = 20          # upper bound on training epochs
prior_model_classes = 2  # output width of the head the loaded checkpoint was trained with
new_level_classes = 7    # output width of the replacement head trained below

In [80]:
# Map every tensor to the CPU while loading: without map_location torch.load
# restores tensors onto the device they were saved from, which fails on a
# machine without that GPU. The weights are copied into the model afterwards
# and the model is moved to its device separately.
checkpoint = torch.load('noflip_best_model.tar', map_location='cpu')
checkpoint_state_dict = checkpoint['model_state_dict']

In [81]:
# Build the architecture only. load_state_dict (strict by default) below
# replaces every parameter and buffer with the level-1 checkpoint, so
# downloading ImageNet weights first would be discarded work and would make
# the cell fail without network access.
model_ft = models.resnet50()
num_ftrs = model_ft.fc.in_features
# The head must match the checkpoint's layout before the weights can be loaded.
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, prior_model_classes))
model_ft.load_state_dict(checkpoint_state_dict) # Loading level 1 model

# Swap in a freshly initialised head sized for the new label level.
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, new_level_classes))
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer_ft = torch.optim.SGD(model_ft.parameters(), 
                               lr=0.001, #Lower learning rate by 1/10
                               momentum=0.9, 
                               weight_decay=0.0001)

In [82]:
# Ask PyTorch to release its cached, unused GPU memory before training starts.
torch.cuda.empty_cache()

In [83]:
# Run the functions and save the best model in the function model_ft.
# Train and unpack the result. train_model returns the model followed by the
# train-loss, train-accuracy, val-loss and val-accuracy histories, so
# `train_val` receives the training-loss history.
(model_ft,
 train_val, train_acc,
 val_loss, val_acc) = train_model(model=model_ft,
                                  dataloader=loaders_dict,
                                  criterion=criterion,
                                  optimizer=optimizer_ft,
                                  save_path='initial_level2.tar',
                                  num_epochs=num_epochs)

Epoch 0/19
----------
Training complete in 9m 14s
train Loss: 1.6252 Acc: 0.3592
Training complete in 9m 35s
val Loss: 1.4889 Acc: 0.4020
saving model

Epoch 1/19
----------
Training complete in 9m 14s
train Loss: 1.5149 Acc: 0.4095
Training complete in 9m 35s
val Loss: 1.4184 Acc: 0.4424
saving model

Epoch 2/19
----------
Training complete in 9m 14s
train Loss: 1.4722 Acc: 0.4278
Training complete in 9m 35s
val Loss: 1.4296 Acc: 0.4396
EarlyStopping counter: 1 out of 7

Epoch 3/19
----------
Training complete in 9m 14s
train Loss: 1.4369 Acc: 0.4427
Training complete in 9m 35s
val Loss: 1.4175 Acc: 0.4401
saving model

Epoch 4/19
----------
Training complete in 9m 14s
train Loss: 1.4066 Acc: 0.4563
Training complete in 9m 36s
val Loss: 1.4163 Acc: 0.4456
saving model

Epoch 5/19
----------
Training complete in 9m 14s
train Loss: 1.3681 Acc: 0.4713
Training complete in 9m 36s
val Loss: 1.3901 Acc: 0.4630
saving model

Epoch 6/19
----------
Training complete in 9m 14s
train Loss: 1.339

KeyboardInterrupt: 