This script fine-tunes the pretrained CNN on the target dataset. It does not perform joint fine-tuning.

## Imports

In [None]:
# basic
import numpy as np
import pandas as pd
import random
import itertools
import os
import copy
import time

# plot
import matplotlib.pyplot as plt
import pylab
from tqdm import tqdm

# machine learning
from sklearn.metrics import confusion_matrix

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms

# custom
import video_loader
import tvgg
import tdense
from custom_models import DTAN, Zhang

## Hyperparameters

In [None]:
# Notebook-wide defaults: frames per sequence, mini-batch size and image side length.
# The dataset cells further down assign N_frames again.
N_frames, batch_size, img_size = 12, 8, 64

## Create dataloaders

Run only one of the following sections, depending on the target training dataset. For loading videos in PyTorch, I wrote a single image loader class for all datasets. This turned out to be a bad idea: every dataset has its own specificities, which has made the code bulky and inflexible. One possible improvement would be to write a separate class for each dataset, just as I have done for the fixed image datasets.

### AFEW

In [None]:
# Emotion labels used for AFEW, keyed by class index.
classes = {
    0: 'neutral', 1: 'angry', 2: 'disgust', 3: 'fear',
    4: 'happy', 5: 'sad', 6: 'surprise',
}
N_frames = 12
N_landmarks = 49
N_classes = len(classes)

# preprocessing
emotiw_root = os.path.join('/home', 'nii', 'Documents', 'EmotiW_2018')
data_dir = os.path.join(emotiw_root, 'Train_AFEW')
data_dir_val = os.path.join(emotiw_root, 'Val_AFEW')

data_transforms = transforms.Compose([transforms.Resize((64, 64))])

K = 10
k_folders = ['set_{}'.format(idx) for idx in range(K)]


def _afew_folds(root, phase):
    """Return a dict mapping every fold name to its VideoFolder for `root` and `phase`."""
    return {
        fold_name: video_loader.VideoFolder(
            root=root, image_folder='cropped_images',
            label_folder='emotion', landmark_folder='landmarks',
            fold=fold_name, phase=phase, img_type='jpg',
            classes=classes, n_frames=N_frames, n_landmarks=N_landmarks,
            transform=data_transforms, indexing=0,
            are_subjects=False)
        for fold_name in k_folders
    }


# Train/valid splits are read from the training directory, the test split from the validation directory.
training_datasets = _afew_folds(data_dir, 'train')
validation_datasets = _afew_folds(data_dir, 'valid')
testing_datasets = _afew_folds(data_dir_val, 'test')

fold = 8

# The selected fold is used for validation; all other folds are used for training.
validation_fold = fold
training_folds = [idx for idx in range(K) if idx != fold]

cross_datasets = {
    'train': torch.utils.data.ConcatDataset(
        [training_datasets[k_folders[k]] for k in training_folds]),
    'val': validation_datasets[k_folders[validation_fold]],
    # testing always uses every fold of the test split
    'test': torch.utils.data.ConcatDataset(
        [testing_datasets[fold_name] for fold_name in k_folders]),
}

dataloaders = {}
dataset_sizes = {}
for split in ('train', 'val', 'test'):
    dataloaders[split] = torch.utils.data.DataLoader(
        cross_datasets[split], batch_size=64, shuffle=True, num_workers=4)
    dataset_sizes[split] = len(cross_datasets[split])

In [None]:
def vidshow(video, title=None):
    """Plot a batch of frame sequences as a grid of grayscale images.

    Each row of the grid shows one sequence of the batch and each column one
    frame of that sequence.

    Args:
        video: tensor indexed as ``video[sequence][frame]``. Every frame is
            converted with ``.numpy()`` and squeezed before being displayed,
            so it presumably holds a single-channel image (TODO confirm
            against the loader).
        title: optional figure title. It is only applied when it is a
            string; any other value is ignored.
    """
    n_videos, n_frames = video.size()[0], video.size()[1]

    # squeeze=False keeps `axes` two-dimensional even when the grid has a
    # single row or a single column, so axes[vid][frame] is always valid.
    fig, axes = plt.subplots(nrows=n_videos, ncols=n_frames,
                             figsize=(20, 20), squeeze=False)
    if isinstance(title, str):
        fig.suptitle(title)

    for vid in range(n_videos):
        sequence = video[vid]
        for frame in range(n_frames):
            im = np.squeeze(sequence[frame].numpy())
            axes[vid][frame].imshow(im, cmap='gray')

In [None]:
# Size of the last frame of the first sequence; `videos` must already hold a loaded batch.
videos[0,-1].size()

In [None]:
# Fetch one training batch and display it. The second element of the batch is
# bound to `labels` rather than `classes`, so the class-name dict defined by
# the dataset cell is not overwritten (later cells index and list that dict).
# `ld` is the third element returned by the loader (presumably the landmarks
# - TODO confirm against video_loader).
videos, labels, ld = next(iter(dataloaders['train']))
vidshow(videos, ld)

### CK+

In [None]:
# Emotion labels used for CK+, keyed by class index.
classes = {0: 'anger', 1: 'contempt', 2: 'disgust', 3: 'fear', 4: 'happy', 5: 'sadness', 6: 'surprise'}
N_frames = 12
N_classes = len(classes)
N_landmarks = 68

# preprocessing
data_dir = os.path.join('/home', 'nii', 'Documents', 'CK+')

data_transforms = transforms.Compose(
    [transforms.Resize((64, 64))])

K = 10
k_folders = ['set_' + str(idx) for idx in range(K)]

# One dataset per fold, once for the 'train' phase and once for the 'valid' phase.
training_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='cohn-kanade-images-crop',
                                                 label_folder='Emotion', landmark_folder='Landmarks_crop',
                                                 fold=x, phase='train', classes=classes,
                                                 n_frames=N_frames, n_landmarks=N_landmarks,
                                                 transform=data_transforms,
                                                 indexing=1)
                     for x in k_folders}

validation_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='cohn-kanade-images-crop',
                                                   label_folder='Emotion', landmark_folder='Landmarks_crop',
                                                   fold=x, phase='valid', classes=classes,
                                                   n_frames=N_frames, n_landmarks=N_landmarks,
                                                   transform=data_transforms,
                                                   indexing=1)
                       for x in k_folders}

fold = 9

# `fold` is held out for testing, one of the remaining folds is drawn at
# random for validation and the rest is used for training.
training_folds = [x for x in range(K) if x != fold]
validation_fold = random.choice(training_folds)
# Compare fold indices by value: identity (`is not`) only gives the right
# answer for integers as an interpreter implementation detail.
training_folds = [x for x in training_folds if x != validation_fold]

cross_datasets = {}
cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                          for k in training_folds])
cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

# the held-out fold is read through the 'valid'-phase datasets
cross_datasets['test'] = validation_datasets[k_folders[fold]]

dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=batch_size, shuffle=True,
                                              num_workers=4)
               for x in ['train', 'val', 'test']}

dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

### Oulu-CASIA

In [None]:
# Emotion labels used for Oulu-CASIA, keyed by class index.
classes = {0: 'anger', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sadness', 5: 'surprise'}
data_dir = os.path.join('/home', 'nii', 'Documents', 'OriginalImg', 'VL')
N_frames = 12
N_classes = len(classes)
N_landmarks = 68

data_transforms = transforms.Compose(
    [transforms.Resize((64, 64))])

K = 10
k_folders = ['set_' + str(idx) for idx in range(K)]

# One dataset per fold, once for the 'train' phase and once for the 'valid' phase.
training_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='Strong-crop',
                                                 label_folder='Strong-emotion', landmark_folder='Landmarks_crop',
                                                 fold=x, phase='train', classes=classes, img_type='jpeg',
                                                 n_landmarks=N_landmarks,
                                                 n_frames=N_frames, transform=data_transforms)
                     for x in k_folders}

validation_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='Strong-crop',
                                                   label_folder='Strong-emotion', landmark_folder='Landmarks_crop',
                                                   fold=x, phase='valid', classes=classes, img_type='jpeg',
                                                   n_landmarks=N_landmarks,
                                                   n_frames=N_frames, transform=data_transforms)
                       for x in k_folders}


fold = 7

# `fold` is held out for testing, one of the remaining folds is drawn at
# random for validation and the rest is used for training.
training_folds = [x for x in range(K) if x != fold]
validation_fold = random.choice(training_folds)
# Compare fold indices by value: identity (`is not`) only gives the right
# answer for integers as an interpreter implementation detail.
training_folds = [x for x in training_folds if x != validation_fold]

cross_datasets = {}
cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                          for k in training_folds])
cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

# the held-out fold is read through the 'valid'-phase datasets
cross_datasets['test'] = validation_datasets[k_folders[fold]]

dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=32,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val', 'test']}

dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

## Training

In [None]:
def _to_number(value):
    """Return a one-element tensor/Variable (or a plain number) as a Python number.

    Values exposing ``.item()`` are read through it; otherwise the legacy
    ``.data[0]`` access this notebook originally relied on is used, and plain
    Python numbers are returned unchanged.
    """
    if hasattr(value, 'item'):
        return value.item()
    if hasattr(value, 'data'):
        return value.data[0]
    return value


def _progress_quotient(train_loss, val_epoch_loss, best_loss, k):
    """Return the early-stopping quotient GL / Pk for the current epoch.

    GL is the relative increase (in percent) of the validation loss over
    ``best_loss``. Pk (in per mille) measures by how much the mean of the
    last ``k`` training losses exceeds their minimum. While fewer than ``k``
    training losses are available the strip is incomplete and Pk would be
    meaningless (usually negative), so 0.0 is returned and training goes on.
    """
    strip = train_loss[-k:]
    if len(strip) < k:
        return 0.0

    GL = 100 * (val_epoch_loss / best_loss - 1)
    lowest = min(strip)
    Pk = 1000 * (sum(strip) / (k * lowest) - 1) if lowest > 0 else float('inf')
    if Pk > 0:
        return GL / Pk
    # Training loss is flat over the strip (always the case for k == 1):
    # avoid dividing by zero and stop only if validation got worse.
    return float('inf') if GL > 0 else 0.0


def train_model(model, criterion, optimizer, num_epochs=25, k=5, alpha=0.2):
    """Train `model` with early stopping and return the best weights found.

    Every epoch runs a training pass and a validation pass over the global
    ``dataloaders['train']`` / ``dataloaders['val']``; the globals
    ``dataset_sizes`` and ``use_gpu`` are read as well. Training stops when
    the progress quotient (see ``_progress_quotient``) exceeds ``alpha`` or
    after ``num_epochs`` epochs, whichever comes first. The weights of the
    epoch with the highest validation accuracy are restored before returning.

    Args:
        model: network to train; it is called on the squeezed input batch.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the parameters of `model`.
        num_epochs: maximum number of epochs (at least one epoch is run).
        k: length of the training-loss strip used for early stopping.
        alpha: threshold on the progress quotient.

    Returns:
        Tuple ``(model, best_acc, train_accuracy, train_loss, val_accuracy,
        val_loss)``; the four histories are lists with one Python float per
        epoch.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError('k must be a positive integer')

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = 1e6

    train_accuracy = []
    val_accuracy = []
    train_loss = []
    val_loss = []

    epoch = 0
    keep_training = True
    while keep_training:
        print('Epoch {}'.format(epoch))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            model.train(phase == 'train')

            running_loss = 0.0
            running_corrects = 0

            for batch in dataloaders[phase]:
                # the third element of a batch is not used for training
                inputs, labels, _ = batch

                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                optimizer.zero_grad()

                # squeeze() would also drop the batch axis of a
                # single-sample batch, so it is restored in that case
                b_s = inputs.size(0)
                squeezed = torch.squeeze(inputs)
                if b_s == 1:
                    squeezed = squeezed.unsqueeze(0)
                outputs = model(squeezed)
                _, preds = torch.max(outputs.data, 1)

                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # accumulate plain Python numbers so the histories can be
                # plotted and formatted directly
                running_loss += _to_number(loss) * b_s
                running_corrects += _to_number(torch.sum(preds == labels.data))

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'train':
                train_accuracy.append(epoch_acc)
                train_loss.append(epoch_loss)
                continue

            val_accuracy.append(epoch_acc)
            val_loss.append(epoch_loss)

            PQ = _progress_quotient(train_loss, epoch_loss, best_loss, k)
            print('PQ = ' + str(PQ))

            # `epoch` is zero-based, so epoch + 1 epochs have been run so far
            if PQ > alpha or epoch + 1 >= num_epochs:
                keep_training = False

            # keep a copy of the weights with the best validation accuracy
            if epoch_acc >= best_acc:
                best_acc = epoch_acc
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        epoch += 1

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, best_acc, train_accuracy, train_loss, val_accuracy, val_loss


def test_model(model, criterion):
    """Evaluate `model` on the global ``dataloaders['test']``.

    The globals ``dataset_sizes``, ``use_gpu`` and ``N_classes`` are read as
    well. The model is switched to evaluation mode before the pass.

    Args:
        model: network to evaluate; it is called on the squeezed input batch.
        criterion: loss function called as ``criterion(outputs, labels)``.

    Returns:
        Tuple ``(total_loss, total_acc, cnf_matrix)``: loss and accuracy
        averaged over ``dataset_sizes['test']`` as Python floats, and the
        confusion matrix over the class indices ``0 .. N_classes - 1``.
    """

    def as_number(value):
        # Prefer .item(); fall back to the legacy .data[0] access and pass
        # plain Python numbers through unchanged.
        if hasattr(value, 'item'):
            return value.item()
        if hasattr(value, 'data'):
            return value.data[0]
        return value

    model.train(False)

    running_loss = 0.0
    running_corrects = 0

    truth = []
    prediction = []

    for batch in dataloaders['test']:
        # the third element of a batch is not used for testing
        inputs, labels, _ = batch

        if use_gpu:
            inputs = Variable(inputs.cuda())
            labels = Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        # squeeze() would also drop the batch axis of a single-sample
        # batch, so it is restored in that case
        b_s = inputs.size(0)
        squeezed = torch.squeeze(inputs)
        if b_s == 1:
            squeezed = squeezed.unsqueeze(0)
        outputs = model(squeezed)

        _, preds = torch.max(outputs.data, 1)

        loss = criterion(outputs, labels)

        # accumulate plain Python numbers so the results format and plot cleanly
        running_loss += as_number(loss) * b_s
        running_corrects += as_number(torch.sum(preds == labels.data))

        truth.extend(labels.cpu().data.numpy().tolist())
        prediction.extend(preds.cpu().numpy().tolist())

    total_loss = running_loss / dataset_sizes['test']
    total_acc = float(running_corrects) / dataset_sizes['test']

    print('{} Loss: {:.4f} Acc: {:.4f}'.format('test', total_loss, total_acc))

    cnf_matrix = confusion_matrix(truth, prediction, labels=list(range(N_classes)))

    return total_loss, total_acc, cnf_matrix

In [None]:
# Drop the reference to a previously built model, if there is one; a plain
# `del model` raises NameError when the cell is run before any model exists.
globals().pop('model', None)

In [None]:
save_folder = 'model'

use_gpu = torch.cuda.is_available()
#use_gpu = False

model = tvgg.vgg11_bn(num_classes=N_classes, n_frames=N_frames)
#model = tvgg.vgg16_bn(num_classes=7, n_frames=N_frames)
#model = tdense.densenet121(num_classes=N_classes)
#model = Zhang(n_frames=N_frames, n_classes=7)
#model.load_state_dict(torch.load(os.path.join(save_folder, 'zhang.pt')))
# Load the pretrained weights onto the CPU first so that a checkpoint saved
# from a GPU can also be restored when no GPU is available; the model is
# moved to the GPU below when one is used.
model.load_state_dict(torch.load(os.path.join(save_folder, 'vgg11.pt'),
                                 map_location=lambda storage, loc: storage))
#model.load_state_dict(torch.load(os.path.join(save_folder, 'dense.pt')))

# fine-tune every layer
for param in model.parameters():
    param.requires_grad = True

parameters = model.parameters()

if use_gpu:
    model = model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(parameters, lr=0.001, weight_decay=5e-5)

In [None]:
# Display the model that was just built.
model

In [None]:
# Fine-tune the model; the returned best validation accuracy is discarded here.
model, _, train_acc, train_loss, val_acc, val_loss = train_model(
    model,
    criterion,
    optimizer,
    num_epochs=200,
    k=10,
    alpha=2.,
)

In [None]:
# Evaluate the fine-tuned model on the test split.
test_loss, test_accuracy, conf = test_model(model=model, criterion=criterion)

In [None]:
# Training and validation accuracy per epoch.
plt.figure(figsize=(12, 6))
for curve, colour, curve_label in ((train_acc, 'r', 'training'),
                                   (val_acc, 'b', 'validation')):
    plt.plot(curve, color=colour, label=curve_label)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.title('VGG training')
pylab.legend(loc='lower right')
plt.tight_layout()
plt.show()

In [None]:
# Training and validation loss per epoch. The title names the VGG model that
# this notebook fine-tunes, matching the accuracy plot.
plt.figure(figsize=(12,6))
plt.plot(train_loss,color='r', label='training')
plt.plot(val_loss,color='b', label='validation')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.title('VGG training')
pylab.legend(loc='lower right')
plt.tight_layout()
plt.show()

In [None]:
# save model
save_folder = 'model'
save_path = os.path.join(save_folder, 'vgg_finetune_afew6.pt')
torch.save(model.state_dict(), save_path)

## K-fold crossvalidation finetuning

In [None]:
# K-fold cross-validation: for every fold a fresh copy of the pretrained
# network is fine-tuned on the remaining folds, tested on the held-out fold
# and saved to disk. The test accuracies are collected in `k_accuracy`.
k_accuracy = []
save_folder = 'model'
use_gpu = torch.cuda.is_available()
# defined here so that the cell does not depend on the single-run setup cell
criterion = nn.CrossEntropyLoss()

for fold in range(K):

    print('Starting fold ' + str(fold) + ' ...')

    # reassign datasets to training, validation and testing
    training_folds = [x for x in range(K) if x != fold]
    validation_fold = random.choice(training_folds)
    # compare fold indices by value, not by identity
    training_folds = [x for x in training_folds if x != validation_fold]

    cross_datasets = {}
    cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                              for k in training_folds])
    cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

    cross_datasets['test'] = validation_datasets[k_folders[fold]]

    dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=batch_size,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

    # init model; the checkpoint is loaded onto the CPU first so that it can
    # also be restored when no GPU is available
    model = tvgg.vgg11_bn(num_classes=N_classes, n_frames=N_frames)
    model.load_state_dict(torch.load(os.path.join(save_folder, 'vgg11.pt'),
                                     map_location=lambda storage, loc: storage))
    #model = tdense.densenet121(num_classes=7)
    #model.load_state_dict(torch.load(os.path.join(save_folder, 'dense.pt')))

    # set up optimizer
    parameters = model.parameters()

    if use_gpu:
        model = model.cuda()

    optimizer = optim.Adamax(parameters, lr=0.001, weight_decay=5e-5)

    # train model
    model, val_accuracy, train_acc, train_loss, val_acc, val_loss = train_model(model, criterion, optimizer,
                                                                                num_epochs=300, k=20, alpha=2.0)
    test_loss, test_accuracy, conf = test_model(model, criterion)

    # save model
    save_path = os.path.join(save_folder, 'vgg_finetune' + str(fold) + '.pt')
    #save_path = os.path.join(save_folder, 'dense_finetune' + str(fold) + '.pt')
    torch.save(model.state_dict(), save_path)

    print('Finished fold ' + str(fold) + ' with validation accuracy of ' + str(val_accuracy))
    k_accuracy.append(test_accuracy)
    # drop the reference before the next fold builds a new model
    del model

print('Mean value of test accuracy over ' + str(K) + '-fold crossvalidation is: '
      + str(sum(k_accuracy) / float(len(k_accuracy))))

In [None]:
# Box plot of the per-fold test accuracies.
plt.boxplot(k_accuracy)

In [None]:
# Display the per-fold test accuracies.
k_accuracy

In [None]:
# Drop the model reference if one is still bound; a plain `del model` raises
# NameError when the name has already been deleted.
globals().pop('model', None)

### Cross-val test

In [None]:
# Drop the model reference if one is still bound; a plain `del model` raises
# NameError when the name has already been deleted.
globals().pop('model', None)

In [None]:
# Re-evaluate the saved per-fold models: every fold's fine-tuned weights are
# loaded and tested on that fold, the confusion matrices are summed up in
# `conf_math` and the test accuracies are collected in `k_accuracy`.
k_accuracy = []
conf_math = np.zeros((N_classes, N_classes))

for fold in range(K):
    print('Starting fold ' + str(fold) + ' ...')

    # reassign datasets to training, validation and testing
    training_folds = [x for x in range(K) if x != fold]
    validation_fold = random.choice(training_folds)
    # compare fold indices by value, not by identity
    training_folds = [x for x in training_folds if x != validation_fold]

    cross_datasets = {}
    cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                              for k in training_folds])
    cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

    cross_datasets['test'] = validation_datasets[k_folders[fold]]

    dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=64,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

    #tvgg = tdense.densenet121(num_classes=7)
    #tvgg.load_state_dict(torch.load(os.path.join(save_folder, 'dense_finetune' + str(fold) + '.pt')))

    # the checkpoint is loaded onto the CPU first so that it can also be
    # restored when no GPU is available
    model = tvgg.vgg11_bn(num_classes=N_classes, n_frames=N_frames)
    model.load_state_dict(torch.load(os.path.join(save_folder, 'vgg_finetune' + str(fold) + '.pt'),
                                     map_location=lambda storage, loc: storage))
    model.eval()

    if use_gpu:
        model = model.cuda()

    test_loss, test_accuracy, conf = test_model(model, criterion)
    conf_math += conf
    k_accuracy.append(test_accuracy)

    # drop the reference before the next fold builds a new model
    del model

print('Mean value of test accuracy over ' + str(K) + '-fold crossvalidation is: '
      + str(sum(k_accuracy) / float(len(k_accuracy))))

In [None]:
# Display the confusion matrix summed over all folds.
conf_math

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a short status line and plot the confusion matrix `cm`.

    Args:
        cm: square matrix of counts, rows being true labels and columns
            predicted labels (see the axis labels set below).
        classes: tick labels, one per class.
        normalize: if True every row is divided by its sum; rows that sum to
            zero are left at zero instead of producing NaN.
        title: accepted for compatibility; the figure title is currently not
            drawn.
        cmap: colormap passed to ``plt.imshow``.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True).astype('float')
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # The 'd' format only accepts integers, but count matrices are often
    # stored as floats (e.g. accumulated in an np.zeros array). Whole-number
    # cells are therefore cast to int; anything else is shown with two decimals.
    if normalize or not np.all(np.equal(np.mod(cm, 1), 0)):
        fmt, cast = '.2f', float
    else:
        fmt, cast = 'd', int

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cast(cm[i, j]), fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
# Plot normalized confusion matrix
# Row-normalized confusion matrix, labelled with the class names.
plt.figure()
class_names = list(classes.values())
plot_confusion_matrix(conf_math,
                      classes=class_names,
                      normalize=True,
                      title='Normalized confusion matrix')

plt.show()

## Visualize model

In [None]:
# Drop the model reference if one is still bound; a plain `del model` raises
# NameError when the name has already been deleted.
globals().pop('model', None)

In [None]:
def visualize_model(model, num_images=6):
    """Show the last frame of some test sequences with predicted and true label.

    Batches are read from the global ``dataloaders['test']``; the globals
    ``use_gpu`` and ``classes`` (class index -> name) are used as well. The
    model is run in evaluation mode and its previous mode is restored
    afterwards.

    Args:
        model: network to run; it is called on the squeezed input batch.
        num_images: number of sequences to display, laid out in two columns.
    """
    if num_images < 1:
        return

    images_so_far = 0
    n_rows = (num_images + 1) // 2  # round up so an odd count still fits
    plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')

    was_training = model.training
    model.train(False)
    try:
        for inputs, labels, _ in dataloaders['test']:
            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            # squeeze() would also drop the batch axis of a single-sample
            # batch, so it is restored in that case
            squeezed = torch.squeeze(inputs)
            if inputs.size(0) == 1:
                squeezed = squeezed.unsqueeze(0)
            outputs = model(squeezed)
            _, preds = torch.max(outputs.data, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(n_rows, 2, images_so_far)
                ax.axis('off')
                # int() turns tensor elements into plain dict keys
                ax.set_title('predicted: {} label: {}'.format(classes[int(preds[j])],
                                                              classes[int(labels.data[j])]))
                # last frame of sequence j
                plt.imshow(torch.squeeze(inputs.cpu().data[j, -1]).numpy(), cmap='gray')

                if images_so_far == num_images:
                    return
    finally:
        model.train(was_training)

In [None]:
# Rebuild the network and load the fine-tuned weights of the fold currently
# held in `fold`. The checkpoint is loaded onto the CPU first so that it can
# also be restored when no GPU is available.
model = tvgg.vgg11_bn(num_classes=N_classes, n_frames=N_frames)
model.load_state_dict(torch.load(os.path.join(save_folder, 'vgg_finetune' + str(fold) + '.pt'),
                                 map_location=lambda storage, loc: storage))
if use_gpu:
    model = model.cuda()

In [None]:
# Show predictions of the reloaded model on a few test sequences.
visualize_model(model)