In [None]:
# basic
import numpy as np
import random
import itertools
import time
import os
import copy

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms

# plot
import matplotlib.pyplot as plt
import pylab

# machine learning
from sklearn.metrics import confusion_matrix

# custom
import video_loader
import custom_models as cm
import tvgg as tv
import tdense

## Hyperparameters

In [None]:
# Side length (pixels) of the square frames.
# NOTE(review): not referenced by the visible cells -- the Resize transforms
# below hard-code (64, 64); confirm whether this constant is still needed.
img_size = 64
# Mini-batch size for the DataLoaders that reference `batch_size`.
batch_size = 64
# Frames per video clip; each dataset cell below re-assigns the same value.
N_frames = 12

## Load data

### CK+

In [None]:
# CK+ configuration: seven emotion labels, 12 frames per clip, 68 landmarks.
classes = {0: 'anger', 1: 'contempt', 2: 'disgust', 3: 'fear', 4: 'happy', 5: 'sadness', 6: 'surprise'}
N_frames = 12
N_classes = len(classes)
N_landmarks = 68

# preprocessing
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
data_dir = os.path.join('/home', 'nii', 'Documents', 'CK+')

data_transforms = transforms.Compose(
    [transforms.Resize((64, 64))])

# One dataset per fold directory name ('set_0' ... 'set_9').
K = 10
k_folders = ['set_' + str(idx) for idx in range(K)]

training_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='cohn-kanade-images-crop',
                                                 label_folder='Emotion', landmark_folder='Landmarks_crop',
                                                 fold=x, phase='train', classes=classes,
                                                 n_frames=N_frames, n_landmarks=N_landmarks,
                                                 transform=data_transforms,
                                                 indexing=1)
                     for x in k_folders}

validation_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='cohn-kanade-images-crop',
                                                   label_folder='Emotion', landmark_folder='Landmarks_crop',
                                                   fold=x, phase='valid', classes=classes,
                                                   n_frames=N_frames, n_landmarks=N_landmarks,
                                                   transform=data_transforms,
                                                   indexing=1)
                       for x in k_folders}

# Fold held out for testing.
fold = 8

# A random remaining fold is used for validation; the rest are for training.
# NOTE(review): `random` is not seeded in the visible cells, so the validation
# fold differs between runs.
training_folds = [x for x in range(K) if x != fold]
validation_fold = random.choice(training_folds)
# Compare fold indices by value. `is not` tests object identity, which matches
# equality for small ints only as a CPython implementation detail.
training_folds = [x for x in training_folds if x != validation_fold]

cross_datasets = {}
cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                          for k in training_folds])
cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

# The test split is the 'valid'-phase dataset of the held-out fold.
cross_datasets['test'] = validation_datasets[k_folders[fold]]

dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=batch_size, shuffle=True,
                                              num_workers=4)
               for x in ['train', 'val', 'test']}

dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

### Oulu-CASIA

In [None]:
# Oulu-CASIA configuration: six emotion labels, 12 frames per clip, 68 landmarks.
classes = {0: 'anger', 1: 'disgust', 2: 'fear', 3: 'happy', 4: 'sadness', 5: 'surprise'}
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
data_dir = os.path.join('/home', 'nii', 'Documents', 'OriginalImg', 'VL')
N_frames = 12
N_classes = len(classes)
N_landmarks = 68

data_transforms = transforms.Compose(
    [transforms.Resize((64, 64))])

# One dataset per fold directory name ('set_0' ... 'set_9').
K = 10
k_folders = ['set_' + str(idx) for idx in range(K)]

training_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='Strong-crop',
                                                 label_folder='Strong-emotion', landmark_folder='Landmarks_crop',
                                                 fold=x, phase='train', classes=classes, img_type='jpeg',
                                                 n_landmarks=N_landmarks,
                                                 n_frames=N_frames, transform=data_transforms)
                     for x in k_folders}

validation_datasets = {x: video_loader.VideoFolder(root=data_dir, image_folder='Strong-crop',
                                                   label_folder='Strong-emotion', landmark_folder='Landmarks_crop',
                                                   fold=x, phase='valid', classes=classes, img_type='jpeg',
                                                   n_landmarks=N_landmarks,
                                                   n_frames=N_frames, transform=data_transforms)
                       for x in k_folders}

# Fold held out for testing.
fold = 1

# A random remaining fold is used for validation; the rest are for training.
# NOTE(review): `random` is not seeded in the visible cells, so the validation
# fold differs between runs.
training_folds = [x for x in range(K) if x != fold]
validation_fold = random.choice(training_folds)
# Compare fold indices by value. `is not` tests object identity, which matches
# equality for small ints only as a CPython implementation detail.
training_folds = [x for x in training_folds if x != validation_fold]

cross_datasets = {}
cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                          for k in training_folds])
cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

# The test split is the 'valid'-phase dataset of the held-out fold.
cross_datasets['test'] = validation_datasets[k_folders[fold]]

# NOTE(review): this cell hard-codes batch_size=32 instead of the notebook-level
# `batch_size`; kept as in the original.
dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=32,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val', 'test']}

dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

### AFEW

In [None]:
# AFEW configuration: seven emotion labels, 12 frames per clip, 49 landmarks.
classes = dict(enumerate(['neutral', 'angry', 'disgust', 'fear', 'happy', 'sad', 'surprise']))
N_frames, N_landmarks = 12, 49
N_classes = len(classes)
use_gpu = torch.cuda.is_available()

# preprocessing
# Training clips and the separate validation release share one parent directory.
_afew_root = os.path.join('/home', 'nii', 'Documents', 'EmotiW_2018')
data_dir = os.path.join(_afew_root, 'Train_AFEW')
data_dir_val = os.path.join(_afew_root, 'Val_AFEW')

data_transforms = transforms.Compose([transforms.Resize((64, 64))])

K = 10
k_folders = ['set_{}'.format(idx) for idx in range(K)]

# Keyword arguments common to every VideoFolder built in this cell.
_afew_kwargs = dict(image_folder='cropped_images', label_folder='emotion',
                    landmark_folder='landmarks', img_type='jpg',
                    classes=classes, n_frames=N_frames, n_landmarks=N_landmarks,
                    transform=data_transforms, indexing=0, are_subjects=False)

training_datasets = {
    name: video_loader.VideoFolder(root=data_dir, fold=name, phase='train', **_afew_kwargs)
    for name in k_folders
}

validation_datasets = {
    name: video_loader.VideoFolder(root=data_dir, fold=name, phase='valid', **_afew_kwargs)
    for name in k_folders
}

# Test data is read from the separate Val_AFEW directory.
testing_datasets = {
    name: video_loader.VideoFolder(root=data_dir_val, fold=name, phase='test', **_afew_kwargs)
    for name in k_folders
}

# Here the chosen fold is the validation fold; all other folds are for training.
fold = 6
validation_fold = fold
training_folds = [k for k in range(K) if k != fold]

cross_datasets = {
    'train': torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                             for k in training_folds]),
    'val': validation_datasets[k_folders[validation_fold]],
    # Every fold of the Val_AFEW directory is concatenated into the test split.
    'test': torch.utils.data.ConcatDataset([testing_datasets[name] for name in k_folders]),
}

dataloaders = {
    split: torch.utils.data.DataLoader(subset, batch_size=32, shuffle=True, num_workers=4)
    for split, subset in cross_datasets.items()
}

dataset_sizes = {split: len(subset) for split, subset in cross_datasets.items()}

## Define model

The model source code is included here so that checkpoints saved as whole model objects (including their hidden-state layers) can be loaded directly with `torch.load`.

In [None]:
# BiRNN model (returns the output of every time step, not only the last one)
class BiRNN(nn.Module):
    """Multi-layer bidirectional ``nn.RNN`` over batch-first sequences.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden units per direction.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers,
                          batch_first=True, bidirectional=True)

    def forward(self, x):
        """Run the RNN from a zero initial state.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, seq_len, 2 * hidden_size) holding the
            outputs of both directions for every time step.
        """
        # nn.RNN uses an all-zero initial hidden state when none is passed and
        # creates it on the input's device with the input's dtype. The previous
        # hand-built zeros went to the default CUDA device as float32, which
        # mismatches inputs on another GPU or in double precision.
        out, _ = self.rnn(x)

        return out
    

class BiLSTM(nn.Module):
    """Multi-layer bidirectional ``nn.LSTM`` over batch-first sequences.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden units per direction.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)

    def forward(self, x):
        """Run the LSTM from zero initial hidden and cell states.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, seq_len, 2 * hidden_size). The output of
            every time step is returned; selecting only the last step is left
            to the caller.
        """
        # nn.LSTM uses all-zero (h_0, c_0) when none is passed and creates them
        # on the input's device with the input's dtype. The previous hand-built
        # zeros went to the default CUDA device as float32, which mismatches
        # inputs on another GPU or in double precision.
        out, _ = self.lstm(x)

        return out
    

def make_layers(cfg):
    """Build the recurrent feature stack described by ``cfg``.

    Every entry of ``cfg`` is a 5-tuple
    ``(mask, hidden_size, n_layers, double, memory)``:

    * ``memory`` truthy: append one ``BiLSTM`` whose input size is
      ``len(mask) * width``.
    * otherwise: append an ``nn.ModuleList`` with one ``BiRNN`` per element of
      ``mask`` (input size ``len(mask[i]) * width``). When ``double`` is
      truthy, also append a second ``nn.ModuleList`` of ``BiRNN`` modules with
      input size ``2 * hidden_size``.

    ``width`` starts at 1, is doubled right before a memory layer is built, and
    is set to ``2 * hidden_size`` after every entry.

    Returns:
        ``nn.ModuleList`` with the layers in construction order.
    """
    stack = []
    width = 1
    for mask, hidden_size, n_layers, double, memory in cfg:
        if memory:
            # temporary correction --> multiply by the number of previous layers
            width *= 2
            stack.append(BiLSTM(len(mask) * width, hidden_size, n_layers))
        else:
            stack.append(nn.ModuleList(
                [BiRNN(len(group) * width, hidden_size, n_layers) for group in mask]))
            if double:
                stack.append(nn.ModuleList(
                    [BiRNN(2 * hidden_size, hidden_size, n_layers) for _ in mask]))
        # Bidirectional outputs are twice the hidden size.
        width = 2 * hidden_size

    return nn.ModuleList(stack)


class Flexible(nn.Module):
    """Landmark classifier: a configurable recurrent stack plus an MLP head.

    The recurrent layers are built by ``make_layers(cfg)`` and stored in
    ``self.features``; ``self.classifier`` is a three-layer fully connected
    head ending in ``n_classes`` outputs.

    NOTE(review): ``n_landmarks`` and ``use_mask`` are stored but never read by
    ``forward``, which hard-codes 68 landmarks. The first classifier layer also
    hard-codes 480 input features, so ``cfg`` must produce exactly that width.
    """
    def __init__(self, cfg, n_landmarks, n_classes, use_mask=True):
        super(Flexible, self).__init__()
        
        self.n_landmarks = n_landmarks
        self.n_classes = n_classes
        self.cfg = cfg
        self.use_mask = use_mask
        
        # feature extraction
        self.features = make_layers(self.cfg)
        
        # classification head; the 480 input features are fixed here
        self.classifier = nn.Sequential(
            nn.Linear(480, 8192),
            nn.ReLU(True),
            nn.Linear(8192, 4096),
            nn.ReLU(True),
            nn.Linear(4096, self.n_classes),
        )
        
    
    def forward(self, x):
        """Classify a batch of landmark sequences.

        ``x`` is reshaped to (batch, -1, 2*68), a fixed subset of the 136
        coordinate columns is kept, the layers described by ``self.cfg`` are
        applied in order, and the result is passed to ``self.classifier``.
        """
        
        batch_size = x.size(0)
        
        # The bare string below is a disabled earlier variant that switched on
        # self.use_mask; it is never executed.
        """
        if self.use_mask: 
            x = x.view(batch_size, -1, 2*68)
            mask = np.ones((2*68, 1),  dtype=bool)
            mask[:34] = False
            mask[120:122] = False
            mask[128:130] = False
            x = x[:,:,np.where(mask)[0]]
            
        else:
            x = x.view(batch_size, -1, 2*49)
        """

        #"""
        # Reshape to (batch, steps, 136) and drop columns 0-33, 120-121 and
        # 128-129, leaving 98 of the 136 coordinate columns.
        x = x.view(batch_size, -1, 2*68)
        mask = np.ones((2*68, 1),  dtype=bool)
        mask[:34] = False
        mask[120:122] = False
        mask[128:130] = False
        x = x[:,:,np.where(mask)[0]]
        #"""
        
        # Index into self.features; it advances by 1 or 2 per cfg entry to
        # mirror what make_layers appended for that entry.
        num_layer = 0
        for v in self.cfg:
            # extract layer configuration
            
            # `mask` is rebound here to the entry's own mask and shadows the
            # boolean column mask built above.
            mask, hidden_size, n_layers, double, memory = v
            
            # LSTM layer
            if memory:
                # NOTE(review): n_layers (the layer count given to make_layers)
                # is also used as the sequence dimension of this view; confirm
                # that this is intended.
                x = x.contiguous().view(batch_size, n_layers, -1)
                x = self.features[num_layer](x)
                # flatten all per-step outputs into one vector per sample
                x = x.contiguous().view(batch_size, -1)
                num_layer += 1
              
            # RNN layer
            else:
                out_features = []
                for i in range(len(mask)):
                    # Select the feature columns listed in mask[i] and run them
                    # through their own BiRNN (two chained BiRNNs if `double`).
                    if double:
                        landmarks = x[:,:,mask[i]].view(batch_size, n_layers, -1)
                        landmarks = self.features[num_layer+1][i](self.features[num_layer][i](landmarks))

                    else:
                        landmarks = x[:,:,mask[i]].view(batch_size, n_layers, -1)
                        landmarks = self.features[num_layer][i](landmarks)

                    out_features.append(landmarks)
                    
                if double:
                    num_layer += 2
                else:
                    num_layer += 1

                # Stack the per-group outputs on a new leading axis, then move
                # that axis to position 2: (batch, steps, groups, features).
                x = torch.stack(out_features).permute(1,2,0,3)
                
        # classification
        x = self.classifier(x)
        
        return x

In [None]:
# Drop any models left over from a previous run before loading new ones.
# `pop` with a default makes this a no-op on a fresh kernel, where a plain
# `del` raises NameError and breaks "Restart & Run All".
globals().pop('bilstm', None)
globals().pop('tvgg', None)

In [None]:
# Directory that holds the saved checkpoints.
save_folder = 'model'

# Landmark branch: this checkpoint is loaded as a whole object (not a state
# dict), so the model classes defined above must already be in scope.
# NOTE(review): torch.load un-pickles the file and can execute arbitrary code;
# only load checkpoints from a trusted source.
# Disabled alternatives from earlier experiments: building
# Flexible(my_config, N_landmarks, N_classes) and loading 'custom_1.pt' via
# load_valid(...) or load_state_dict(..., strict=False).
bilstm_checkpoint = os.path.join(save_folder, 'manual_oulu1.pt')
bilstm = torch.load(bilstm_checkpoint)

# Image branch: build the network, then restore its fine-tuned weights.
# Disabled alternatives from earlier experiments: tdense.densenet121(num_classes=7)
# with 'dense_finetune8.pt', and the 'vgg_finetune8.pt' weights.
tvgg = tv.vgg11_bn(n_frames=N_frames, num_classes=N_classes)
tvgg_weights = torch.load(os.path.join(save_folder, 'vgg_finetune_oulu1.pt'))
tvgg.load_state_dict(tvgg_weights)

In [None]:
# Bare expression: the notebook displays the repr of the loaded landmark model.
bilstm

In [None]:
# Bare expression: the notebook displays the repr of the loaded image model.
tvgg

In [None]:
# Use the GPU only when one is actually present. Hard-coding True made every
# later `.cuda()` call fail on CPU-only machines.
use_gpu = torch.cuda.is_available()
# Classification loss shared by the training and evaluation functions.
criterion = nn.CrossEntropyLoss()

Freeze feature extractors and only tune classifiers.

In [None]:
# Move both networks to the GPU when requested.
if use_gpu:
    bilstm, tvgg = bilstm.cuda(), tvgg.cuda()

# Freeze each network's feature extractor and leave its classifier trainable.
for network in (bilstm, tvgg):
    for frozen in network.features.parameters():
        frozen.requires_grad = False
    for tuned in network.classifier.parameters():
        tuned.requires_grad = True

# Only parameters that still require gradients are handed to the optimizer.
parameters = (p for p in itertools.chain(bilstm.parameters(), tvgg.parameters())
              if p.requires_grad)

optimizer = optim.Adam(parameters, lr=0.001, weight_decay=5e-5)

Since bilstm underperforms, better results are obtained when more weight is given to the TCNN.

In [None]:
# tuning parameters: weights of the three cross-entropy terms combined in
# train_model / test_model
#   lambda_1 -> loss on the image network (tvgg) output
#   lambda_2 -> loss on the landmark network (bilstm) output
#   lambda_3 -> loss on the sum of both outputs
lambda_1, lambda_2, lambda_3 = 1, 1, 0.1

def train_model(tvgg, bilstm, criterion, optimizer, num_epochs=25):
    """Jointly fine-tune the image network and the landmark network.

    Every epoch runs a training pass and a validation pass. The two networks'
    outputs are summed for prediction, and the optimised loss is
    ``lambda_1 * L(tvgg) + lambda_2 * L(bilstm) + lambda_3 * L(sum)``.
    The weights with the best validation accuracy (ties go to the later epoch)
    are restored before returning.

    Reads the notebook globals ``dataloaders``, ``dataset_sizes``, ``use_gpu``,
    ``N_frames``, ``N_landmarks`` and ``lambda_1..3``.

    Args:
        tvgg: image network, called on the squeezed frame batch.
        bilstm: landmark network, called on the subsampled landmarks.
        criterion: loss taking (outputs, labels).
        optimizer: optimizer over the trainable parameters of both networks.
        num_epochs: number of epochs to run.

    Returns:
        ``(tvgg, bilstm, best_acc)`` where ``best_acc`` is a Python float.
    """
    since = time.time()

    best_tvgg_wts = copy.deepcopy(tvgg.state_dict())
    best_bilstm_wts = copy.deepcopy(bilstm.state_dict())

    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            tvgg.train(is_training)
            bilstm.train(is_training)

            running_loss = 0.0
            running_corrects = 0.0

            # Iterate over data.
            for inputs, labels, landmarks in dataloaders[phase]:
                landmarks = landmarks.float()
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    landmarks = landmarks.cuda()

                # subsample landmarks for blstm: keep a quarter of the frames,
                # evenly spaced from the first to the last
                b_s = landmarks.size(0)
                landmarks = landmarks.view(b_s, N_frames, 2*N_landmarks)
                slices = int(landmarks.size(1)*0.25)
                # builtin `int` replaces the `np.int` alias removed in NumPy 1.24
                idx = np.linspace(0, landmarks.size(1)-1, slices, dtype=int)
                landmarks = landmarks[:, idx, :]

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; autograd graphs are only built while training
                with torch.set_grad_enabled(is_training):
                    if b_s != 1:
                        outputs_tvgg = tvgg(torch.squeeze(inputs))
                    else:
                        # squeeze() also removed the batch axis; restore it
                        outputs_tvgg = tvgg(torch.squeeze(inputs).unsqueeze(0))

                    outputs_bilstm = bilstm(landmarks)

                    outputs = outputs_tvgg + outputs_bilstm

                    loss_1 = criterion(outputs_tvgg, labels)
                    loss_2 = criterion(outputs_bilstm, labels)
                    loss_3 = criterion(outputs, labels)

                    loss = lambda_1 * loss_1 + lambda_2 * loss_2 + lambda_3 * loss_3

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                _, preds = torch.max(outputs.detach(), 1)

                # statistics, accumulated as Python numbers: `.item()` replaces
                # `loss.data[0]`, which fails on 0-dim losses, and keeps the
                # accuracy from becoming a (possibly CUDA) tensor
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc >= best_acc:
                best_acc = epoch_acc
                best_tvgg_wts = copy.deepcopy(tvgg.state_dict())
                best_bilstm_wts = copy.deepcopy(bilstm.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    tvgg.load_state_dict(best_tvgg_wts)
    bilstm.load_state_dict(best_bilstm_wts)
    return tvgg, bilstm, best_acc


def test_model(tvgg, bilstm, criterion):
    """Evaluate the fused image + landmark model on the test split.

    Predictions come from the sum of both networks' outputs. The reported loss
    uses the same ``lambda_1..3`` weighting as ``train_model``.

    Reads the notebook globals ``dataloaders``, ``dataset_sizes``, ``use_gpu``,
    ``N_frames``, ``N_landmarks``, ``N_classes`` and ``lambda_1..3``.

    Returns:
        ``(total_loss, total_acc, cnf_matrix)``: two Python floats and the
        ``N_classes x N_classes`` confusion matrix from
        ``sklearn.metrics.confusion_matrix`` (truth first, prediction second).
    """
    tvgg.train(False)
    bilstm.train(False)

    truth = []
    prediction = []

    running_loss = 0.0
    running_corrects = 0.0

    # Evaluation only: no autograd graphs are needed.
    with torch.no_grad():
        # Iterate over data.
        for inputs, labels, landmarks in dataloaders['test']:
            landmarks = landmarks.float()
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()
                landmarks = landmarks.cuda()

            # subsample landmarks for blstm: keep a quarter of the frames,
            # evenly spaced from the first to the last
            b_s = landmarks.size(0)
            landmarks = landmarks.view(b_s, N_frames, 2*N_landmarks)
            slices = int(landmarks.size(1)*0.25)
            # builtin `int` replaces the `np.int` alias removed in NumPy 1.24
            idx = np.linspace(0, landmarks.size(1)-1, slices, dtype=int)
            landmarks = landmarks[:, idx, :]

            # forward
            if b_s != 1:
                outputs_tvgg = tvgg(torch.squeeze(inputs))
            else:
                # squeeze() also removed the batch axis; restore it
                outputs_tvgg = tvgg(torch.squeeze(inputs).unsqueeze(0))
            outputs_bilstm = bilstm(landmarks)
            outputs = outputs_tvgg + outputs_bilstm

            _, preds = torch.max(outputs, 1)

            truth.extend(labels.cpu().numpy().tolist())
            prediction.extend(preds.cpu().numpy().tolist())

            loss_1 = criterion(outputs_tvgg, labels)
            loss_2 = criterion(outputs_bilstm, labels)
            loss_3 = criterion(outputs, labels)

            loss = lambda_1 * loss_1 + lambda_2 * loss_2 + lambda_3 * loss_3

            # statistics, accumulated as Python numbers: `.item()` replaces
            # `loss.data[0]`, which fails on 0-dim losses, and keeps the
            # accuracy from becoming a (possibly CUDA) tensor
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()

    total_loss = running_loss / dataset_sizes['test']
    total_acc = running_corrects / dataset_sizes['test']

    print('{} Loss: {:.4f} Acc: {:.4f}'.format('test', total_loss, total_acc))

    cnf_matrix = confusion_matrix(truth, prediction, labels=list(range(N_classes)))

    return total_loss, total_acc, cnf_matrix


def test_single_model(model, criterion):
    """Evaluate an image-only model on the test split.

    Reads the notebook globals ``dataloaders``, ``dataset_sizes`` and
    ``use_gpu``. The landmark element of every batch is ignored.

    Returns:
        ``(total_loss, total_acc)`` as Python floats.
    """
    model.train(False)

    running_loss = 0.0
    running_corrects = 0.0

    # Evaluation only: no autograd graphs are needed.
    with torch.no_grad():
        # Iterate over data.
        for inputs, labels, _ in dataloaders['test']:
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            b_s = inputs.size(0)

            # forward
            if b_s != 1:
                outputs = model(torch.squeeze(inputs))
            else:
                # squeeze() also removed the batch axis; restore it
                outputs = model(torch.squeeze(inputs).unsqueeze(0))

            _, preds = torch.max(outputs, 1)

            loss = criterion(outputs, labels)

            # statistics, accumulated as Python numbers: `.item()` replaces
            # `loss.data[0]`, which fails on 0-dim losses, and keeps the
            # accuracy from becoming a (possibly CUDA) tensor
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()

    total_loss = running_loss / dataset_sizes['test']
    total_acc = running_corrects / dataset_sizes['test']

    print('{} Loss: {:.4f} Acc: {:.4f}'.format('test', total_loss, total_acc))

    return total_loss, total_acc


def test_landmark_model(model, criterion):
    """Evaluate a landmark-only model on the test split.

    Reads the notebook globals ``dataloaders``, ``dataset_sizes``, ``use_gpu``,
    ``N_frames`` and ``N_landmarks``. The image element of every batch is
    ignored.

    Returns:
        ``(total_loss, total_acc)`` as Python floats.
    """
    model.train(False)

    running_loss = 0.0
    running_corrects = 0.0

    # Evaluation only: no autograd graphs are needed.
    with torch.no_grad():
        # Iterate over data.
        for _, labels, landmarks in dataloaders['test']:
            inputs = landmarks.float()
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            # subsample landmarks for blstm: keep a quarter of the frames,
            # evenly spaced from the first to the last
            b_s = inputs.size(0)
            inputs = inputs.view(b_s, N_frames, 2*N_landmarks)
            slices = int(inputs.size(1)*0.25)
            # builtin `int` replaces the `np.int` alias removed in NumPy 1.24
            idx = np.linspace(0, inputs.size(1)-1, slices, dtype=int)
            inputs = inputs[:, idx, :]

            # forward
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            loss = criterion(outputs, labels)

            # statistics, accumulated as Python numbers: `.item()` replaces
            # `loss.data[0]`, which fails on 0-dim losses, and keeps the
            # accuracy from becoming a (possibly CUDA) tensor
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()

    total_loss = running_loss / dataset_sizes['test']
    total_acc = running_corrects / dataset_sizes['test']

    print('{} Loss: {:.4f} Acc: {:.4f}'.format('test', total_loss, total_acc))

    # The confusion matrix formerly computed here was never returned or used,
    # so it (and the truth/prediction lists feeding it) has been removed.
    return total_loss, total_acc

In [None]:
# Fine-tune both classifiers for 10 epochs, then score the fused model on the
# test split.
tvgg, bilstm, accuracy = train_model(tvgg=tvgg, bilstm=bilstm, criterion=criterion,
                                     optimizer=optimizer, num_epochs=10)
test_loss, test_accuracy, conf = test_model(tvgg=tvgg, bilstm=bilstm, criterion=criterion)

## K-fold crossvalidation

In [None]:
# K-fold evaluation of the image-only network: for every fold, load that
# fold's fine-tuned weights and score them on the fold's test split.
k_accuracy = []
# Defined here as well (as the other K-fold cells do) so this cell does not
# depend on the model-loading cell having been run.
save_folder = 'model'

for fold in range(K):
    print('Starting fold ' + str(fold) + ' ...')

    # reassign datasets to training, validation and testing
    training_folds = [x for x in range(K) if x != fold]
    validation_fold = random.choice(training_folds)
    # Compare fold indices by value; `is not` tests object identity.
    training_folds = [x for x in training_folds if x != validation_fold]

    cross_datasets = {}
    cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                              for k in training_folds])
    cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

    cross_datasets['test'] = validation_datasets[k_folders[fold]]

    # test_single_model reads these two globals.
    dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=64,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

    #tvgg = tdense.densenet121(num_classes=7)
    #tvgg.load_state_dict(torch.load(os.path.join(save_folder, 'dense_finetune' + str(fold) + '.pt')))

    model = tv.vgg11_bn(num_classes=N_classes, n_frames=N_frames)
    model.load_state_dict(torch.load(os.path.join(save_folder, 'vgg_finetune_oulu' + str(fold) + '.pt')))
    model.eval()

    if use_gpu:
        model = model.cuda()

    test_loss, test_accuracy = test_single_model(model, criterion)
    k_accuracy.append(test_accuracy)

    del model

print('Mean value of test accuracy over ' + str(K) + '-fold crossvalidation is: '
      + str(sum(k_accuracy) / float(len(k_accuracy))))

In [None]:
# K-fold evaluation of the landmark-only network: for every fold, load that
# fold's saved model and score it on the fold's test split.
k_accuracy = []
save_folder = 'model'

for fold in range(K):
    print('Starting fold ' + str(fold) + ' ...')

    # reassign datasets to training, validation and testing
    training_folds = [x for x in range(K) if x != fold]
    validation_fold = random.choice(training_folds)
    # Compare fold indices by value; `is not` tests object identity.
    training_folds = [x for x in training_folds if x != validation_fold]

    cross_datasets = {}
    cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                              for k in training_folds])
    cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

    cross_datasets['test'] = validation_datasets[k_folders[fold]]

    # test_landmark_model reads these two globals.
    dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=64,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

    # Whole-object checkpoint: the model classes above must be defined first.
    # NOTE(review): torch.load un-pickles the file; only load trusted checkpoints.
    bilstm = torch.load(os.path.join(save_folder, 'manual_oulu' + str(fold) + '.pt'))
    bilstm.eval()

    if use_gpu:
        bilstm = bilstm.cuda()

    test_loss, test_accuracy = test_landmark_model(bilstm, criterion)
    k_accuracy.append(test_accuracy)

    del bilstm

print('Mean value of test accuracy over ' + str(K) + '-fold crossvalidation is: '
      + str(sum(k_accuracy) / float(len(k_accuracy))))

In [None]:
# K-fold evaluation of the fused model: for every fold, load both pre-trained
# networks, fine-tune their classifiers jointly, test on the held-out fold and
# accumulate the confusion matrices.
k_accuracy = []
save_folder = 'model'
conf_math = np.zeros((N_classes, N_classes))

for fold in range(K):

    print('Starting fold ' + str(fold) + ' ...')

    # reassign datasets to training, validation and testing
    training_folds = [x for x in range(K) if x != fold]
    validation_fold = random.choice(training_folds)
    # Compare fold indices by value; `is not` tests object identity.
    training_folds = [x for x in training_folds if x != validation_fold]

    cross_datasets = {}
    cross_datasets['train'] = torch.utils.data.ConcatDataset([training_datasets[k_folders[k]]
                                                              for k in training_folds])
    cross_datasets['val'] = validation_datasets[k_folders[validation_fold]]

    cross_datasets['test'] = validation_datasets[k_folders[fold]]

    # train_model / test_model read these two globals.
    dataloaders = {x: torch.utils.data.DataLoader(cross_datasets[x], batch_size=batch_size,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val', 'test']}
    dataset_sizes = {x: len(cross_datasets[x]) for x in ['train', 'val', 'test']}

    # init model
    #bilstm = Flexible(my_config, N_landmarks, N_classes)
    #bilstm.load_state_dict(torch.load(os.path.join(save_folder, 'phrnn.pt')))
    #bilstm = torch.load(os.path.join(save_folder, 'custom_' + str(fold) + '.pt'))
    # NOTE(review): torch.load un-pickles the file; only load trusted checkpoints.
    bilstm = torch.load(os.path.join(save_folder, 'manual_oulu' + str(fold) + '.pt'))

    #tvgg = tdense.densenet121(num_classes=7)
    #tvgg.load_state_dict(torch.load(os.path.join(save_folder, 'dense_finetune' + str(fold) + '.pt')))

    tvgg = tv.vgg11_bn(num_classes=N_classes, n_frames=N_frames)
    #tvgg.load_state_dict(torch.load(os.path.join(save_folder, 'vgg_finetune_ck' + str(fold) + '.pt')))
    tvgg.load_state_dict(torch.load(os.path.join(save_folder, 'vgg_finetune_oulu' + str(fold) + '.pt')))

    if use_gpu:
        bilstm = bilstm.cuda()
        tvgg = tvgg.cuda()

    # Freeze the feature extractors; only the classifiers are tuned.
    for param in bilstm.features.parameters():
        param.requires_grad = False

    for param in bilstm.classifier.parameters():
        param.requires_grad = True

    for param in tvgg.features.parameters():
        param.requires_grad = False

    for param in tvgg.classifier.parameters():
        param.requires_grad = True

    parameters = filter(lambda p: p.requires_grad, itertools.chain(bilstm.parameters(), tvgg.parameters()))

    criterion = nn.CrossEntropyLoss()

    # NOTE(review): weight_decay is 5e-3 here but 5e-5 in the single-run cell
    # above; confirm which value is intended.
    optimizer = optim.Adam(parameters, lr=0.001, weight_decay=5e-3)

    # train model
    tvgg, bilstm, val_accuracy = train_model(tvgg, bilstm, criterion, optimizer, num_epochs=3)
    test_loss, test_accuracy, conf = test_model(tvgg, bilstm, criterion)
    conf_math += conf

    print('Finished fold ' + str(fold) + ' with validation accuracy of ' + str(val_accuracy))
    k_accuracy.append(test_accuracy)
    del bilstm
    del tvgg

print('Mean value of test accuracy over ' + str(K) + '-fold crossvalidation is: '
      + str(sum(k_accuracy) / float(len(k_accuracy))))

In [None]:
# Box plot of the per-fold test accuracies collected in `k_accuracy`.
plt.boxplot(k_accuracy)

In [None]:
# Bare expression: the notebook displays the list of per-fold test accuracies.
k_accuracy

In [None]:
# Drop the models if they are still defined. The K-fold cell above already
# deletes both names at the end of every fold, so a plain `del` here raises
# NameError; `pop` with a default is a safe no-op in that case.
globals().pop('bilstm', None)
globals().pop('tvgg', None)

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        cm: square confusion matrix (rows are true labels, columns are
            predicted labels); integer or float counts are both accepted.
            Inside this function the name shadows any module-level `cm`.
        classes: tick labels, one per row/column.
        normalize: when True, divide every row by its sum. Rows that sum to
            zero are left as zeros.
        title: accepted for interface compatibility; the title is currently
            not drawn.
        cmap: matplotlib colormap used for the image.

    Draws on the current matplotlib figure and returns None.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no samples would give 0/0 = NaN; leave such rows at zero.
        cm = np.divide(cm.astype(float), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # 'd' only accepts integers. Matrices accumulated in a float array (such as
    # a sum over folds started from np.zeros) are printed as whole numbers.
    if normalize:
        fmt = '.2f'
    elif np.issubdtype(cm.dtype, np.integer):
        fmt = 'd'
    else:
        fmt = '.0f'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels exist so the layout leaves room for them.
    plt.tight_layout()

In [None]:
# Plot the row-normalized confusion matrix accumulated over the folds
class_names = [classes[key] for key in classes]

plt.figure()
plot_confusion_matrix(conf_math, class_names, normalize=True,
                      title='Normalized confusion matrix')
plt.show()