In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from pathlib import Path
import pickle
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,SubsetRandomSampler, ConcatDataset
from torchmetrics import Accuracy

from dataset import PersonalityDataset

from models.mlp import MLPsimple
from models.cnn8 import CNN8simple

In [2]:
dataset_name = 'BFD'
dataset_type = 'enc'

batch_type = 'original' if dataset_type=='enc' else 'normalized'

In [3]:
def train_epoch(model,device,dataloader,loss_fn,optimizer,train_accuracy):
    train_loss, train_correct, train_correct_ocean = 0.0, 0, 0
    model.train()
    for batch in dataloader:
        images, labels = batch[batch_type], batch['label']
        images = images.to(device)
        optimizer.zero_grad()
        output = model(images)
        labels = torch.tensor(labels, dtype=torch.float32, device=output.device)
        loss = loss_fn(output.flatten(), labels.flatten())
        train_accuracy(output, labels.to(torch.int64))
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        predictions = torch.where(output>0, 1, 0)
        train_correct += (predictions == labels.to(torch.int64)).sum().item()
        train_correct_ocean += (predictions == labels.to(torch.int64)).sum(dim=0)

    return train_loss, train_correct, train_correct_ocean
  
def valid_epoch(model,device,dataloader,loss_fn,val_accuracy):
    valid_loss, val_correct, val_correct_ocean = 0.0, 0, 0
    model.eval()
    for batch in dataloader:
        images, labels = batch[batch_type], batch['label']
        images = images.to(device)
        output = model(images)
        labels = torch.tensor(labels, dtype=torch.float32, device=output.device)
        loss = loss_fn(output.flatten(),labels.flatten())
        val_accuracy(output, labels.to(torch.int64))
        valid_loss += loss.item() * images.size(0)
        predictions = torch.where(output>0, 1, 0)
        val_correct += (predictions == labels.to(torch.int64)).sum().item()
        val_correct_ocean += (predictions == labels.to(torch.int64)).sum(dim=0)

    return valid_loss, val_correct, val_correct_ocean

In [4]:
LocationConfig_data = f'data/{dataset_name}/{dataset_type}/'

model_name = f'{dataset_name}_{dataset_type}'
params = {}
params['BFD_enc'] = {'batch_norm': True,'batch_size': 16,'dropout': 0.4,'lr': 0.001,'negative_slope': 0.05}
params['BFD_gray'] = {'batch_norm': False,'batch_size': 16,'dropout': 0.4,'lr': 0.001,'negative_slope': 0.1}
params['BFD_rgb'] = {'batch_norm': False,'batch_size': 8,'dropout': 0.0,'lr': 0.00005,'negative_slope': 0.02}
params['ChaLearn_enc'] = {'batch_norm': False,'batch_size': 4,'dropout': 0.3,'lr': 0.001,'negative_slope': 0.1}
params['ChaLearn_gray'] = {'batch_norm': False,'batch_size': 4,'dropout': 0.0,'lr': 0.001,'negative_slope': 0.01}
params['ChaLearn_rgb'] = {'batch_norm': False,'batch_size': 8,'dropout': 0.0,'lr': 0.00005,'negative_slope': 0.1}

epochs = {}
epochs['BFD_enc'] = 10
epochs['BFD_gray'] = 70
epochs['BFD_rgb'] = 17
epochs['ChaLearn_enc'] = 6
epochs['ChaLearn_gray'] = 5
epochs['ChaLearn_rgb'] = 4

In [5]:
train_dataset = PersonalityDataset(Path(LocationConfig_data + 'train/'))
test_dataset = PersonalityDataset(Path(LocationConfig_data + 'test/'))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)
dataset = ConcatDataset([train_dataset, test_dataset])

m=len(train_dataset)

file: data/BFD/enc/train/train.pickle
file: data/BFD/enc/test/test.pickle


In [6]:
if dataset_name == 'BFD':
    split_arange = np.arange(1, 41)
else:
    split_arange = list(pd.read_csv('data/ChaLearn/bigfive_labels_mean.csv', index_col=0)['ShortVideoName'].values)

In [7]:
k=10
splits=KFold(n_splits=k,shuffle=True,random_state=42)
foldperf={}

for fold, (train_num,val_num) in enumerate(splits.split(split_arange)):
    train_accuracy = Accuracy(threshold=0.0).cuda()
    val_accuracy = Accuracy(threshold=0.0).cuda()
    train_idx = []
    val_idx = []
    val_num_name = np.array(split_arange)[np.array(val_num)]
    for i, data in enumerate(dataset):
        if data['num'] in val_num_name:
            val_idx.append(i)
        else:
            train_idx.append(i)
    print(len(val_idx), len(train_idx))
    print('Fold {}'.format(fold + 1))
    criterion = nn.BCEWithLogitsLoss()  
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(val_idx)
    train_loader = DataLoader(dataset, batch_size=params[model_name]['batch_size'], sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=params[model_name]['batch_size'], sampler=test_sampler)
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    if dataset_type=='enc':
        model = MLPsimple(**params[model_name])
    else:
        model = CNN8simple(data_type=dataset_type, dataset=dataset_name, **params[model_name])
    
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=params[model_name]['lr'])

    history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[],'train_acc_2':[],'test_acc_2':[],'train_acc_ocean':[],'test_acc_ocean':[]}

    for epoch in range(epochs[model_name]):
        train_loss, train_correct, train_correct_ocean = train_epoch(model,device,train_loader,criterion,optimizer,train_accuracy)
        test_loss, test_correct, test_correct_ocean = valid_epoch(model,device,test_loader,criterion,val_accuracy)

        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / (len(train_loader.sampler) * 5) * 100
        train_acc_2 = train_accuracy.compute() * 100
        train_acc_ocean = train_correct_ocean / len(train_loader.sampler) * 100
        test_loss = test_loss / len(test_loader.sampler)
        test_acc = test_correct / (len(test_loader.sampler) * 5) * 100
        test_acc_2 = val_accuracy.compute() * 100
        test_acc_ocean = test_correct_ocean / len(test_loader.sampler) * 100

        print("F {} | E:{}/{} Tra Loss:{:.3f} Test Loss:{:.3f} Tra Acc {:.2f}% | {:.2f}% Test Acc {:.2f}% | {:.2f}%".format(
            fold + 1,
            epoch + 1,
            epochs[model_name],
            train_loss,
            test_loss,
            train_acc,
            train_acc_2,
            test_acc,
            test_acc_2
            ))
        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)
        history['train_acc_2'].append(train_acc_2.item())
        history['test_acc_2'].append(test_acc_2.item())
        print([t.item() for t in train_acc_ocean])
        history['train_acc_ocean'].append([t.item() for t in train_acc_ocean])
        history['test_acc_ocean'].append([t.item() for t in test_acc_ocean])

    foldperf['fold{}'.format(fold+1)] = history  

torch.save(model,f'model/k_cross/{dataset_name}_{dataset_type}.pt')
a_file = open(f'results/{dataset_name}_{dataset_type}.pkl', 'wb')
pickle.dump(foldperf, a_file)
a_file.close()

44 396
Fold 1


  labels = torch.tensor(labels, dtype=torch.float32, device=output.device)
  labels = torch.tensor(labels, dtype=torch.float32, device=output.device)


F 1 | E:1/5 Tra Loss:0.689 Test Loss:0.666 Tra Acc 58.08% | 58.08% Test Acc 68.64% | 68.64%
[60.60606384277344, 64.39393615722656, 55.30303192138672, 54.797977447509766, 55.30303192138672]
F 1 | E:2/5 Tra Loss:0.661 Test Loss:0.668 Tra Acc 60.86% | 59.47% Test Acc 70.00% | 69.32%
[63.6363639831543, 66.66667175292969, 59.5959587097168, 60.85858154296875, 53.53535461425781]
F 1 | E:3/5 Tra Loss:0.630 Test Loss:0.677 Tra Acc 66.11% | 61.68% Test Acc 54.55% | 64.39%
[66.16161346435547, 69.69696807861328, 65.90909576416016, 72.97979736328125, 55.80807876586914]
F 1 | E:4/5 Tra Loss:0.623 Test Loss:0.675 Tra Acc 66.67% | 62.93% Test Acc 55.00% | 62.05%
[72.22222137451172, 70.45454406738281, 63.38383483886719, 68.18181610107422, 59.09090805053711]
F 1 | E:5/5 Tra Loss:0.609 Test Loss:0.666 Tra Acc 68.74% | 64.09% Test Acc 57.73% | 61.18%
[70.2020263671875, 71.96969604492188, 65.40403747558594, 73.48484802246094, 62.626258850097656]
44 396
Fold 2
F 2 | E:1/5 Tra Loss:0.726 Test Loss:0.691 Tra 