In [1]:
import numpy as np
import os
from sklearn.model_selection import KFold # had to do pip install sklearn in this notebook
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# This notebook emulates main.py with --data ./data --feat all --clf svm --nor one --select rfe --num 1

In [3]:
# Run configuration (mirrors the command-line flags quoted in the cell above).
num_features_selected = 1  # how many features RFE is asked to keep (--num)
data = './data'            # directory holding features.csv and label.csv (--data)

In [4]:
# Dataset layout constants.
SUBJECT_NUM = 40       # subject ids run from 1 to SUBJECT_NUM
VIDEO_NUM = 16         # label rows (videos) per subject
SAMPLE_RATE = 128.0    # not referenced by the cells visible in this notebook
# 1-based ids of the subjects that are skipped when the labels are built.
MISSING_DATA_SUBJECT = [
    9, 12, 21, 22, 23, 24, 33,
]

In [5]:
# Read the comma-separated feature matrix (one row per sample) from the data directory.
# NOTE(review): in the array preview printed further down, the last two columns hold
# values such as 2.9386 / 5.0 / 6.5427, which look like rating scores rather than
# signal features. Confirm features.csv does not embed the labels; if it does, the
# feature selector can simply pick the label column.
features_path = os.path.join(data, 'features.csv')
amigos_data = np.loadtxt(features_path, delimiter=',')

In [6]:
# Sanity check: (rows, columns) of the loaded feature matrix.
amigos_data.shape

(528, 215)

In [9]:
# Preview the loaded matrix; NumPy abbreviates the middle rows/columns with "...".
amigos_data

array([[ 3.75527470e+05,  4.19887678e+00,  8.12293791e+01, ...,
        -7.03652450e-01,  2.93860000e+00,  5.00000000e+00],
       [ 4.14585010e+05,  1.26145045e+01,  7.09236174e+01, ...,
        -6.18595482e-01,  4.87710000e+00,  6.54270000e+00],
       [ 3.57766594e+05,  6.36469543e+00,  5.29660656e+01, ...,
        -2.85485239e-01,  5.00000000e+00,  3.23890000e+00],
       ...,
       [ 3.04208478e+05,  3.91586818e+00,  1.18190286e+02, ...,
         2.35810708e-01,  5.00000000e+00,  6.66780000e+00],
       [ 1.83664760e+05,  1.65597355e+00,  4.68708066e+01, ...,
         2.53220187e-01,  5.55590000e+00,  6.28810000e+00],
       [ 1.92068764e+05,  8.80936285e-01,  6.27798844e+01, ...,
         2.54748148e-01,  6.45080000e+00,  6.09830000e+00]])

In [7]:
# label.csv: column 0 holds the arousal ratings, column 1 the valence ratings.
# Rows are consumed in consecutive groups of VIDEO_NUM, one group per subject id
# 1..SUBJECT_NUM; groups belonging to subjects in MISSING_DATA_SUBJECT are skipped.
# Each rating is binarised against that subject's own mean: 1 if strictly above it, else 0.

labels = np.loadtxt(os.path.join(data, 'label.csv'), delimiter=',')[:, :2]
a_labels, v_labels = [], []
for subject_id in range(1, SUBJECT_NUM + 1):
    if subject_id in MISSING_DATA_SUBJECT:
        continue
    first_row = (subject_id - 1) * VIDEO_NUM
    subject_ratings = labels[first_row:first_row + VIDEO_NUM, :]
    arousal_mean = np.mean(subject_ratings[:, 0])
    valence_mean = np.mean(subject_ratings[:, 1])
    a_labels.extend(1 if rating > arousal_mean else 0 for rating in subject_ratings[:, 0])
    v_labels.extend(1 if rating > valence_mean else 0 for rating in subject_ratings[:, 1])
a_labels, v_labels = np.array(a_labels), np.array(v_labels)

In [16]:
# Should equal the row count of amigos_data, since both are indexed with the same fold indices.
a_labels.shape

(528,)

In [10]:
# Cross-validator with one fold per subject that is not listed as missing.
# shuffle is left at its default, so the split order follows the row order.
n_folds = SUBJECT_NUM - len(MISSING_DATA_SUBJECT)
kfold = KFold(n_splits=n_folds)

In [11]:
# Linear-kernel SVMs, one per target; they differ only in the regularisation constant C.
a_clf = SVC(kernel='linear', C=0.75)  # arousal
v_clf = SVC(kernel='linear', C=0.2)   # valence

In [12]:
# Recursive feature elimination wrapped around each SVM.
# n_features_to_select is passed by keyword: current scikit-learn releases declare
# every RFE argument after `estimator` as keyword-only, so the positional form
# raises TypeError there. The keyword form is accepted by older releases as well.
a_clf_select = RFE(a_clf, n_features_to_select=num_features_selected, verbose=0)
v_clf_select = RFE(v_clf, n_features_to_select=num_features_selected, verbose=0)



In [12]:
# Per-fold metric logs: one list for every (split, target, metric) combination.
train_a_accuracy_history, train_v_accuracy_history = [], []
train_a_f1score_history, train_v_f1score_history = [], []
val_a_accuracy_history, val_v_accuracy_history = [], []
val_a_f1score_history, val_v_f1score_history = [], []

# Zero-initialised float arrays with one slot per column of the feature matrix.
feature_count = amigos_data.shape[1]
a_idx_history = np.zeros(feature_count)
v_idx_history = np.zeros(feature_count)

In [13]:
def _scale_to_unit_interval(samples):
    """Min-max scale every column of `samples` into [-1, 1] using its own min/max.

    A column whose minimum equals its maximum would give 0/0 = NaN with the plain
    formula, and NaN input makes the SVM fit/predict calls fail. Such columns get a
    range of 1 instead, which maps all of their entries to -1.
    """
    column_min = np.min(samples, axis=0)
    column_range = np.max(samples, axis=0) - column_min
    column_range = np.where(column_range == 0, 1, column_range)
    return (samples - column_min) / column_range * 2 - 1


for idx, (train_idx, val_idx) in enumerate(kfold.split(amigos_data)):
    print(idx + 1, 'Fold Start')

    # collect data for cross validation
    train_data, val_data = amigos_data[train_idx], amigos_data[val_idx]
    train_a_labels, val_a_labels = a_labels[train_idx], a_labels[val_idx]
    train_v_labels, val_v_labels = v_labels[train_idx], v_labels[val_idx]

    # map features to fit within range [-1,1]
    # NOTE(review): the validation fold is scaled with its OWN min/max rather than the
    # training fold's statistics. Kept as-is because this notebook is meant to emulate
    # main.py; confirm that this is the intended protocol.
    train_data = _scale_to_unit_interval(train_data)
    val_data = _scale_to_unit_interval(val_data)

    # fit feature selection on the training fold only, then project both folds
    a_clf_select.fit(train_data, train_a_labels)
    v_clf_select.fit(train_data, train_v_labels)
    train_a_data = a_clf_select.transform(train_data)
    train_v_data = v_clf_select.transform(train_data)
    val_a_data = a_clf_select.transform(val_data)
    val_v_data = v_clf_select.transform(val_data)

    # ranking_ == 1 marks the columns RFE kept; count how often each column is kept
    # across folds (previously the indices were computed and then discarded, leaving
    # the history arrays all-zero)
    a_idx = np.where(a_clf_select.ranking_ == 1)
    v_idx = np.where(v_clf_select.ranking_ == 1)
    a_idx_history[a_idx] += 1
    v_idx_history[v_idx] += 1

    # fit classifier on the selected columns
    a_clf.fit(train_a_data, train_a_labels)
    v_clf.fit(train_v_data, train_v_labels)

    # predict arousal and valence
    train_a_predict_labels = a_clf.predict(train_a_data)
    train_v_predict_labels = v_clf.predict(train_v_data)
    val_a_predict_labels = a_clf.predict(val_a_data)
    val_v_predict_labels = v_clf.predict(val_v_data)

    # metrics (accuracy and macro-averaged f1 score) calculation
    train_a_accuracy = accuracy_score(train_a_labels, train_a_predict_labels)
    train_v_accuracy = accuracy_score(train_v_labels, train_v_predict_labels)
    train_a_f1score = f1_score(train_a_labels, train_a_predict_labels, average='macro')
    train_v_f1score = f1_score(train_v_labels, train_v_predict_labels, average='macro')
    val_a_accuracy = accuracy_score(val_a_labels, val_a_predict_labels)
    val_v_accuracy = accuracy_score(val_v_labels, val_v_predict_labels)
    val_a_f1score = f1_score(val_a_labels, val_a_predict_labels, average='macro')
    val_v_f1score = f1_score(val_v_labels, val_v_predict_labels, average='macro')

    # keep the per-fold numbers so they can be averaged after the loop
    train_a_accuracy_history.append(train_a_accuracy)
    train_v_accuracy_history.append(train_v_accuracy)
    train_a_f1score_history.append(train_a_f1score)
    train_v_f1score_history.append(train_v_f1score)
    val_a_accuracy_history.append(val_a_accuracy)
    val_v_accuracy_history.append(val_v_accuracy)
    val_a_f1score_history.append(val_a_f1score)
    val_v_f1score_history.append(val_v_f1score)

    print('Training Result')
    print("Arousal: Accuracy: {:.4f}, F1score: {:.4f}".format(train_a_accuracy, train_a_f1score))
    print("Valence: Accuracy: {:.4f}, F1score: {:.4f}".format(train_v_accuracy, train_v_f1score))
    print('Validating Result')
    print("Arousal: Accuracy: {:.4f}, F1score: {:.4f}".format(val_a_accuracy, val_a_f1score))
    print("Valence: Accuracy: {:.4f}, F1score: {:.4f}".format(val_v_accuracy, val_v_f1score))

1 Fold Start
Training Result
Arousal: Accuracy: 0.8691, F1score: 0.8691
Valence: Accuracy: 0.9492, F1score: 0.9490
Validating Result
Arousal: Accuracy: 1.0000, F1score: 1.0000
Valence: Accuracy: 0.9375, F1score: 0.9373
2 Fold Start
Training Result
Arousal: Accuracy: 0.8789, F1score: 0.8789
Valence: Accuracy: 0.9473, F1score: 0.9471
Validating Result
Arousal: Accuracy: 0.7500, F1score: 0.7460
Valence: Accuracy: 0.9375, F1score: 0.9352
3 Fold Start
Training Result
Arousal: Accuracy: 0.8711, F1score: 0.8710
Valence: Accuracy: 0.9492, F1score: 0.9491
Validating Result
Arousal: Accuracy: 1.0000, F1score: 1.0000
Valence: Accuracy: 0.9375, F1score: 0.9352
4 Fold Start
Training Result
Arousal: Accuracy: 0.8711, F1score: 0.8711
Valence: Accuracy: 0.9473, F1score: 0.9471
Validating Result
Arousal: Accuracy: 0.9375, F1score: 0.9086
Valence: Accuracy: 0.9375, F1score: 0.9373
5 Fold Start
Training Result
Arousal: Accuracy: 0.8867, F1score: 0.8867
Valence: Accuracy: 0.9453, F1score: 0.9452
Validatin

In [14]:
# Inspect the dual-coefficient array of v_clf as left by its most recent fit.
v_clf.dual_coef_.shape

(1, 150)

In [15]:
# Average every per-fold metric log, then print the training and validation summaries.
mean_train_a_accuracy = np.mean(train_a_accuracy_history)
mean_train_a_f1score = np.mean(train_a_f1score_history)
mean_train_v_accuracy = np.mean(train_v_accuracy_history)
mean_train_v_f1score = np.mean(train_v_f1score_history)
mean_val_a_accuracy = np.mean(val_a_accuracy_history)
mean_val_a_f1score = np.mean(val_a_f1score_history)
mean_val_v_accuracy = np.mean(val_v_accuracy_history)
mean_val_v_f1score = np.mean(val_v_f1score_history)

print('\nAverage Training Result')
print("Arousal => Accuracy: {:.4f}, F1score: {:.4f}".format(mean_train_a_accuracy, mean_train_a_f1score))
print("Valence => Accuracy: {:.4f}, F1score: {:.4f}".format(mean_train_v_accuracy, mean_train_v_f1score))
print('Average Validating Result')
print("Arousal => Accuracy: {:.4f}, F1score: {:.4f}".format(mean_val_a_accuracy, mean_val_a_f1score))
print("Valence => Accuracy: {:.4f}, F1score: {:.4f}\n".format(mean_val_v_accuracy, mean_val_v_f1score))


Average Training Result
Arousal => Accuracy: 0.8740, F1score: 0.8740
Valence => Accuracy: 0.9479, F1score: 0.9478
Average Validating Result
Arousal => Accuracy: 0.9186, F1score: 0.9068
Valence => Accuracy: 0.9659, F1score: 0.9605

