In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib
import matplotlib.pyplot as plt
import scipy
from subprocess import call
from IPython.display import Image
from sklearn.model_selection import train_test_split
from scipy import stats

%matplotlib inline
matplotlib.style.use('bmh')

data_all_thomas = pd.read_excel('../annotations/data_all_thomas.xlsx')
data_labels_video_features = pd.read_csv('../annotations/combined_labels_video_features.csv')
labels = data_labels_video_features[['gold_gt_max_aro','gold_gt_max_like','gold_gt_max_val','gold_gt_min_aro','gold_gt_min_like','gold_gt_min_val']]
#labels_max = labels[['gold_gt_max_aro','gold_gt_max_like','gold_gt_max_val']]
#labels_min = labels[['gold_gt_min_aro','gold_gt_min_like','gold_gt_min_val']]


In [25]:
# Video features without normalization: every column of the combined frame
# except the six gold-standard label columns.
video_features = data_labels_video_features.drop(
    columns=[
        'gold_gt_max_aro',
        'gold_gt_max_like',
        'gold_gt_max_val',
        'gold_gt_min_aro',
        'gold_gt_min_like',
        'gold_gt_min_val',
    ]
)

In [26]:
# z-normalization: standardize each feature column independently
# (axis=0 hands one column at a time to stats.zscore).
# NOTE: this overwrites `video_features` in place of the un-normalized frame.
video_features = video_features.apply(stats.zscore, axis=0)

In [27]:
# Applying L2 normalization
# Import the function explicitly: `import sklearn as sk` on its own does not
# guarantee that the `sklearn.preprocessing` submodule has been loaded, so
# `sk.preprocessing` only resolves when another import pulled it in first.
from sklearn.preprocessing import normalize

# axis=1 rescales every row (sample) to unit L2 norm. normalize() returns a plain
# array, so the rebuilt frame carries default integer row and column labels.
video_features = pd.DataFrame(normalize(video_features, norm='l2', axis=1))

In [None]:
# Unshuffled hold-out split on the agreeableness target: with shuffle=False the
# first 440 rows become the training set and the following 220 rows the test set.
split_parts = train_test_split(
    video_features,
    data_all_thomas.agreeableness_binary,
    train_size=440,
    test_size=220,
    shuffle=False,
)

# The ELM code below is fed plain NumPy arrays rather than pandas objects.
X_train, X_test, y_train, y_test = (part.to_numpy() for part in split_parts)

In [34]:
# The first 660 entries of the two binary targets (interview, agreeableness),
# each as a NumPy array.
whoopise = [
    data_all_thomas[target_name].iloc[:660].to_numpy()
    for target_name in ('interview_binary', 'agreeableness_binary')
]

In [38]:
# 3 Fold Cross Validation KELM
from sklearn.model_selection import KFold
import kernel_elm as elm
from sklearn.metrics import recall_score


variables = data_all_thomas[['agreeableness_binary','conscientiousness_binary','extraversion_binary','neuroticism_binary','openness_binary','interview_binary','arousal','valence','likeability']][:660]
hyperparams_c = [100000,10000,1000,100,10,1,0.1,0.01,0.001,0.0001,0.00001]

# KFold options must be passed by keyword in current scikit-learn, and a
# random_state is rejected when shuffle=False (it would have no effect anyway).
# The splitter is stateless without shuffling, so one instance serves every target.
kfold = KFold(n_splits=3, shuffle=False)

# Convert the feature frame once instead of on every fold.
features = video_features.to_numpy()

# Prints one unweighted average recall (macro recall) per fold, for every
# target in `whoopise` and every regularization value C.
for variable in whoopise:
    for c in hyperparams_c:
        for train, test in kfold.split(variable):
            # Fresh model per fold so no state carries over between folds.
            kelm = elm.Extreme_Learning_Machine(kernel="linear",weighted=False,C=c, model_type="classification")
            kelm.train(features[train],variable[train])
            y_predict = kelm.test(features[test])
            score = recall_score(variable[test], y_predict,average='macro')
            print('UAR: %s' % (score))


UAR: 0.6547500827540549
UAR: 0.5889093342254934
UAR: 0.6448281846581048
UAR: 0.6547500827540549
UAR: 0.5889093342254934
UAR: 0.6448281846581048
UAR: 0.6547500827540549
UAR: 0.5889093342254934
UAR: 0.6448281846581048
UAR: 0.6597980801059251
UAR: 0.5930076948812312
UAR: 0.6448281846581048
UAR: 0.6645150612380006
UAR: 0.6124121779859485
UAR: 0.6527247483512669
UAR: 0.7093677590201919
UAR: 0.6513047842087655
UAR: 0.6377993752169386
UAR: 0.6607911287653094
UAR: 0.6329876212780194
UAR: 0.6228740020826102
UAR: 0.6152763985435286
UAR: 0.5726831716293075
UAR: 0.6398819854217286
UAR: 0.5744786494538232
UAR: 0.5808798929407829
UAR: 0.6419645956265185
UAR: 0.5741476332340285
UAR: 0.5808798929407829
UAR: 0.6324192988545644
UAR: 0.5741476332340285
UAR: 0.5808798929407829
UAR: 0.6324192988545644
UAR: 0.6385093167701863
UAR: 0.4964797586120191
UAR: 0.5780304966351478
UAR: 0.6385093167701863
UAR: 0.4964797586120191
UAR: 0.5780304966351478
UAR: 0.6385093167701863
UAR: 0.5005447992624256
UAR: 0.574154527

In [None]:
# Single ELM
import kernel_elm as elm
from sklearn.metrics import classification_report, confusion_matrix,recall_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score, KFold

elmk = elm.Extreme_Learning_Machine(kernel="linear",weighted=False,C=1, model_type="classification")
#beta = elmk.train(X_train,y_train)
#y_predict = elmk.test(X_test)
#recall_score(y_test, y_predict,average='macro')
#mean_squared_error(y_test,y_predict,squared=False)
#mean_absolute_error(y_test,y_predict)

# The previous call handed the imported *module* `elm` to cross_val_score, which
# expects an estimator object. The ELM class is only used in this notebook through
# train()/test() — NOTE(review): confirm whether it also offers fit/predict — so the
# 3-fold macro-recall evaluation is written out explicitly instead.
cv_features = np.asarray(X_train)
cv_targets = np.asarray(y_train)

fold_scores = []
for fit_idx, val_idx in KFold(n_splits=3).split(cv_features):
    # Fresh model per fold, configured exactly like `elmk`.
    fold_model = elm.Extreme_Learning_Machine(kernel="linear",weighted=False,C=1, model_type="classification")
    fold_model.train(cv_features[fit_idx], cv_targets[fit_idx])
    fold_predictions = fold_model.test(cv_features[val_idx])
    fold_scores.append(recall_score(cv_targets[val_idx], fold_predictions, average='macro'))

# Same shape cross_val_score would return: one score per fold.
scores = np.array(fold_scores)
scores

In [None]:
# Decision Tree pre-processing
# One-hot encode each categorical mood variable. The dummies are computed over all
# rows (values cast to str first) and only then cut down to the first 660 rows.
arousal_dummy, valence_dummy, likeability_dummy = (
    pd.get_dummies(data_all_thomas[[mood_name]].astype(str)).iloc[:660].to_numpy()
    for mood_name in ('arousal', 'valence', 'likeability')
)

# Scale the one-hot blocks by their weights.
# NOTE(review): beta_arousal / beta_valence / beta_likeability are not defined in the
# visible part of this notebook — confirm which cell creates them before running.
arousal_weighted = arousal_dummy * beta_arousal
valence_weighted = valence_dummy * beta_valence
likeability_weighted = likeability_dummy * beta_likeability

# Place the three weighted blocks side by side as one feature matrix.
weighted_blocks = (arousal_weighted, valence_weighted, likeability_weighted)
x_train_mood_weighted_categorical = np.concatenate(weighted_blocks, axis=1)


In [None]:
# Random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree


x_train_bin = data_all_thomas[['agreeableness_binary','conscientiousness_binary','extraversion_binary','neuroticism_binary','openness_binary']]
y_train_bin = data_all_thomas[['interview_binary']][:660]

# random_state makes the bootstrap samples and feature sub-sampling repeatable
# across runs (50 matches the seed used for the decision tree in this notebook).
rf = RandomForestClassifier(n_estimators=100, bootstrap=True, max_features='sqrt', max_depth=4, random_state=50)

# y_train_bin is a one-column DataFrame; flatten it to 1-D so the forest does not
# emit a DataConversionWarning about a column-vector target.
rf.fit(x_train_mood_weighted_categorical, y_train_bin.to_numpy().ravel())

In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import classification_report, confusion_matrix,recall_score


# Column groups: binary personality traits and the three mood ratings.
personality_columns = ['agreeableness_binary','conscientiousness_binary','extraversion_binary','neuroticism_binary','openness_binary']
mood_columns = ['arousal','valence','likeability']

person_bin = data_all_thomas[personality_columns]
mood_cat = data_all_thomas[mood_columns]
mood_person_combined = data_all_thomas[personality_columns + mood_columns]
interview_bin = data_all_thomas[['interview_binary']]

# Unshuffled split: the first 660 rows train the tree, the next 300 rows test it.
# NOTE: this rebinds X_train / X_test / y_train / y_test used by earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    mood_person_combined,
    interview_bin,
    train_size=660,
    test_size=300,
    shuffle=False,
)

clf = DecisionTreeClassifier(random_state=50, max_depth=9, min_samples_leaf=20)
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)

# Macro-averaged recall on the held-out rows (shown as the cell output).
recall_score(y_test, y_predict, average="macro")

In [None]:
# Generate dot source for the fitted tree and visualize it using graphviz
import graphviz


# Pass the feature names as a plain list: some scikit-learn releases validate
# `feature_names` as a list and reject a pandas Index.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,  # return the dot source as a string instead of writing a file
    feature_names=list(mood_person_combined.columns),
    class_names=['not invited','invited'],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
graph

In [None]:
# Multiple ELM
import kernel_elm as elm
from sklearn.metrics import classification_report, confusion_matrix,recall_score


variables_mood = [labels['gold_gt_min_aro'],labels['gold_gt_min_like'],labels['gold_gt_min_val'],data_all_thomas['arousal'],data_all_thomas['likeability'],data_all_thomas['valence']]
variables_personality_binary = [data_all_thomas.agreeableness_binary, data_all_thomas.conscientiousness_binary, data_all_thomas.extraversion_binary, data_all_thomas.neuroticism_binary, data_all_thomas.openness_binary, data_all_thomas.interview_binary]
variables_personality_continuous = [data_all_thomas.agreeableness, data_all_thomas.conscientiousness, data_all_thomas.extraversion, data_all_thomas.neuroticism, data_all_thomas.openness, data_all_thomas.interview]

hyperparams_c = [100000,10000,1000,100,10,1,0.1,0.01,0.001,0.0001,0.00001]
gammas = [100,10,1,0.1,0.01,0.001,0.0001,0.00001,0.000001]
weights = [True, False]

# Grid over (weighted?, target, C): one report row per weighting/target pair,
# one column of macro recall per C value.
# Rows are collected in a list and the frame is built once at the end;
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
report_rows = []

for weight in weights:
    for variable in variables_personality_binary:
        # The unshuffled split (first 660 rows train, next 300 test) does not depend
        # on C, so it is done once per target instead of once per C value.
        # Assumes train()/test() do not modify their input arrays — TODO confirm.
        X_train, X_test, y_train, y_test = (
            part.to_numpy()
            for part in train_test_split(video_features, variable, train_size=660,test_size=300,shuffle=False)
        )
        row = [str(weight) + ' - ' + variable.name]
        for c_var in hyperparams_c:
            kelm = elm.Extreme_Learning_Machine(C=c_var, kernel='linear', weighted=weight, model_type="classification")
            kelm.train(X_train,y_train)
            y_pred_single = kelm.test(X_test)
            row.append(recall_score(y_test, y_pred_single,average='macro'))
        report_rows.append(row)

report_kelms = pd.DataFrame(report_rows, columns=['variable'] + hyperparams_c)


report_kelms.to_csv('report_personality_kelm_linear_weighted_c_binary_l2.csv')

In [None]:
# Multiple ELM 