In [41]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import scipy
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


%matplotlib inline
matplotlib.style.use('bmh')


# Annotation spreadsheet used by the box-plot and statistics helpers below.
data_all_thomas = pd.read_excel('annotations/data_all_thomas.xlsx')

# Table holding the gold-standard label columns alongside the video feature columns.
data_labels_video_features = pd.read_csv('annotations/combined_labels_video_features.csv')

# Name the label columns once so the selections and the drop cannot drift apart.
MAX_LABEL_COLUMNS = ['gold_gt_max_aro', 'gold_gt_max_like', 'gold_gt_max_val']
MIN_LABEL_COLUMNS = ['gold_gt_min_aro', 'gold_gt_min_like', 'gold_gt_min_val']
labels_max = data_labels_video_features[MAX_LABEL_COLUMNS]
labels_min = data_labels_video_features[MIN_LABEL_COLUMNS]

# Every column that is not one of the label columns is used as a model input.
video_features = data_labels_video_features.drop(MAX_LABEL_COLUMNS + MIN_LABEL_COLUMNS, axis=1)
features = list(video_features.columns)

# Seed the shuffle so the 80/20 split (and therefore every reported metric)
# is identical on each Restart & Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    video_features,
    labels_max['gold_gt_max_aro'],
    test_size=0.20,
    random_state=SPLIT_SEED,
)


In [43]:
def randomforest_model(random_state=None):
    """Fit a random forest on the video features and return the fitted model.

    The forest is trained on the module-level ``video_features`` frame against
    all columns of ``labels_max``; no held-out split is used here.

    Parameters
    ----------
    random_state : int or None, optional
        Seed forwarded to ``RandomForestClassifier``. The default ``None``
        keeps the original unseeded behaviour; pass an int for a
        reproducible forest.

    Returns
    -------
    RandomForestClassifier
        The fitted estimator, so callers can predict with it or inspect it
        (previously the model was discarded when the function returned).
    """
    rf = RandomForestClassifier(
        n_estimators=100,
        bootstrap=True,
        max_features='sqrt',
        random_state=random_state,
    )
    rf.fit(video_features, labels_max)
    return rf


def decisiontree_model():
    """Fit a decision tree on the video features and render it with Graphviz.

    The tree is trained on the module-level ``video_features`` frame against
    ``labels_max``, its size is printed, and it is exported to ``tree.dot``
    and then converted to ``tree.png`` by the external ``dot`` program.

    Returns
    -------
    DecisionTreeClassifier
        The fitted tree (previously discarded when the function returned).
    """
    tree = DecisionTreeClassifier(random_state=50)
    tree.fit(video_features, labels_max)
    print(f'Decision tree has {tree.tree_.node_count} nodes with maximum depth {tree.tree_.max_depth}.')

    # NOTE(review): labels_max is selected with three label columns, so this is
    # a multi-column target -- confirm that the three class_names below are
    # what is intended for it.
    export_graphviz(
        tree,
        'tree.dot',
        rounded=True,
        feature_names=features,
        class_names=['0', '1', '2'],
        filled=True,
    )

    # `call` returns the process exit status; surface a failed render instead
    # of silently leaving a missing or stale tree.png behind.
    exit_status = call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=400'])
    if exit_status != 0:
        print(f'Graphviz "dot" exited with status {exit_status}; tree.png may be missing or stale.')
    return tree


def svm_model():
    """Train a linear-kernel SVM on the training split and print test metrics.

    Uses the module-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for evaluation. Prints the confusion matrix first,
    then the classification report. Returns nothing.
    """
    classifier = SVC(kernel='linear').fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    # Same two reports, in the same order: confusion matrix, then per-class scores.
    for metric in (confusion_matrix, classification_report):
        print(metric(y_test, predictions))


# Evaluate the SVM on the held-out split; the printed report follows.
svm_model()


[[ 3  4  3]
 [ 4 33 32]
 [ 1 31 81]]
              precision    recall  f1-score   support

           1       0.38      0.30      0.33        10
           2       0.49      0.48      0.48        69
           3       0.70      0.72      0.71       113

    accuracy                           0.61       192
   macro avg       0.52      0.50      0.51       192
weighted avg       0.60      0.61      0.61       192



In [36]:
def save_boxplot(x,y):
    """Draw a box plot of column ``y`` grouped by classes 1, 2 and 3 of column ``x``.

    Rows of the module-level ``data_all_thomas`` frame are split by the value
    of column ``x`` (1, 2 or 3); the ``y`` values of each group become one box.
    The figure is written to ``boxplots/boxplot_<x>_<y>.svg``.

    Parameters
    ----------
    x : str
        Name of the grouping column; also used as the x-axis label.
    y : str
        Name of the column whose values are plotted; also the y-axis label.
    """
    import os  # local import: only needed to make sure the output folder exists

    # One series of `y` values per class of `x`, in class order 1, 2, 3.
    box_data = [
        data_all_thomas[data_all_thomas[x] == level].get(y)
        for level in (1, 2, 3)
    ]

    fig, ax = plt.subplots()
    ax.boxplot(box_data)
    ax.set_ylabel(y)
    ax.set_xlabel(x)

    # savefig does not create missing directories, so a fresh checkout without
    # a boxplots/ folder would otherwise fail here.
    os.makedirs('boxplots', exist_ok=True)
    fig.savefig('boxplots/boxplot_'+x+'_'+y+'.svg')


def create_boxplots():
    """Save one box plot for every (variable, classification) combination.

    Each of the three grouping variables is paired with each of the six
    classification columns, and ``save_boxplot`` is called once per pair
    with the variable as ``x`` and the classification as ``y``.
    """
    grouping_columns = ('valence', 'arousal', 'likeability')
    score_columns = (
        'agreeableness', 'conscientiousness', 'extraversion',
        'interview', 'neuroticism', 'openness',
    )
    # Variables vary slowest, classifications fastest -- same order as a nested loop.
    combinations = [
        (group, score)
        for group in grouping_columns
        for score in score_columns
    ]
    for group, score in combinations:
        save_boxplot(group, score)


def stats(x):
    """Print 'interview' summary statistics per class of ``x`` and a t-test.

    Rows of the module-level ``data_all_thomas`` frame are split by the value
    of column ``x`` (1, 2 or 3). For each class the mean and standard
    deviation (``np.std`` with its defaults) of the 'interview' column are
    printed, followed by the statistic and p-value of ``ttest_ind`` between
    class 2 and class 3 only.

    Parameters
    ----------
    x : str
        Name of the column holding the class labels.
    """
    levels = (1, 2, 3)
    # Select all three groups up front, before anything is printed.
    interview_scores = [
        data_all_thomas[data_all_thomas[x] == level].get('interview')
        for level in levels
    ]
    for level, scores in zip(levels, interview_scores):
        print('data%d: mean=%.3f stdv=%.3f' % (level, np.mean(scores), np.std(scores)))

    # Only classes 2 and 3 are compared; class 1 is summarised but not tested.
    t_statistic, p_value = ttest_ind(interview_scores[1], interview_scores[2])
    print(t_statistic, p_value)


def mannWithNeyuScore():
    """Run a Mann-Whitney U test on 'interview' for likeability class 1 vs class 3.

    Rows of the module-level ``data_all_thomas`` frame are split by the
    'likeability' column; the 'interview' values of class 1 and class 3 are
    passed to ``scipy.stats.mannwhitneyu``.

    Returns
    -------
    The result object produced by ``scipy.stats.mannwhitneyu`` (statistic and
    p-value). Previously the result was computed and then dropped, so the
    function always returned ``None``.
    """
    interview_class_1 = data_all_thomas[data_all_thomas['likeability'] == 1].get('interview')
    interview_class_3 = data_all_thomas[data_all_thomas['likeability'] == 3].get('interview')
    return scipy.stats.mannwhitneyu(interview_class_1, interview_class_3)