In [43]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import scipy
from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image
from sklearn.model_selection import train_test_split
from scipy import stats

%matplotlib inline
matplotlib.style.use('bmh')

In [45]:
data_all_thomas = pd.read_excel('annotations/data_all_thomas.xlsx')
data_labels_video_features = pd.read_csv('annotations/combined_labels_video_features.csv')
labels_max = data_labels_video_features[['gold_gt_max_aro','gold_gt_max_like','gold_gt_max_val']]
labels_min = data_labels_video_features[['gold_gt_min_aro','gold_gt_min_like','gold_gt_min_val']]
video_features = data_labels_video_features.drop(['gold_gt_max_aro','gold_gt_max_like','gold_gt_max_val','gold_gt_min_aro','gold_gt_min_like','gold_gt_min_val'],axis=1)
video_features.apply(stats.zscore)
features = list(video_features.columns)


In [8]:
from sklearn.ensemble import RandomForestClassifier


def randomforest_model():
    rf = RandomForestClassifier(n_estimators=100, bootstrap=True, max_features='sqrt')
    rf.fit(video_features,labels_max)

In [9]:
from sklearn.tree import DecisionTreeClassifier


def decisiontree_model():
    tree = DecisionTreeClassifier(random_state=50)
    tree.fit(video_features, labels_max)
    print(f'Decision tree has {tree.tree_.node_count} nodes with maximum depth {tree.tree_.max_depth}.')
    export_graphviz(tree, 'tree.dot', rounded = True, feature_names = features, class_names = ['0', '1','2'], filled = True)
    call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=400']);

In [52]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from mpl_toolkits import mplot3d

X_train, X_test, y_train, y_test = train_test_split(video_features, labels_min['gold_gt_min_aro'], test_size=0.30)

clf = SVC(kernel='rbf')
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test,y_pred))




precision    recall  f1-score   support

           1       0.00      0.00      0.00        31
           2       0.60      1.00      0.75       173
           3       0.00      0.00      0.00        84

    accuracy                           0.60       288
   macro avg       0.20      0.33      0.25       288
weighted avg       0.36      0.60      0.45       288



In [10]:
def save_boxplot(x,y):
    box_data_1 = data_all_thomas[data_all_thomas[x] == 1 ].get(y)
    box_data_2 = data_all_thomas[data_all_thomas[x] == 2 ].get(y)
    box_data_3 = data_all_thomas[data_all_thomas[x] == 3 ].get(y)
    box_data = [box_data_1,box_data_2,box_data_3]
    fig, ax = plt.subplots()
    ax.boxplot(box_data)
    ax.set_ylabel(y)
    ax.set_xlabel(x)
    fig.savefig('boxplots/boxplot_'+x+'_'+y+'.svg')


def create_boxplots():
    classifications = ['agreeableness','conscientiousness','extraversion','interview','neuroticism','openness']
    variables = ['valence','arousal','likeability']
    for x in variables:
        for y in classifications:
            save_boxplot(x,y)

In [12]:
def stats(x):
    data_class_1 = data_all_thomas[data_all_thomas[x] == 1 ].get('interview')
    data_class_2 = data_all_thomas[data_all_thomas[x] == 2 ].get('interview')
    data_class_3 = data_all_thomas[data_all_thomas[x] == 3 ].get('interview')
    print('data1: mean=%.3f stdv=%.3f' % (np.mean(data_class_1), np.std(data_class_1)))
    print('data2: mean=%.3f stdv=%.3f' % (np.mean(data_class_2), np.std(data_class_2)))
    print('data3: mean=%.3f stdv=%.3f' % (np.mean(data_class_3), np.std(data_class_3)))    
    stat, p = ttest_ind(data_class_2, data_class_3) 
    print(stat, p)

In [11]:
def mannWithNeyuScore():
    data_class_1 = data_all_thomas[data_all_thomas['likeability'] == 1 ].get('interview')
    data_class_2 = data_all_thomas[data_all_thomas['likeability'] == 2 ].get('interview')
    data_class_3 = data_all_thomas[data_all_thomas['likeability'] == 3 ].get('interview')
    scipy.stats.mannwhitneyu(data_class_1, data_class_3)