# OpenSMILE Analysis
This notebook loads OpenSMILE CSV data, cleans it and plots it.

## Import relevant libraries

In [1]:
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st

import seaborn as sns
import Helper as hp

## Load .csv data with results of OpenSMILE Analysis
First we load the .csv data and clean it (removing NaNs); then we store the information from all files in separate pandas DataFrames containing information about emotion, affect, level of interest and valence/arousal for all participants.

In [2]:
# Collect the OpenSMILE result files to iterate over.
# os.listdir returns directory entries in arbitrary order, so the list is
# sorted to make every run hand the files to the helper in the same sequence.
filenames = sorted(listdir('OpenSMILE_Data'))

# The helper's result is indexed in two levels below: data[0] holds the four
# data frames and data[1] the matching entries (labels / character ids).
data = hp.constructDataFrames(filenames)
frames, labels = data[0], data[1]

# Data frames (presumably one probability column per label — confirm in Helper).
df_emotion = frames[0]
df_affect = frames[1]
df_loi = frames[2]
df_ar_val = frames[3]

# Column labels for the first three frames, plus the character ids.
emotion_label = labels[0]
affect_label = labels[1]
loi_label = labels[2]
characterIDs = labels[3]

## Let's load information about the speakers
The speaker information is saved in a single .csv file containing four important columns: ID, Age, Sex and Academic Status. The previously loaded OpenSMILE csv files are named after the corresponding speaker ID (e.g. the speaker with ID 0 has the two files 0_a.csv and 0_b.csv), so the two data sets can be linked.

In [3]:
# Load the speaker table that is joined onto the OpenSMILE frames below.
char_data = pd.read_csv("CharacterData/CharacterIDs.csv")

# Both sides of a join need the same dtype on the key column, so cast
# 'CharacterID' to int in the speaker table and in every OpenSMILE frame.
for frame in (char_data, df_ar_val, df_emotion, df_affect, df_loi):
    frame['CharacterID'] = frame['CharacterID'].astype(int)

# Save the joined frames under new names: a left join keeps every OpenSMILE row
# and attaches the speaker columns wherever the CharacterID matches.
merge_options = dict(how='left', left_on='CharacterID', right_on='CharacterID')
df_ar_val_char = df_ar_val.merge(char_data, **merge_options)
df_emotion_char = df_emotion.merge(char_data, **merge_options)
df_affect_char = df_affect.merge(char_data, **merge_options)
df_loi_char = df_loi.merge(char_data, **merge_options)

## Now have a look at correlation
### Have a look at arousal and what it correlates with

In [4]:
# Correlate the arousal score with each emotion, affect and level-of-interest
# column. hp.correlations prints the correlation and Cohen's d for every pair.
# NOTE(review): pandas' .corr defaults to Pearson, not Spearman — confirm which
# method hp.correlations actually applies.
arousal = df_ar_val['arousal']

print('Emotions and Arousal')
cor_ar_emo = hp.correlations(arousal, df_emotion, emotion_label)

print('Affect and Arousal')
cor_ar_aff = hp.correlations(arousal, df_affect, affect_label)

print('Level of Interest and Arousal')
cor_ar_loi = hp.correlations(arousal, df_loi, loi_label)

Emotions and Arousal
Correlation between arousal and anger: -0.6834858044692085
Cohen d: 1.3540103907062906
Correlation between arousal and boredom: -0.42307029758274
Cohen d: 0.1583736511748122
Correlation between arousal and disgust: -0.15806251433006668
Cohen d: -0.7373572423651821
Correlation between arousal and fear: -0.6184722018376901
Cohen d: 1.3418849887875337
Correlation between arousal and happiness: -0.5789869405166657
Cohen d: 1.2675246862010017
Correlation between arousal and neutral: -0.356264634376464
Cohen d: 1.073710483184311
Correlation between arousal and sadness: 0.483425559781051
Cohen d: -2.861793647848958
Affect and Arousal
Correlation between arousal and aggressiv: -0.6249619461578844
Cohen d: -0.05694323448409338
Correlation between arousal and cheerful: -0.7547280735964071
Cohen d: -1.7333878080657694
Correlation between arousal and intoxicated: 0.8192158568074172
Cohen d: -1.3141756512058569
Correlation between arousal and nervous: -0.4817454038193749
Cohen 

### Now look at valence

In [5]:
# Same three comparisons as for arousal, this time with the valence score.
valence = df_ar_val['valence']

# Valence vs. emotions
print('Emotions and valence')
cor_val_emo = hp.correlations(valence, df_emotion, emotion_label)

# Valence vs. affect
print('Affect and valence')
cor_val_aff = hp.correlations(valence, df_affect, affect_label)

# Valence vs. level of interest
print('Level of Interest and valence')
cor_val_loi = hp.correlations(valence, df_loi, loi_label)

Emotions and valence
Correlation between valence and anger: -0.42700028268595
Cohen d: 2.124212618215266
Correlation between valence and boredom: 0.16831427339329016
Cohen d: 1.198707389958022
Correlation between valence and disgust: -0.17965790687690897
Cohen d: 0.03344577381746021
Correlation between valence and fear: -0.3312140972517545
Cohen d: 2.1168342579669335
Correlation between valence and happiness: -0.3991203323202824
Cohen d: 2.071387756958357
Correlation between valence and neutral: 0.2970377418952006
Cohen d: 1.9499189451425651
Correlation between valence and sadness: 0.12493730716157896
Cohen d: -1.7990379478117744
Affect and valence
Correlation between valence and aggressiv: -0.5966541351575038
Cohen d: 1.1899909839878873
Correlation between valence and cheerful: -0.16359981081171018
Cohen d: -0.18032792829621663
Correlation between valence and intoxicated: 0.3355725268618499
Cohen d: -0.38644640823664206
Correlation between valence and nervous: 0.27861337746066017
Cohe

### Now look at the different emotions and how they correlate with affect and level of interest

In [6]:
def _correlate_emotion(emotion):
    """Correlate one emotion column with all affect and level-of-interest columns.

    Prints the same section headers the notebook used before (upper-cased
    emotion name, then one header per comparison) and delegates the actual
    computation to hp.correlations.

    Parameters
    ----------
    emotion : str
        Name of a column in df_emotion, e.g. 'anger'.

    Returns
    -------
    tuple
        (result for emotion vs. affect, result for emotion vs. level of interest),
        each being whatever hp.correlations returns.
    """
    scores = df_emotion[emotion]
    print(emotion.upper())
    print('Affect and ' + emotion)
    with_affect = hp.correlations(scores, df_affect, affect_label)
    print('Level of Interest and ' + emotion)
    with_loi = hp.correlations(scores, df_loi, loi_label)
    return with_affect, with_loi


# One call per emotion replaces the previous copy-pasted stanzas; the result
# names are unchanged so later cells can keep using them.
cor_ang_aff, cor_ang_loi = _correlate_emotion('anger')
cor_bor_aff, cor_bor_loi = _correlate_emotion('boredom')
cor_dis_aff, cor_dis_loi = _correlate_emotion('disgust')
cor_fea_aff, cor_fea_loi = _correlate_emotion('fear')
cor_hap_aff, cor_hap_loi = _correlate_emotion('happiness')
# 'eneu' = neutral as an *emotion* (the affect frame has its own 'neutral' column).
cor_eneu_aff, cor_eneu_loi = _correlate_emotion('neutral')
cor_sad_aff, cor_sad_loi = _correlate_emotion('sadness')

ANGER
Affect and anger
Correlation between anger and aggressiv: 0.8219437272699538
Cohen d: -2.5220828261171917
Correlation between anger and cheerful: 0.8336310706800888
Cohen d: -3.756066144914373
Correlation between anger and intoxicated: -0.7093611149907214
Cohen d: -2.0544981822259185
Correlation between anger and nervous: 0.4679141958102869
Cohen d: -3.899662916493322
Correlation between anger and neutral: -0.275407946565424
Cohen d: -1.6917353317472992
Correlation between anger and tired: 0.30124420924847395
Cohen d: -1.203980867797832
Level of Interest and anger
Correlation between anger and disinterest: 0.009259088185940937
Cohen d: -0.7916027783022161
Correlation between anger and normal: 0.42912660568043326
Cohen d: -2.3772808250222512
Correlation between anger and high interest: -0.4572046402007513
Cohen d: -1.4269188233126726
BOREDOM
Affect and boredom
Correlation between boredom and aggressiv: -0.13264841610321568
Cohen d: -0.24405618151390246
Correlation between boredom 

### Now have a look at affect

In [7]:
def _correlate_affect_with_loi(affect):
    """Correlate one affect column with all level-of-interest columns.

    Prints the upper-cased affect name and the comparison header, then
    delegates the computation to hp.correlations.

    Parameters
    ----------
    affect : str
        Name of a column in df_affect, e.g. 'cheerful'.

    Returns
    -------
    Whatever hp.correlations returns for this affect vs. level of interest.
    """
    print(affect.upper())
    print('Level of Interest and ' + affect)
    return hp.correlations(df_affect[affect], df_loi, loi_label)


# One call per affect replaces the previous copy-pasted stanzas; the result
# names are unchanged so later cells can keep using them.
cor_agg_loi = _correlate_affect_with_loi('aggressiv')
cor_che_loi = _correlate_affect_with_loi('cheerful')
cor_tox_loi = _correlate_affect_with_loi('intoxicated')
cor_ner_loi = _correlate_affect_with_loi('nervous')
# Name kept as 'cor_aneu' (no '_loi' suffix) for compatibility with existing references.
cor_aneu = _correlate_affect_with_loi('neutral')
cor_tir_loi = _correlate_affect_with_loi('tired')

AGGRESSIV
Level of Interest and aggressiv
Correlation between aggressiv and disinterest: -0.46985007624380576
Cohen d: 1.6685232998938873
Correlation between aggressiv and normal: 0.6493212625168995
Cohen d: -1.9559760973290863
Correlation between aggressiv and high interest: -0.6382560988794522
Cohen d: -0.9907769562863176
CHEERFUL
Level of Interest and cheerful
Correlation between cheerful and disinterest: 0.30740629850886686
Cohen d: 3.2578193931742008
Correlation between cheerful and normal: 0.18631170633269223
Cohen d: -1.255894587971958
Correlation between cheerful and high interest: -0.23207811608422513
Cohen d: -0.2755757861703259
INTOXICATED
Level of Interest and intoxicated
Correlation between intoxicated and disinterest: -0.5066896737629814
Cohen d: 1.8942451153533637
Correlation between intoxicated and normal: -0.18317122995961105
Cohen d: -0.9401051861079537
Correlation between intoxicated and high interest: 0.25079466125469346
Cohen d: -0.06266537498593241
NERVOUS
Level o

## Now look at character data

## Chi-squared Test of Independence
We start with the characteristic sex. The null hypothesis states that the two categorical variables, sex and e.g. emotion, are independent.

For that we convert the previously used data frames, which contain probabilities of e.g. emotion, affect etc., to frequency tables using the helper method calcFrequencyTable(). For each row (sample) the function selects the maximum probability and counts it as one occurrence of that category. Furthermore, the function takes an integer so that the matching labels, e.g. for emotion, are generated (voice features). The second integer defines the character feature, e.g. Age, so that it can split the data set into a table of shape n x m, where n stands for the number of character feature groups (in most cases it's 2, but for age it's 3) and m stands for the number of voice features, e.g. the number of different emotions.  
The frequency tables are then used as input for the stats.chi2_contingency() function.

If the p-value is significant, residuals are computed to identify the cells in which the groups differ, e.g. the groups differ in the emotion 'anger', which then contributes to the significant p-value.

In [8]:
# Chi-squared test of independence between 'Sex' and each voice-feature family.
# The integer selects the family; judging by the printed results it maps
# 0 -> Emotion, 1 -> Affect, 2 -> Arousal Valence, 3 -> Level of Interest.
# NOTE(review): the trailing True is passed through unchanged — confirm its
# meaning in Helper. The 'sec' spelling in three of the names (presumably meant
# to be 'sex') is kept so that existing references keep working.
emo_sex_chi2, aff_sec_chi2, ar_val_sec_chi2, loi_sec_chi2 = [
    hp.chi2(frame, 'Sex', feature_id, True)
    for feature_id, frame in enumerate(
        [df_emotion_char, df_affect_char, df_ar_val_char, df_loi_char]
    )
]

Chi square of Emotion : 0.23324423324423343 with p-value of: 0.9997577204712553
Chi square of Affect : 0.10732466063348399 with p-value of: 0.999806812314343
Chi square of Arousal Valence : 0.11349206349206363 with p-value of: 0.7362029664490812
Chi square of Level of Interest : 0.17514374170720895 with p-value of: 0.9161530245847543


Now we move on to academic status; the null hypothesis is that the variables academic status and e.g. emotion are independent.

In [9]:
# Chi-squared test of independence between 'Academic Status' and each
# voice-feature family. The integer selects the family; judging by the printed
# results it maps 0 -> Emotion, 1 -> Affect, 2 -> Arousal Valence,
# 3 -> Level of Interest.
# NOTE(review): the trailing True is passed through unchanged — confirm its
# meaning in Helper.
emo_aca_chi2, aff_aca_chi2, ar_val_aca_chi2, loi_aca_chi2 = [
    hp.chi2(frame, 'Academic Status', feature_id, True)
    for feature_id, frame in enumerate(
        [df_emotion_char, df_affect_char, df_ar_val_char, df_loi_char]
    )
]

Chi square of Emotion : 0.23324423324423338 with p-value of: 0.9997577204712553
Chi square of Affect : 0.7232748868778277 with p-value of: 0.9816669156305917
Chi square of Arousal Valence : 0.11349206349206363 with p-value of: 0.7362029664490812
Chi square of Level of Interest : 0.22291021671826644 with p-value of: 0.8945315474908987


Now let's check whether age and e.g. emotion / affect / arousal-valence / level of interest are independent.

In [10]:
# Chi-squared test of independence between 'Age' and each voice-feature family.
# The integer selects the family; judging by the printed results it maps
# 0 -> Emotion, 1 -> Affect, 2 -> Arousal Valence, 3 -> Level of Interest.
# NOTE(review): the trailing True is passed through unchanged — confirm its
# meaning in Helper.
emo_age_chi2, aff_age_chi2, ar_val_age_chi2, loi_age_chi2 = [
    hp.chi2(frame, 'Age', feature_id, True)
    for feature_id, frame in enumerate(
        [df_emotion_char, df_affect_char, df_ar_val_char, df_loi_char]
    )
]

Chi square of Emotion : 0.22291021671826622 with p-value of: 0.9999999975799772
Chi square of Affect : 0.5759803921568627 with p-value of: 0.9999870029958444
Chi square of Arousal Valence : 0.22499999999999998 with p-value of: 0.8935973471085157
Chi square of Level of Interest : 0.22291021671826625 with p-value of: 0.9942316580471601


## Post-Hoc tests for age, as it has three different groups

In [11]:
# Pairwise post-hoc comparisons of the age groups, Bonferroni-corrected.
# Each call receives element [1] of the corresponding chi2 result and yields
# four values: reject list, corrected p-values, group combinations, residuals.
# NOTE(review): the two trailing True flags are passed through unchanged —
# confirm their meaning in Helper.
#
# Fix: the affect call previously unpacked into emo_corrected_p_vals and
# emo_combinations, silently overwriting the emotion results. Every analysis
# now keeps its own set of names.
print('post-hoc emotions and different groups')
emo_reject_list, emo_corrected_p_vals, emo_combinations, emo_residuals = hp.chi2_post_hoc(
    emo_age_chi2[1], 'bonferroni', True, True)

print('\n post-hoc affect and different groups')
aff_reject_list, aff_corrected_p_vals, aff_combinations, aff_residuals = hp.chi2_post_hoc(
    aff_age_chi2[1], 'bonferroni', True, True)

print('\n post-hoc arousal-valence and different groups')
ar_val_reject_list, ar_val_corrected_p_vals, ar_val_combinations, ar_val_residuals = hp.chi2_post_hoc(
    ar_val_age_chi2[1], 'bonferroni', True, True)

print('\n post-hoc level of intereset and different groups')
loi_reject_list, loi_corrected_p_vals, loi_combinations, loi_residuals = hp.chi2_post_hoc(
    loi_age_chi2[1], 'bonferroni', True, True)

post-hoc emotions and different groups
Combinations: [('Young', 'Middle'), ('Young', 'Old'), ('Middle', 'Old')]
Reject List: [False False False]
Corrected p-values: [1. 1. 1.]

 post-hoc affect and different groups
Combinations: [('Young', 'Middle'), ('Young', 'Old'), ('Middle', 'Old')]
Reject List: [False False False]
Corrected p-values: [1. 1. 1.]

 post-hoc arousal-valence and different groups
Combinations: [('Young', 'Middle'), ('Young', 'Old'), ('Middle', 'Old')]
Reject List: [False False False]
Corrected p-values: [1. 1. 1.]

 post-hoc level of intereset and different groups
Combinations: [('Young', 'Middle'), ('Young', 'Old'), ('Middle', 'Old')]
Reject List: [False False False]
Corrected p-values: [1. 1. 1.]
