# OpenSMILE Correlation
This notebook calculates correlations and Cohen's d (as a measure of effect size) between the features of the given conference data set.

In [1]:
import Helper as hp
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns

# Read the data
data = pd.read_csv("CHI_2019_FULL.csv")

# Column names of each feature group; also used as labels by the correlation cells.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

# Split the data into one frame per feature group, each keeping the 'Filename' column.
# .copy(deep=True) makes every frame independent of `data`, so adding the 'Char_ID'
# column below writes to a real frame and does not raise SettingWithCopyWarning.
# The column selections reuse the label lists so the two cannot drift apart.
df_emotion = data[emotion_label + ['Filename']].copy(deep=True)
df_affect = data[affect_label + ['Filename']].copy(deep=True)
df_loi = data[loi_label + ['Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)

# Append the character ID as a 'Char_ID' column. It is stored inside the filename
# together with other information, so every run of non-digit characters is removed
# and only the digits remain (as a string).
# The pattern is a raw string: '\D' in a normal string literal is an invalid escape
# sequence and triggers a warning on current Python versions.
# .replace() already returns a new Series, so no additional copy is needed.
for frame in (df_emotion, df_affect, df_loi, df_ar_val):
    frame['Char_ID'] = frame['Filename'].replace(r'\D+', '', regex=True)

## Correlation of arousal and other features

In [2]:
# Relate arousal to each feature group in turn: emotions, affect, level of interest.
# hp.correlations comes from the project's Helper module; judging by the cell output
# it reports a correlation and Cohen's d for every label.
# NOTE(review): pandas' .corr defaults to Pearson, not Spearman - confirm which
# method Helper.correlations actually uses.
arousal_groups = (
    ('Emotions and Arousal', df_emotion, emotion_label),
    ('Affect and Arousal', df_affect, affect_label),
    ('Level of Interest and Arousal', df_loi, loi_label),
)

arousal_results = []
for heading, group_frame, group_labels in arousal_groups:
    print(heading)
    arousal_results.append(hp.correlations(df_ar_val['Arousal'], group_frame, group_labels))

# Keep the individual result names for any later cells that use them.
cor_ar_emo, cor_ar_aff, cor_ar_loi = arousal_results

Emotions and Arousal
Correlation between Arousal and Anger: 0.26332264110799225
Cohen d: 0.8458847999867704
Correlation between Arousal and Boredom: 0.044886371169640865
Cohen d: 0.4323978616926827
Correlation between Arousal and Disgust: 0.5031616867099608
Cohen d: -0.6458730008444528
Correlation between Arousal and Fear: 0.19677723419267382
Cohen d: 0.9778761324581765
Correlation between Arousal and Happiness: 0.26891272827839435
Cohen d: 0.9583168782385978
Correlation between Arousal and Emo_Neutral: -0.0008885536091654936
Cohen d: 0.9276984217523622
Correlation between Arousal and Sadness: -0.5030356410033169
Cohen d: -2.5778323425917415
Affect and Arousal
Correlation between Arousal and Aggressiv: 0.6603960027110047
Cohen d: -0.6535714177605594
Correlation between Arousal and Cheerful: -0.08846726248760696
Cohen d: -2.697799790211063
Correlation between Arousal and Intoxicated: 0.20755568974494693
Cohen d: -0.4679008277238698
Correlation between Arousal and Nervous: -0.19797253322

## Correlation of Valence and other features

In [3]:
# Relate valence to each feature group in turn: emotions, affect, level of interest.
valence_groups = (
    ('Emotions and valence', df_emotion, emotion_label),
    ('Affect and valence', df_affect, affect_label),
    ('Level of Interest and valence', df_loi, loi_label),
)

valence_results = []
for heading, group_frame, group_labels in valence_groups:
    print(heading)
    valence_results.append(hp.correlations(df_ar_val['Valence'], group_frame, group_labels))

# Keep the individual result names for any later cells that use them.
cor_val_emo, cor_val_aff, cor_val_loi = valence_results

Emotions and valence
Correlation between Valence and Anger: -0.15055452639058786
Cohen d: 0.13396733932857335
Correlation between Valence and Boredom: -0.31447190694860566
Cohen d: -0.2644032656778474
Correlation between Valence and Disgust: -0.0914396265990685
Cohen d: -1.042055275569945
Correlation between Valence and Fear: -0.22195151961071571
Cohen d: 0.27593831422329324
Correlation between Valence and Happiness: -0.2584192218702297
Cohen d: 0.2480711013178255
Correlation between Valence and Emo_Neutral: -0.27363341666323304
Cohen d: 0.21464559799766708
Correlation between Valence and Sadness: 0.23472547384643278
Cohen d: -3.0000006232266676
Affect and valence
Correlation between Valence and Aggressiv: -0.06283870911233999
Cohen d: -1.569229622009912
Correlation between Valence and Cheerful: -0.12225178137446144
Cohen d: -3.9596041172210215
Correlation between Valence and Intoxicated: 0.08641523351258751
Cohen d: -1.5660589731437382
Correlation between Valence and Nervous: 0.163136

## Correlation of different emotions and other features

In [4]:
# For every emotion score, relate it first to the affect scores and then to the
# level-of-interest scores. Each entry pairs the column name in df_emotion with
# the word used in the printed headings (they differ for 'Emo_Neutral').
emotion_headings = (
    ('Anger', 'anger'),
    ('Boredom', 'boredom'),
    ('Disgust', 'disgust'),
    ('Fear', 'fear'),
    ('Happiness', 'happiness'),
    ('Emo_Neutral', 'neutral'),
    ('Sadness', 'sadness'),
)

emotion_results = []
for column, word in emotion_headings:
    print(word.upper())
    print('Affect and ' + word)
    versus_affect = hp.correlations(df_emotion[column], df_affect, affect_label)
    print('Level of Interest and ' + word)
    versus_loi = hp.correlations(df_emotion[column], df_loi, loi_label)
    emotion_results.append((versus_affect, versus_loi))

# Unpack into the individual result names, in the same order as emotion_headings.
(
    (cor_ang_aff, cor_ang_loi),
    (cor_bor_aff, cor_bor_loi),
    (cor_dis_aff, cor_dis_loi),
    (cor_fea_aff, cor_fea_loi),
    (cor_hap_aff, cor_hap_loi),
    (cor_eneu_aff, cor_eneu_loi),
    (cor_sad_aff, cor_sad_loi),
) = emotion_results

ANGER
Affect and anger
Correlation between Anger and Aggressiv: 0.5480758277805446
Cohen d: -2.0063881244760937
Correlation between Anger and Cheerful: -0.12978239081329704
Cohen d: -4.744009540962004
Correlation between Anger and Intoxicated: -0.06874054009023688
Cohen d: -2.3142967713680402
Correlation between Anger and Nervous: -0.2576579563786324
Cohen d: -0.6270865673301028
Correlation between Anger and Aff_Neutral: -0.20546498506844119
Cohen d: -1.5162901134792885
Correlation between Anger and Tired: -0.1502984749973127
Cohen d: -1.354026342686464
Level of Interest and anger
Correlation between Anger and Disinterest: -0.1918835634150246
Cohen d: -0.015184756144257305
Correlation between Anger and Normal: -0.1894019365205821
Cohen d: -2.8605552773781895
Correlation between Anger and High Interest: 0.1946696464795092
Cohen d: -1.4031731696411767
BOREDOM
Affect and boredom
Correlation between Boredom and Aggressiv: 0.13732885127000197
Cohen d: -1.2782572564777879
Correlation between

## Correlation of affect and other features

In [5]:
# For every affect score, relate it to the level-of-interest scores. Each entry
# pairs the column name in df_affect with the word used in the printed headings
# (they differ for 'Aff_Neutral').
affect_headings = (
    ('Aggressiv', 'aggressiv'),
    ('Cheerful', 'cheerful'),
    ('Intoxicated', 'intoxicated'),
    ('Nervous', 'nervous'),
    ('Aff_Neutral', 'neutral'),
    ('Tired', 'tired'),
)

affect_results = []
for column, word in affect_headings:
    print(word.upper())
    print('Level of Interest and ' + word)
    affect_results.append(hp.correlations(df_affect[column], df_loi, loi_label))

# Unpack into the individual result names, in the same order as affect_headings.
# Note: the neutral result is named cor_aneu (without the _loi suffix the other
# names carry); the name is kept as-is so existing references keep working.
cor_agg_loi, cor_che_loi, cor_tox_loi, cor_ner_loi, cor_aneu, cor_tir_loi = affect_results

AGGRESSIV
Level of Interest and aggressiv
Correlation between Aggressiv and Disinterest: -0.5132153703305571
Cohen d: 2.1678896238094283
Correlation between Aggressiv and Normal: -0.33721507328840333
Cohen d: -2.0944637400084005
Correlation between Aggressiv and High Interest: 0.3539122055946881
Cohen d: -0.7017385687028432
CHEERFUL
Level of Interest and cheerful
Correlation between Cheerful and Disinterest: -0.2262736630396694
Cohen d: 5.060648150314271
Correlation between Cheerful and Normal: 0.14205670473141196
Cohen d: -1.0280949656615777
Correlation between Cheerful and High Interest: -0.13021782540679946
Cohen d: 0.3347348152246994
INTOXICATED
Level of Interest and intoxicated
Correlation between Intoxicated and Disinterest: 0.0020707438813597845
Cohen d: 2.742262727375662
Correlation between Intoxicated and Normal: 0.1607457135990369
Cohen d: -2.273461555183291
Correlation between Intoxicated and High Interest: -0.1583586832573953
Cohen d: -0.8397077893408761
NERVOUS
Level of In