# OpenSMILE Correlation
This notebook calculates correlation coefficients and Cohen's d (as a measure of effect size) for the given conference data set.

In [2]:
import Helper as hp
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns

# Read the OpenSMILE results of the conference data set.
data = pd.read_csv("UIST2019_OpenSMILE.csv")

# Column names of each feature group. They are used right below to select the
# columns of each group and are passed to hp.correlations in later cells.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

# One data frame per feature group, each keeping the 'Filename' column.
# The column lists are built from the label lists so the two cannot drift apart.
# .copy(deep=True) makes every frame independent of `data`; without it, adding
# the 'Char_ID' column below would raise pandas' SettingWithCopyWarning.
df_emotion = data[emotion_label + ['Filename']].copy(deep=True)
df_affect = data[affect_label + ['Filename']].copy(deep=True)
df_loi = data[loi_label + ['Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)

# Append the character ID, which is encoded in the filename, as a 'Char_ID' column
# by removing every run of non-digit characters from 'Filename'.
# The pattern is a raw string so that \D reaches the regex engine unchanged
# (a plain '\D+' is an invalid string escape and triggers a warning).
# No numeric conversion is performed on the result.
# NOTE(review): ALL digits of the filename are kept and concatenated - confirm that
# the filenames contain no digits other than the character ID.
for frame in (df_emotion, df_affect, df_loi, df_ar_val):
    # Series.replace already returns a new Series, so no extra copy is needed.
    frame['Char_ID'] = frame['Filename'].replace(r'\D+', '', regex=True)

## Correlation of arousal and other features

In [3]:
# Correlate arousal with every feature of the emotion, affect and level-of-interest
# groups. For each feature, hp.correlations prints the correlation and Cohen's d
# (see the output below).
# NOTE(review): pandas' own .corr defaults to Pearson, not Spearman; which method
# hp.correlations actually uses is not visible in this notebook - confirm in Helper.
arousal = df_ar_val['Arousal']

# (heading printed before the report, feature frame, feature columns)
arousal_targets = [
    ('Emotions and Arousal', df_emotion, emotion_label),
    ('Affect and Arousal', df_affect, affect_label),
    ('Level of Interest and Arousal', df_loi, loi_label),
]

arousal_results = []
for heading, frame, columns in arousal_targets:
    print(heading)
    arousal_results.append(hp.correlations(arousal, frame, columns))

# Keep the individual result names available for later cells.
cor_ar_emo, cor_ar_aff, cor_ar_loi = arousal_results

Emotions and Arousal
Correlation between Arousal and Anger: -0.11960116467375333
Cohen d: 1.0038101525157788
Correlation between Arousal and Boredom: -0.15724156022576966
Cohen d: 0.9280649625886008
Correlation between Arousal and Disgust: 0.31142090322066945
Cohen d: 0.12619986564219535
Correlation between Arousal and Fear: -0.12065193543111381
Cohen d: 1.0088509049605552
Correlation between Arousal and Happiness: -0.12745701355512387
Cohen d: 1.0068969820425526
Correlation between Arousal and Emo_Neutral: -0.12625517556888255
Cohen d: 1.0000073098884006
Correlation between Arousal and Sadness: -0.2690114288749918
Cohen d: -5.460177225486265
Affect and Arousal
Correlation between Arousal and Aggressiv: 0.06684792387301247
Cohen d: 0.048486623323264935
Correlation between Arousal and Cheerful: -0.5678322970999742
Cohen d: -1.2806979634109281
Correlation between Arousal and Intoxicated: 0.614456354498157
Cohen d: -1.1121919431101954
Correlation between Arousal and Nervous: 0.20051069137

## Correlation of Valence and other features

In [4]:
# Correlate valence with every feature of the emotion, affect and level-of-interest
# groups. For each feature, hp.correlations prints the correlation and Cohen's d
# (see the output below).
valence = df_ar_val['Valence']

# (heading printed before the report, feature frame, feature columns)
valence_targets = [
    ('Emotions and valence', df_emotion, emotion_label),
    ('Affect and valence', df_affect, affect_label),
    ('Level of Interest and valence', df_loi, loi_label),
]

valence_results = []
for heading, frame, columns in valence_targets:
    print(heading)
    valence_results.append(hp.correlations(valence, frame, columns))

# Keep the individual result names available for later cells.
cor_val_emo, cor_val_aff, cor_val_loi = valence_results

Emotions and valence
Correlation between Valence and Anger: -0.3124995201247417
Cohen d: 1.4200472018669366
Correlation between Valence and Boredom: -0.26509745567227183
Cohen d: 1.3594280990223377
Correlation between Valence and Disgust: -0.20672906887783082
Cohen d: 0.5867104641016552
Correlation between Valence and Fear: -0.27483848312832276
Cohen d: 1.4239631878102155
Correlation between Valence and Happiness: -0.3065434608342312
Cohen d: 1.422446394407046
Correlation between Valence and Emo_Neutral: -0.24688029616822277
Cohen d: 1.417063981453608
Correlation between Valence and Sadness: 0.26306821038460826
Cohen d: -4.373471293321692
Affect and valence
Correlation between Valence and Aggressiv: -0.3335699261573498
Cohen d: 0.6561805645814583
Correlation between Valence and Cheerful: -0.7241748894708327
Cohen d: -0.6553855428137522
Correlation between Valence and Intoxicated: 0.5310386061614225
Cohen d: -0.6546721565220764
Correlation between Valence and Nervous: 0.6158725167513683

## Correlation of different emotions and other features

In [5]:
# Correlate every emotion with all affect features and all level-of-interest features.
# For each emotion a banner with its name is printed, followed by the two reports
# produced by hp.correlations.

# (column in df_emotion, name used in the printed headings)
emotion_runs = [
    ('Anger', 'anger'),
    ('Boredom', 'boredom'),
    ('Disgust', 'disgust'),
    ('Fear', 'fear'),
    ('Happiness', 'happiness'),
    ('Emo_Neutral', 'neutral'),
    ('Sadness', 'sadness'),
]

emotion_results = {}
for column, shown in emotion_runs:
    print(shown.upper())
    # Emotion vs. affect
    print('Affect and ' + shown)
    vs_affect = hp.correlations(df_emotion[column], df_affect, affect_label)
    # Emotion vs. level of interest
    print('Level of Interest and ' + shown)
    vs_loi = hp.correlations(df_emotion[column], df_loi, loi_label)
    emotion_results[column] = (vs_affect, vs_loi)

# Keep the individual result names available for later cells.
cor_ang_aff, cor_ang_loi = emotion_results['Anger']
cor_bor_aff, cor_bor_loi = emotion_results['Boredom']
cor_dis_aff, cor_dis_loi = emotion_results['Disgust']
cor_fea_aff, cor_fea_loi = emotion_results['Fear']
cor_hap_aff, cor_hap_loi = emotion_results['Happiness']
cor_eneu_aff, cor_eneu_loi = emotion_results['Emo_Neutral']
cor_sad_aff, cor_sad_loi = emotion_results['Sadness']

ANGER
Affect and anger
Correlation between Anger and Aggressiv: 0.23964088160619282
Cohen d: -2.575375652135281
Correlation between Anger and Cheerful: 0.2766279052354653
Cohen d: -2.54244685327253
Correlation between Anger and Intoxicated: -0.19822753557486464
Cohen d: -1.8417008864025526
Correlation between Anger and Nervous: -0.13735598175422528
Cohen d: -1.3004006304271691
Correlation between Anger and Aff_Neutral: -0.16259822887082206
Cohen d: -2.34163026077441
Correlation between Anger and Tired: 0.1105955221813773
Cohen d: -1.5252241417571457
Level of Interest and anger
Correlation between Anger and Disinterest: -0.16507842650147914
Cohen d: -1.0834350225209848
Correlation between Anger and Normal: 0.010935129350005316
Cohen d: -7.514696690964597
Correlation between Anger and High Interest: 0.07164551352517692
Cohen d: -0.9999633842840118
BOREDOM
Affect and boredom
Correlation between Boredom and Aggressiv: 0.07834589344162006
Cohen d: -2.1330200744748296
Correlation between Bor

## Correlation of affect and other features

In [6]:
# Correlate every affect with all level-of-interest features.
# For each affect a banner with its name is printed, followed by the report
# produced by hp.correlations.

# (column in df_affect, name used in the printed headings)
affect_runs = [
    ('Aggressiv', 'aggressiv'),
    ('Cheerful', 'cheerful'),
    ('Intoxicated', 'intoxicated'),
    ('Nervous', 'nervous'),
    ('Aff_Neutral', 'neutral'),
    ('Tired', 'tired'),
]

affect_results = {}
for column, shown in affect_runs:
    print(shown.upper())
    print('Level of Interest and ' + shown)
    affect_results[column] = hp.correlations(df_affect[column], df_loi, loi_label)

# Keep the individual result names available for later cells.
cor_agg_loi = affect_results['Aggressiv']
cor_che_loi = affect_results['Cheerful']
cor_tox_loi = affect_results['Intoxicated']
cor_ner_loi = affect_results['Nervous']
# NOTE(review): unlike its siblings this name has no `_loi` suffix; it is kept
# unchanged because other cells may refer to it.
cor_aneu = affect_results['Aff_Neutral']
cor_tir_loi = affect_results['Tired']

AGGRESSIV
Level of Interest and aggressiv
Correlation between Aggressiv and Disinterest: -0.37600554602965947
Cohen d: 0.6172006300170615
Correlation between Aggressiv and Normal: 0.18040556873532998
Cohen d: -6.405055252844228
Correlation between Aggressiv and High Interest: -0.018773756064177527
Cohen d: -0.054025108125872405
CHEERFUL
Level of Interest and cheerful
Correlation between Cheerful and Disinterest: -0.5724896790302542
Cohen d: 1.887980017620989
Correlation between Cheerful and Normal: 0.2795570728565973
Cohen d: -3.553168536617402
Correlation between Cheerful and High Interest: -0.03429455210080225
Cohen d: 1.2709946787499047
INTOXICATED
Level of Interest and intoxicated
Correlation between Intoxicated and Disinterest: 0.28524946241995097
Cohen d: 1.4654871740406061
Correlation between Intoxicated and Normal: -0.13644370042294082
Cohen d: -2.6319160155207424
Correlation between Intoxicated and High Interest: 0.013754189913378435
Cohen d: 1.106291756382935
NERVOUS
Level of