# OpenSMILE Correlation
This notebook computes correlation coefficients and Cohen's d (as a measure of effect size) for the given conference data set.

In [1]:
import Helper as hp
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns

# Read data
data = pd.read_csv("CHI_2019_FULL.csv")

# Column labels of each feature group, exactly as they are used to index `data` below
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

# Split the data into one frame per feature group, each keeping the 'Filename' column.
# The deep copies make every frame independent of `data`: the 'Char_ID' column added below is
# written to the copy, which avoids pandas' SettingWithCopyWarning.
df_emotion = data[emotion_label + ['Filename']].copy(deep=True)
df_affect = data[affect_label + ['Filename']].copy(deep=True)
df_loi = data[loi_label + ['Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)

# The character ID is stored inside the filename together with other information.
# Removing every run of non-digit characters leaves only the digits; no numeric conversion is applied.
# The pattern is a raw string so that `\D` reaches the regex engine instead of being treated as an
# (invalid) Python string escape.
# NOTE(review): every digit of the filename is kept, so this assumes the ID is the only number
# in the filename - confirm against the data.
char_id = data['Filename'].replace(r'\D+', '', regex=True)
df_emotion['Char_ID'] = char_id
df_affect['Char_ID'] = char_id
df_loi['Char_ID'] = char_id
df_ar_val['Char_ID'] = char_id

## Correlation of arousal and other features

In [2]:
# Correlate arousal with each feature group in turn: emotions, affect, level of interest.
# NOTE(review): pandas' .corr defaults to Pearson (not Spearman); which method hp.correlations
# actually applies is not visible in this notebook - confirm in Helper.
arousal_scores = df_ar_val['Arousal']

# Arousal vs. emotions
print('Emotions and Arousal')
cor_ar_emo = hp.correlations(arousal_scores, df_emotion, emotion_label)

# Arousal vs. affect
print('Affect and Arousal')
cor_ar_aff = hp.correlations(arousal_scores, df_affect, affect_label)

# Arousal vs. level of interest
print('Level of Interest and Arousal')
cor_ar_loi = hp.correlations(arousal_scores, df_loi, loi_label)

Emotions and Arousal
Correlation between Arousal and Anger: 0.25255370376242414
Cohen d: 1.3573865249274477
Correlation between Arousal and Boredom: -0.001106569644092107
Cohen d: 0.6455168763416517
Correlation between Arousal and Disgust: 0.4226282883964526
Cohen d: -0.4670573629305912
Correlation between Arousal and Fear: 0.08580268053997284
Cohen d: 1.5485603397340777
Correlation between Arousal and Happiness: 0.21509188004100346
Cohen d: 1.544243037501692
Correlation between Arousal and Emo_Neutral: 0.003149503096701794
Cohen d: 1.4922484348648426
Correlation between Arousal and Sadness: -0.4059846893083776
Cohen d: -2.4107760439361514
Affect and Arousal
Correlation between Arousal and Aggressiv: 0.576954650830989
Cohen d: -0.512701550252304
Correlation between Arousal and Cheerful: -0.06946690020406257
Cohen d: -2.474039283976268
Correlation between Arousal and Intoxicated: 0.329779514947111
Cohen d: 0.029913059151179092
Correlation between Arousal and Nervous: -0.1668139318360092

## Correlation of valence and other features

In [3]:
# Correlate valence with each feature group in turn: emotions, affect, level of interest.
valence_scores = df_ar_val['Valence']

# Valence vs. emotions
print('Emotions and valence')
cor_val_emo = hp.correlations(valence_scores, df_emotion, emotion_label)

# Valence vs. affect
print('Affect and valence')
cor_val_aff = hp.correlations(valence_scores, df_affect, affect_label)

# Valence vs. level of interest
print('Level of Interest and valence')
cor_val_loi = hp.correlations(valence_scores, df_loi, loi_label)

Emotions and valence
Correlation between Valence and Anger: -0.14452524415728302
Cohen d: 0.48485210753405894
Correlation between Valence and Boredom: -0.2362706304432707
Cohen d: -0.13130264314400483
Correlation between Valence and Disgust: -0.07767829451533187
Cohen d: -0.9258030018839601
Correlation between Valence and Fear: -0.251623341858475
Cohen d: 0.6777871431910653
Correlation between Valence and Happiness: -0.24758336584796678
Cohen d: 0.6683491218819197
Correlation between Valence and Emo_Neutral: -0.18526192183003348
Cohen d: 0.6125365362280851
Correlation between Valence and Sadness: 0.2129627227835592
Cohen d: -2.8511433574437506
Affect and valence
Correlation between Valence and Aggressiv: -0.07941063186252019
Cohen d: -1.4876748125557133
Correlation between Valence and Cheerful: -0.02319798570467146
Cohen d: -3.6008578143882612
Correlation between Valence and Intoxicated: 0.07290756235462001
Cohen d: -1.2157018837637508
Correlation between Valence and Nervous: 0.0892147

## Correlation of different emotions and other features

In [4]:
def _emotion_vs_affect_and_loi(column, name):
    """Print the headers for one emotion and correlate it with affect and level of interest.

    Parameters
    ----------
    column : str
        Column of ``df_emotion`` to use as the correlation target (e.g. ``'Anger'``).
    name : str
        Lower-case display name used in the printed headers (e.g. ``'anger'``).
        It is passed separately because it does not always match the column
        name (``'Emo_Neutral'`` is printed as ``'neutral'``).

    Returns
    -------
    tuple
        ``(affect_result, loi_result)``: the values returned by ``hp.correlations``
        for the affect frame and for the level-of-interest frame, in that order.
    """
    print(name.upper())
    print('Affect and ' + name)
    affect_result = hp.correlations(df_emotion[column], df_affect, affect_label)
    print('Level of Interest and ' + name)
    loi_result = hp.correlations(df_emotion[column], df_loi, loi_label)
    return affect_result, loi_result


# One call per emotion; each prints its section and yields the (affect, level of interest) results.
cor_ang_aff, cor_ang_loi = _emotion_vs_affect_and_loi('Anger', 'anger')
cor_bor_aff, cor_bor_loi = _emotion_vs_affect_and_loi('Boredom', 'boredom')
cor_dis_aff, cor_dis_loi = _emotion_vs_affect_and_loi('Disgust', 'disgust')
cor_fea_aff, cor_fea_loi = _emotion_vs_affect_and_loi('Fear', 'fear')
cor_hap_aff, cor_hap_loi = _emotion_vs_affect_and_loi('Happiness', 'happiness')
cor_eneu_aff, cor_eneu_loi = _emotion_vs_affect_and_loi('Emo_Neutral', 'neutral')
cor_sad_aff, cor_sad_loi = _emotion_vs_affect_and_loi('Sadness', 'sadness')

ANGER
Affect and anger
Correlation between Anger and Aggressiv: 0.5709535519338497
Cohen d: -2.131694483822812
Correlation between Anger and Cheerful: -0.12009238152492278
Cohen d: -4.414005025953416
Correlation between Anger and Intoxicated: -0.05155239042021434
Cohen d: -2.429804894665699
Correlation between Anger and Nervous: -0.28877377929192966
Cohen d: -0.5191245752331356
Correlation between Anger and Aff_Neutral: -0.2151002548329658
Cohen d: -1.2063934914507266
Correlation between Anger and Tired: -0.1800951153479594
Cohen d: -1.2926807250586014
Level of Interest and anger
Correlation between Anger and Disinterest: -0.18635615427261384
Cohen d: 0.12735157930536276
Correlation between Anger and Normal: -0.10480493897644456
Cohen d: -2.188169185748951
Correlation between Anger and High Interest: 0.11150159183622312
Cohen d: -2.1567655125346765
BOREDOM
Affect and boredom
Correlation between Boredom and Aggressiv: 0.19820346633386957
Cohen d: -1.1892731842534168
Correlation between 

## Correlation of affect and other features

In [5]:
def _affect_vs_loi(column, name):
    """Print the headers for one affect and correlate it with the level of interest.

    Parameters
    ----------
    column : str
        Column of ``df_affect`` to use as the correlation target (e.g. ``'Tired'``).
    name : str
        Lower-case display name used in the printed headers (e.g. ``'tired'``).
        It is passed separately because it does not always match the column
        name (``'Aff_Neutral'`` is printed as ``'neutral'``).

    Returns
    -------
    The value returned by ``hp.correlations`` for the level-of-interest frame.
    """
    print(name.upper())
    print('Level of Interest and ' + name)
    return hp.correlations(df_affect[column], df_loi, loi_label)


# One call per affect; each prints its section and yields the level-of-interest result.
cor_agg_loi = _affect_vs_loi('Aggressiv', 'aggressiv')
cor_che_loi = _affect_vs_loi('Cheerful', 'cheerful')
cor_tox_loi = _affect_vs_loi('Intoxicated', 'intoxicated')
cor_ner_loi = _affect_vs_loi('Nervous', 'nervous')
cor_aneu_loi = _affect_vs_loi('Aff_Neutral', 'neutral')
# Alias for the shorter name this result was originally stored under, in case other cells still use it.
cor_aneu = cor_aneu_loi
cor_tir_loi = _affect_vs_loi('Tired', 'tired')

AGGRESSIV
Level of Interest and aggressiv
Correlation between Aggressiv and Disinterest: -0.5035384783892148
Cohen d: 2.369383539156594
Correlation between Aggressiv and Normal: -0.07379231952531891
Cohen d: -1.3611599111726962
Correlation between Aggressiv and High Interest: 0.0950824513041327
Cohen d: -1.343661830439837
CHEERFUL
Level of Interest and cheerful
Correlation between Cheerful and Disinterest: -0.2139035671870638
Cohen d: 4.741993219197139
Correlation between Cheerful and Normal: 0.17815430829745127
Cohen d: -0.35295949215565187
Correlation between Cheerful and High Interest: -0.16591277130470306
Cohen d: -0.34948448025369006
INTOXICATED
Level of Interest and intoxicated
Correlation between Intoxicated and Disinterest: -0.11241308551271742
Cohen d: 3.3761554056971446
Correlation between Intoxicated and Normal: 0.18298844474499212
Cohen d: -1.6781369244736521
Correlation between Intoxicated and High Interest: -0.17519145057506463
Cohen d: -1.6543811718156123
NERVOUS
Level o