# OpenSMILE Correlation
This notebook calculates correlations and Cohen's d (as a measure of effect size) for the given conference data set.

In [1]:
import Helper as hp
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns

# Read the per-sample feature table.
data = pd.read_csv("UIST_2019_short_samples_OpenSMILE.csv")

# Column names of each feature group; reused below to slice `data` and later
# passed to hp.correlations.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

# One data frame per feature group, each keeping the 'Filename' column.
# .copy(deep=True) makes every frame independent of `data`, so adding the
# 'Char_ID' column below does not raise pandas' SettingWithCopyWarning and
# does not modify `data`.
df_emotion = data[emotion_label + ['Filename']].copy(deep=True)
df_affect = data[affect_label + ['Filename']].copy(deep=True)
df_loi = data[loi_label + ['Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)

# Append the character ID as a column. It is stored inside the filename, so we
# strip every non-digit character and keep what is left.
# The pattern is a raw string: '\D' in a normal string literal is an invalid
# escape sequence and triggers a warning on current Python versions.
# NOTE(review): all digits of the filename are concatenated - confirm that the
# filenames contain no digits other than the character ID.
df_emotion['Char_ID'] = df_emotion['Filename'].replace(r'\D+', '', regex=True)
df_affect['Char_ID'] = df_affect['Filename'].replace(r'\D+', '', regex=True)
df_loi['Char_ID'] = df_loi['Filename'].replace(r'\D+', '', regex=True)
df_ar_val['Char_ID'] = df_ar_val['Filename'].replace(r'\D+', '', regex=True)

## Correlation of arousal and other features

In [2]:
# Correlate arousal with every feature group via the Helper module.
# Note: pandas' own .corr defaults to Pearson, not Spearman; which coefficient
# hp.correlations reports is decided inside Helper - TODO confirm.
def _correlate_arousal(heading, frame, labels):
    """Print `heading`, then return hp.correlations of arousal against `labels` in `frame`."""
    print(heading)
    return hp.correlations(df_ar_val['Arousal'], frame, labels)


# Arousal vs. emotions, affect and level of interest (in that order).
cor_ar_emo = _correlate_arousal('Emotions and Arousal', df_emotion, emotion_label)
cor_ar_aff = _correlate_arousal('Affect and Arousal', df_affect, affect_label)
cor_ar_loi = _correlate_arousal('Level of Interest and Arousal', df_loi, loi_label)

Emotions and Arousal
Correlation between Arousal and Anger: 0.3098408394837389
Cohen d: 0.06806618429603518
Correlation between Arousal and Boredom: 0.1023850821543185
Cohen d: -0.29876430183088304
Correlation between Arousal and Disgust: 0.567916395670361
Cohen d: -0.7662129737388526
Correlation between Arousal and Fear: 0.12233596739632259
Cohen d: 0.12861575332192837
Correlation between Arousal and Happiness: 0.2610432408733648
Cohen d: 0.10839452370134481
Correlation between Arousal and Emo_Neutral: 0.061629826013328844
Cohen d: 0.04005231253844504
Correlation between Arousal and Sadness: -0.5178053448236828
Cohen d: -4.134801653189615
Affect and Arousal
Correlation between Arousal and Aggressiv: 0.7039818647174771
Cohen d: -1.3768006279218763
Correlation between Arousal and Cheerful: 0.09341875681131924
Cohen d: -4.2983590332784
Correlation between Arousal and Intoxicated: 0.3077130440372754
Cohen d: -1.5187159680132447
Correlation between Arousal and Nervous: -0.4398280984927542


## Correlation of Valence and other features

In [3]:
def _correlate_valence(heading, frame, labels):
    """Print `heading`, then return hp.correlations of valence against `labels` in `frame`."""
    print(heading)
    return hp.correlations(df_ar_val['Valence'], frame, labels)


# Valence vs. emotions, affect and level of interest (in that order).
cor_val_emo = _correlate_valence('Emotions and valence', df_emotion, emotion_label)
cor_val_aff = _correlate_valence('Affect and valence', df_affect, affect_label)
cor_val_loi = _correlate_valence('Level of Interest and valence', df_loi, loi_label)

Emotions and valence
Correlation between Valence and Anger: -0.373684317225437
Cohen d: -0.0972859155840054
Correlation between Valence and Boredom: -0.43666484823952045
Cohen d: -0.4425137235828643
Correlation between Valence and Disgust: -0.22954791624605117
Cohen d: -0.850487211584826
Correlation between Valence and Fear: -0.33850722133984884
Cohen d: -0.030097341482694422
Correlation between Valence and Happiness: -0.4412640036287537
Cohen d: -0.053042662561992776
Correlation between Valence and Emo_Neutral: -0.33147525887783236
Cohen d: -0.12155676483968056
Correlation between Valence and Sadness: 0.4121391035643897
Cohen d: -4.248943244290755
Affect and valence
Correlation between Valence and Aggressiv: -0.11634564373153033
Cohen d: -1.6438016354897143
Correlation between Valence and Cheerful: -0.06240221350547363
Cohen d: -4.673849215863179
Correlation between Valence and Intoxicated: 0.07868841020261069
Cohen d: -1.7746141722993751
Correlation between Valence and Nervous: 0.243

## Correlation of different emotions and other features

In [4]:
def _correlate_emotion(column, word):
    """Correlate one emotion column with the affect and level-of-interest groups.

    Prints the upper-cased `word` as a section heading, then for each of the
    two groups prints a sub-heading and calls hp.correlations.

    column: name of the column in df_emotion (e.g. 'Emo_Neutral').
    word:   lower-case word used in the printed headings (e.g. 'neutral').
    Returns the pair (result for affect, result for level of interest).
    """
    print(word.upper())
    print('Affect and ' + word)
    with_affect = hp.correlations(df_emotion[column], df_affect, affect_label)
    print('Level of Interest and ' + word)
    with_loi = hp.correlations(df_emotion[column], df_loi, loi_label)
    return with_affect, with_loi


# One call per emotion; the result names are unchanged so later cells can
# keep using them.
cor_ang_aff, cor_ang_loi = _correlate_emotion('Anger', 'anger')
cor_bor_aff, cor_bor_loi = _correlate_emotion('Boredom', 'boredom')
cor_dis_aff, cor_dis_loi = _correlate_emotion('Disgust', 'disgust')
cor_fea_aff, cor_fea_loi = _correlate_emotion('Fear', 'fear')
cor_hap_aff, cor_hap_loi = _correlate_emotion('Happiness', 'happiness')
# The emotion column is 'Emo_Neutral' but the headings just say 'neutral'.
cor_eneu_aff, cor_eneu_loi = _correlate_emotion('Emo_Neutral', 'neutral')
cor_sad_aff, cor_sad_loi = _correlate_emotion('Sadness', 'sadness')

ANGER
Affect and anger
Correlation between Anger and Aggressiv: 0.641114422633133
Cohen d: -2.4862145746469846
Correlation between Anger and Cheerful: 0.03112399943218024
Cohen d: -5.923115505031167
Correlation between Anger and Intoxicated: 0.02853443696426703
Cohen d: -2.489248836019607
Correlation between Anger and Nervous: -0.3210529939102609
Cohen d: -2.085596099604509
Correlation between Anger and Aff_Neutral: -0.1622596877496451
Cohen d: -2.663886446816148
Correlation between Anger and Tired: -0.272612448886402
Cohen d: -1.8801766678765388
Level of Interest and anger
Correlation between Anger and Disinterest: -0.38028631185979106
Cohen d: -0.7109465075469505
Correlation between Anger and Normal: -0.21266301791159453
Cohen d: -9.15548034789561
Correlation between Anger and High Interest: 0.24248611308290316
Cohen d: -1.1366737685603665
BOREDOM
Affect and boredom
Correlation between Boredom and Aggressiv: 0.2268905635112522
Cohen d: -1.1026530416706013
Correlation between Boredom 

## Correlation of affect and other features

In [5]:
def _correlate_affect_with_loi(column, word):
    """Correlate one affect column with the level-of-interest group.

    Prints the upper-cased `word` as a section heading and a
    'Level of Interest and <word>' sub-heading, then calls hp.correlations.

    column: name of the column in df_affect (e.g. 'Aff_Neutral').
    word:   lower-case word used in the printed headings (e.g. 'neutral').
    Returns whatever hp.correlations returns.
    """
    print(word.upper())
    print('Level of Interest and ' + word)
    return hp.correlations(df_affect[column], df_loi, loi_label)


# One call per affect; the result names are unchanged so later cells can
# keep using them.
cor_agg_loi = _correlate_affect_with_loi('Aggressiv', 'aggressiv')
cor_che_loi = _correlate_affect_with_loi('Cheerful', 'cheerful')
cor_tox_loi = _correlate_affect_with_loi('Intoxicated', 'intoxicated')
cor_ner_loi = _correlate_affect_with_loi('Nervous', 'nervous')
# NOTE(review): `cor_aneu` lacks the `_loi` suffix the other results carry;
# the name is kept as-is in case later cells refer to it.
cor_aneu = _correlate_affect_with_loi('Aff_Neutral', 'neutral')
cor_tir_loi = _correlate_affect_with_loi('Tired', 'tired')

AGGRESSIV
Level of Interest and aggressiv
Correlation between Aggressiv and Disinterest: -0.5439748718473362
Cohen d: 2.3093540355666526
Correlation between Aggressiv and Normal: -0.12513239221246003
Cohen d: -7.18017704957523
Correlation between Aggressiv and High Interest: 0.16973937642402595
Cohen d: 0.07130097415283833
CHEERFUL
Level of Interest and cheerful
Correlation between Cheerful and Disinterest: -0.32683767862220653
Cohen d: 5.818328811072386
Correlation between Cheerful and Normal: -0.06118005690389491
Cohen d: -3.7631418105610925
Correlation between Cheerful and High Interest: 0.08813381280701353
Cohen d: 2.657581402203916
INTOXICATED
Level of Interest and intoxicated
Correlation between Intoxicated and Disinterest: 0.00676858918317989
Cohen d: 2.3332667770987965
Correlation between Intoxicated and Normal: 0.16425821498457382
Cohen d: -6.783518922869733
Correlation between Intoxicated and High Interest: -0.1630443264640382
Cohen d: 0.23591959664581016
NERVOUS
Level of Int