# CHI Square Test
To further look into differences in our data, we conduct multiple chi² tests to see if there are any significant differences between females/males and grad students/PhDs regarding different emotion, affect, level of interest and arousal valence attributes.

## Import relevant libraries

In [1]:
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import sklearn.preprocessing as pp
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns
import Helper as hp

  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)


## Load .csv data with results of OpenSMILE Analysis
First we load the .csv data and clean it (removing NaNs), then we store the information of all files in separate pandas DataFrames containing information about affect, emotion and valence/arousal for all participants.

In [2]:
data = pd.read_csv("CHI_2019_FULL.csv")

# Column groups of the OpenSMILE results. These label lists are reused by later cells,
# so the frames below are built from them instead of repeating the column names.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

# Split the full table into one frame per attribute family.
# Each frame is an explicit deep copy: a 'Char_ID' column is added to it below, and
# writing to an independent copy avoids pandas' SettingWithCopyWarning.
df_emotion = data[emotion_label + ['Filename']].copy(deep=True)
df_affect = data[affect_label + ['Filename']].copy(deep=True)
df_loi = data[loi_label + ['Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)

# For further usage we append the character ID as a column. It is stored, together with
# other information, in the file name; removing every non-digit character leaves the ID.
# The pattern is a raw string so that the backslash reaches the regex engine unchanged
# ('\D' in a normal string literal is an invalid escape sequence).
for _frame in (df_emotion, df_affect, df_loi, df_ar_val):
    _frame['Char_ID'] = _frame['Filename'].replace(r'\D+', '', regex=True)



## Let's load information about the speakers
The speaker information is saved in a single .csv file containing four important columns: ID, Age, Sex and Academic Status. The previously loaded OpenSMILE files are named using the corresponding speaker ID (e.g. the speaker with ID 0 has the two files 0_a.csv and 0_b.csv), so that a link between the two tables can be created.

In [3]:
# Speaker metadata; its 'ID' column is the key the OpenSMILE frames are joined on.
char_data = pd.read_csv("CHI_2019_CharacterData.csv")

# Both join keys are cast to int so that the two sides of the merge share a data type.
char_data['ID'] = char_data['ID'].astype(int)
for _frame in (df_ar_val, df_emotion, df_affect, df_loi):
    _frame['Char_ID'] = _frame['Char_ID'].astype(int)

# Left join: every OpenSMILE row is kept and enriched with the matching speaker columns.
_join_spec = dict(how='left', left_on='Char_ID', right_on='ID')
df_ar_val_char = df_ar_val.merge(char_data, **_join_spec)
df_emotion_char = df_emotion.merge(char_data, **_join_spec)
df_affect_char = df_affect.merge(char_data, **_join_spec)
df_loi_char = df_loi.merge(char_data, **_join_spec)

# Keep only the rows that belong to an answer.
# The sentence type is encoded in the file name (a = answer, p = presentation, q = question):
# after stripping the digits, the characters [3:-4] hold it,
# e.g. '192_a_a.wav' -> '_a_a.wav' -> 'a'.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace treats the pattern
# as a literal string by default, which would leave the digits in place and make the
# filter below return empty frames. The pattern is a raw string to avoid an invalid
# '\d' escape sequence.
# The sentence type should be the same for all tables, but it is derived per table to be sure.
arval_sentence_type = df_ar_val_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_ar_val_char['SentenceType'] = arval_sentence_type
emo_sentence_type = df_emotion_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_emotion_char['SentenceType'] = emo_sentence_type
aff_sentence_type = df_affect_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_affect_char['SentenceType'] = aff_sentence_type
loi_sentence_type = df_loi_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_loi_char['SentenceType'] = loi_sentence_type

# Select only the rows with SentenceType == 'a'. The explicit .copy() makes each result an
# independent frame, so later column assignments do not raise SettingWithCopyWarning.
df_ar_val_char = df_ar_val_char.loc[df_ar_val_char['SentenceType'] == 'a'].copy()
df_emotion_char = df_emotion_char.loc[df_emotion_char['SentenceType'] == 'a'].copy()
df_affect_char = df_affect_char.loc[df_affect_char['SentenceType'] == 'a'].copy()
df_loi_char = df_loi_char.loc[df_loi_char['SentenceType'] == 'a'].copy()

# 'Intoxicated' is excluded from the affect analysis. The label list is rebuilt rather than
# mutated in place, so this statement does not raise ValueError when the label is already gone.
affect_label = [label for label in affect_label if label != 'Intoxicated']
df_affect_char = df_affect_char.drop(columns=['Intoxicated'])
# Re-normalise the remaining affect columns row-wise with the L1 norm.
norm_test = pp.normalize(df_affect_char[affect_label], norm='l1')
df_affect_char[affect_label] = norm_test

# Collapse 'Disinterest' and 'Normal' into one 'Normal Interest' column.
# assign() returns a new frame, so no column is written onto the filtered slice
# (which would raise SettingWithCopyWarning); the new column is appended last, as before.
df_loi_char = (
    df_loi_char
    .assign(**{'Normal Interest': df_loi_char['Disinterest'] + df_loi_char['Normal']})
    .drop(columns=['Disinterest', 'Normal'])
)
loi_label = ['Normal Interest', 'High Interest']

# Compensate for multiple samples of the same person: every attribute value of a speaker
# is replaced by the result of hp.constructMedianSeries over that speaker's rows
# (presumably the per-speaker median broadcast to all rows - confirm in Helper).

# drop_duplicates() returns a new Series, so its result has to be kept; otherwise the loop
# below would reprocess each speaker once per row instead of once per speaker.
IDs = df_emotion_char['Char_ID'].drop_duplicates()

# Frames to aggregate, each paired with the columns that hold its attribute values.
_median_targets = (
    (df_emotion_char, emotion_label),
    (df_affect_char, affect_label),
    (df_loi_char, loi_label),
    (df_ar_val_char, ['Arousal', 'Valence']),
)

# NOTE(review): if hp.constructMedianSeries only broadcasts the median, the vectorised
# equivalent would be frame.groupby('Char_ID')[labels].transform('median') - verify the
# helper's behaviour before switching.
for i in IDs:
    for _frame, _labels in _median_targets:
        # Char_ID is not modified in this loop, so the row mask is computed once per frame.
        _rows = _frame.Char_ID == i
        for l in _labels:
            _frame.loc[_rows, l] = hp.constructMedianSeries(_frame.loc[_rows, l])

# Now drop the duplicate rows, since only one row per person is needed.
# Rebinding (instead of inplace=True) avoids modifying a frame that may still be a slice.
df_emotion_char = df_emotion_char.drop_duplicates(subset=['ID'])
df_affect_char = df_affect_char.drop_duplicates(subset=['ID'])
df_loi_char = df_loi_char.drop_duplicates(subset=['ID'])
df_ar_val_char = df_ar_val_char.drop_duplicates(subset=['ID'])

## Chi-squared Test of Independence
We start with the characteristic sex. The null hypothesis states that the two categorical variables sex and e.g. emotion are independent.

Since we have float data and chi² needs integer data, such as observation counts, we have to convert our data. To illustrate how this is done, we'll look at a specific emotion, 'Anger'. We need to make sure that our observation counts do not contain any cell with a value of less than 5, since this can cause errors and may distort the result. So we calculate the quartiles of our emotion 'Anger', which yields three thresholds against which the float data is compared. This way, we can count how many samples fall into the 1st, 2nd, 3rd or 4th quartile. We want to compare two (or more) groups, so we first sort only the female values into the quartiles and then do the same for the male values. This yields a 2x4 table. An example table is printed below. This table is used to calculate the chi² statistic. Note that the function 'calcFrequencyTable' takes a pd.DataFrame, not a pd.Series, and returns an array of pd.DataFrames. This means that the function calculates these tables for all attributes of the given label list, e.g. for all emotions defined in emotion_label.

In [4]:
# Example frequency table for the emotion 'Anger'.
# hp.calcFrequencyTable calculates one table per entry of emotion_label, so the table is
# selected via the position of 'Anger' in that list instead of a hard-coded index 0;
# the example stays correct if the label order ever changes.
anger_table = hp.calcFrequencyTable(df_emotion_char, emotion_label, 'Sex')[emotion_label.index('Anger')]
anger_table

Unnamed: 0,Arousal,Valence,Filename,Char_ID,ID,VideoTitle,Name,Sex,Academic Status,VideoID,IsNativeSpeaker,SentenceType
0,0.350267,0.057099,192_a_a.wav,192,192,23 Ways to Nudge: Review of Technology-Mediate...,Ana Caraban,Female,Grad Student,-U3Bo7KR108,Europ. Non-Native,a
11,0.032463,0.065131,197_a_a.wav,197,197,"A Badge, Not a Barrier: Designing for- and Trh...",Caroline Pitt,Female,Grad Student,HxMSkH5ZpZI,Native Speaker,a
18,0.135614,0.013635,200_a_a.wav,200,200,A Change of Perspective: Designing Automated v...,Lenja Sorokin,Female,PhD,SmnSU4QceSk,Europ. Non-Native,a
25,0.140131,0.043628,203_a_a.wav,203,203,A Design Space for Gaze Interaction on HMD,Teresa Hirzle,Female,Grad Student,91VMIz6MAZ8,Europ. Non-Native,a
34,0.254557,0.002145,207_a_a.wav,207,207,Analyzing Value Discovery in Design Decisions ...,Sai Shruthi Chivukula,Female,Grad Student,3lV3YQUvyyY,Asian Non-Native,a
...,...,...,...,...,...,...,...,...,...,...,...,...
590,0.244323,0.120048,447_a_a.wav,447,447,Implementing Multi-Touch Gestures with Touch G...,Brad Myers,Male,PhD,didYdY83zoc,Native Speaker,a
599,-0.000361,0.105117,451_a_a.wav,451,451,Improving Early Navigation in Time-Lapse Video...,Michael Van Der Kamp,Male,Grad Student,G-LMD3Ro_hQ,Native Speaker,a
608,0.174183,0.129396,455_a_a.wav,455,455,In a Silent Way: Communication Between AI and ...,Jon McCormack,Male,PhD,r5KA1Jo_vYU,Native Speaker,a
613,0.207187,-0.021107,457_a_a.wav,457,457,Interferi: Gesture Sensing using On-Body Acous...,Yasha Iravantchi,Male,Grad Student,xBpBBpQh7GI,Native Speaker,a


In [5]:
# Chi² test of independence between 'Sex' and every attribute family.
# The trailing True flag is handed to hp.chi2 unchanged (presumably it switches on the
# printed report shown below - confirm in Helper).
_group = 'Sex'

print('EMOTION\n')
emo_sex_chi2 = hp.chi2(df_emotion_char, emotion_label, _group, True)

print('\nAFFECT\n')
aff_sec_chi2 = hp.chi2(df_affect_char, affect_label, _group, True)

print('\nAROUSAL-VALENCE\n')
ar_val_sec_chi2 = hp.chi2(df_ar_val_char, ['Arousal', 'Valence'], _group, True)

print('\nLEVEL OF INTEREST\n')
loi_sec_chi2 = hp.chi2(df_loi_char, ['Normal Interest', 'High Interest'], _group, True)

# Have a look at the residuals for one affect label; element [1] of the hp.chi2 result
# is indexed by the label's position in affect_label.
_residual_idx = 3
print(f'\nResiduals of: {affect_label[_residual_idx]}')
aff_sec_chi2[1][_residual_idx]

EMOTION

Chi square of Anger : 1.8355531358885018 with p-value of: 0.607228904660063
Cramers V: 0.05830242253858407
Chi square of Boredom : 3.3214939024390246 with p-value of: 0.344662280080847
Cramers V: 0.07842776698431711
Chi square of Disgust : 21.438540940766547 with p-value of: 8.535270195513537e-05
Cramers V: 0.1992511022357474
Chi square of Fear : 3.321493902439025 with p-value of: 0.34466228008084693
Cramers V: 0.07842776698431712
Chi square of Happiness : 1.416441637630662 with p-value of: 0.7016852861743752
Cramers V: 0.0512156232968653
Chi square of Emo_Neutral : 5.045566202090593 with p-value of: 0.16849080896437843
Cramers V: 0.09666251142497065
Chi square of Sadness : 14.351746515679443 with p-value of: 0.002463439075194692
Cramers V: 0.16302548378204354

AFFECT

Chi square of Aggressiv : 10.951228222996514 with p-value of: 0.011992547597151456
Cramers V: 0.14240804845516408
Chi square of Cheerful : 9.836772648083626 with p-value of: 0.02000582312257192
Cramers V: 0.1349

Unnamed: 0,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile
Male,1.333567,0.579005,-1.482506,-0.451751
Female,-1.333567,-0.579005,1.482506,0.451751


If we have a look at the p-values regarding the different emotions, we can see significant differences in boredom, disgust, fear, neutral and sadness and can reject our hypothesis.
To further investigate where the differences are, we'll have a look at the standardized residuals. 
Regarding boredom we see that the main differences between females and males lie between the 1st and 2nd quartile: Females have a value of ~3.11 in the 1st quartile, while males have a value of ~3.04 in the 2nd quartile. Regarding disgust we see that females tend to have values above the median (positive values in the 3rd and 4th Quartile) whereas males tend to have values below the median (positive values in 1st and 2nd Quartile). Looking at fear we see that females tend to have lower values (~3.11 in 1st Quartile) while males have a value of ~1.37 for the 2nd Quartile and ~2.04 for the 3rd Quartile, the 4th quartile does not show great differences. This means, that men tend to have more values around the median than females. Looking at neutral (emotion) we can see that males and females only differ between the 1st and 2nd quartile (female 1st quartile ~3.1; male 2nd quartile ~2.7). Regarding sadness, we see that females and males differ in the extremes: females have a value of ~4.10 for the 1st quartile whereas males have a value of ~4.04 for the 4th quartile. This implies that females tend to have lower values for sadness than males.

Looking at the affect p-values, we also see statistical significance in aggressive, cheerful, intoxicated and tired; meaning the two populations are significantly different from each other and therefore again rejecting our hypotheses.
Further looking at the residuals, we are able to see where the differences are. Regarding aggressive, we see that males tend to have lower values (1st quartile ~4.17 for males) than females (4th quartile ~4.62 for females). Looking at cheerful, we see that females tend to have lower values than males, since the value for the 4th quartile for males is ~4.04, whereas the values for females for the 2nd and 3rd quartile are ~2.96 and ~1.62. Regarding intoxication we are able to see that females tend to have higher values (3rd quartile and 4th quartile positive), whereas males have lower intoxication values (1st quartile ~4.84). Regarding tiredness, we see the most differences in the 1st and 4th quartile: males have higher values for tiredness (4th quartile ~3.71) than females (1st quartile ~3.44).

Also for Arousal-Valence, we can say that the populations differ in arousal significantly. Looking at the residuals we see that females tend to have higher values than males (only 1st quartile is positive for males ~5.50).

Regarding Level of Interest, we only see a statistically significant difference in disinterest. Regarding the residuals we see that females have lower values (1st and 2nd Quartile positive) for disinterest than males.

So now we know, that females and males differ significantly regarding the distribution into the quantiles.
Now move on to academic status, the hypothesis being that the variables academic status and e.g. emotion are independent.

In [6]:
# Chi² test of independence between academic status and every attribute family.
# NOTE(review): the merged frames display this column as 'Academic Status', while the
# call passes 'Academic' - confirm how hp.chi2 resolves the column name.
_group = 'Academic'

print('EMOTION\n')
emo_aca_chi2 = hp.chi2(df_emotion_char, emotion_label, _group, True)

print('\nAFFECT\n')
aff_aca_chi2 = hp.chi2(df_affect_char, affect_label, _group, True)

print('\nAROUSAL-VALENCE\n')
ar_val_aca_chi2 = hp.chi2(df_ar_val_char, ['Arousal', 'Valence'], _group, True)

print('\nLEVEL OF INTEREST\n')
loi_aca_chi2 = hp.chi2(df_loi_char, ['Normal Interest', 'High Interest'], _group, True)

# Display element [1][0] of the arousal-valence result; position 0 corresponds to
# 'Arousal' in the label list passed above (presumably its residual table - confirm in Helper).
ar_val_aca_chi2[1][0]

EMOTION

Chi square of Anger : 0.9505941213258285 with p-value of: 0.8132042399757146
Cramers V: 0.04195663813911595
Chi square of Boredom : 0.15009380863039404 with p-value of: 0.9852126093766351
Cramers V: 0.016671877442678514
Chi square of Disgust : 1.350844277673546 with p-value of: 0.7170964469463956
Cramers V: 0.05001563232803554
Chi square of Fear : 3.7523452157598496 with p-value of: 0.2894780532221424
Cramers V: 0.08335938721339256
Chi square of Happiness : 2.951844903064415 with p-value of: 0.399109490932093
Cramers V: 0.07393496770892165
Chi square of Emo_Neutral : 0.15009380863039404 with p-value of: 0.9852126093766351
Cramers V: 0.016671877442678514
Chi square of Sadness : 1.6532889446881744 with p-value of: 0.6473681624396519
Cramers V: 0.05533214430931617

AFFECT

Chi square of Aggressiv : 4.152595372107567 with p-value of: 0.24545104939706014
Cramers V: 0.08769259620873833
Chi square of Cheerful : 3.3520950594121324 with p-value of: 0.3404575488246489
Cramers V: 0.07878

Unnamed: 0,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile
Grad Student,-0.387628,-1.162258,-0.913259,2.453656
PhD,0.387628,1.162258,0.913259,-2.453656


Looking at emotion, we see that Grad Students and PhDs do not differ significantly.

The same thing goes for affect, we can't see any significant results.

Looking at arousal valence, we see that Grad Students and PhDs differ in arousal. By looking at the residuals we see that PhDs have lower values (2nd Quartile ~2.58) than Grad Students (4th Quartile ~1.76).

Looking at Level of Interest, we can see that GradStudents and PhDs do not differ.

So, PhDs and Grad Students only differ in arousal.

Again, we do not know yet where exactly those differences are.


Now let's look at Native Speaker

In [7]:
# Chi² test of independence between 'IsNativeSpeaker' and every attribute family.
# NOTE: the result variables carry an '_age_' infix although the grouping column is
# 'IsNativeSpeaker'; the names are kept unchanged so that other cells keep working.
_group = 'IsNativeSpeaker'

print('EMOTION\n')
emo_age_chi2 = hp.chi2(df_emotion_char, emotion_label, _group, True)

print('\nAFFECT\n')
aff_age_chi2 = hp.chi2(df_affect_char, affect_label, _group, True)

print('\nAROUSAL-VALENCE\n')
ar_val_age_chi2 = hp.chi2(df_ar_val_char, ['Arousal', 'Valence'], _group, True)

print('\nLEVEL OF INTEREST\n')
loi_age_chi2 = hp.chi2(df_loi_char, ['Normal Interest', 'High Interest'], _group, True)

EMOTION

Chi square of Anger : 10.462144612144611 with p-value of: 0.10649119848258841
Cramers V: 0.13919174499352277
Chi square of Boredom : 10.374008424008423 with p-value of: 0.10975981176500317
Cramers V: 0.13860420885069366
Chi square of Disgust : 6.205071955071954 with p-value of: 0.40061445305667526
Cramers V: 0.10719549426572923
Chi square of Fear : 6.1441821691821685 with p-value of: 0.40723419803423266
Cramers V: 0.10666824798465159
Chi square of Happiness : 6.3730519480519465 with p-value of: 0.38272290337583825
Cramers V: 0.10863677117784774
Chi square of Emo_Neutral : 9.75539663039663 with p-value of: 0.13533795785838437
Cramers V: 0.13440814452833325
Chi square of Sadness : 6.849271674271674 with p-value of: 0.3350118012481717
Cramers V: 0.11262253963499683

AFFECT

Chi square of Aggressiv : 6.528474903474904 with p-value of: 0.36665916243772895
Cramers V: 0.10995348261773404
Chi square of Cheerful : 5.346691821691819 with p-value of: 0.500176183426803
Cramers V: 0.099505

## Post-Hoc tests for age and native speaker, as they have three different groups

If a significant p-value for the category 'NativeSpeaker' is found, we do not yet know which groups differ significantly from each other, so post-hoc testing is done for this character feature.

In [8]:
# Pairwise post-hoc chi² tests between the 'IsNativeSpeaker' groups with Bonferroni
# correction. Each call returns the reject list, the corrected p-values, the tested
# group combinations and the residuals for one attribute family.
print('EMOTION\n')
print('post-hoc emotions and different groups')
emo_reject_list, emo_corrected_p_vals, emo_combinations, emo_residuals = hp.chi2_post_hoc(df_emotion_char, emotion_label, 'IsNativeSpeaker', 'bonferroni', True, True)
print('\nAFFECT\n')
print('\n post-hoc affect and different groups')
# The affect results get their own aff_* names; they were previously unpacked into
# emo_corrected_p_vals / emo_combinations and silently overwrote the emotion results.
aff_reject_list, aff_corrected_p_vals, aff_combinations, aff_residuals = hp.chi2_post_hoc(df_affect_char, affect_label, 'IsNativeSpeaker', 'bonferroni', True, True)
print('\nAROUSAL-VALENCE\n')
print('\n post-hoc arousal-valence and different groups')
ar_val_reject_list, ar_val_corrected_p_vals, ar_val_combinations, ar_val_residuals = hp.chi2_post_hoc(df_ar_val_char, ['Arousal', 'Valence'], 'IsNativeSpeaker', 'bonferroni', True, True)
print('\nLEVEL OF INTEREST\n')
print('\n post-hoc level of intereset and different groups')
loi_reject_list, loi_corrected_p_vals, loi_combinations, loi_residuals = hp.chi2_post_hoc(df_loi_char, ['Normal Interest', 'High Interest'], 'IsNativeSpeaker', 'bonferroni', True, True)

EMOTION

post-hoc emotions and different groups
Anger
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [0.94784137 1.         1.        ]
Boredom
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [1.         0.69507846 0.96736552]
Disgust
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [1. 1. 1.]
Fear
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [1.         1.         0.98814352]
Happiness
Combinations: [('Asian Non-Nati