# CHI Square Test
To further look into differences in our data, we conduct multiple chi² tests to see if there are any significant differences between females/males and grad students/PhDs regarding different emotion, affect, level of interest and arousal valence attributes.

## Import relevant libraries

In [1]:
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import sklearn.preprocessing as pp
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns
import Helper as hp

## Load .csv data with results of OpenSMILE Analysis
First we load the .csv data and clean it (removing NaNs); then we store the information of all files in separate pandas DataFrames containing information about affect, emotion, level of interest and valence/arousal for all participants.

In [2]:
# Load the OpenSMILE analysis results (one row per analysed audio file).
data = pd.read_csv("CHI_2019_FULL.csv")

# Set labels: the column names belonging to each attribute group.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

# Get the specific data and save it into new data frames.
# The column selections reuse the label lists above so the two cannot drift apart.
# .copy(deep=True) makes every frame independent of `data`; without it, adding the
# 'Char_ID' column below would trigger pandas' SettingWithCopyWarning.
df_emotion = data[emotion_label + ['Filename']].copy(deep=True)
df_affect = data[affect_label + ['Filename']].copy(deep=True)
df_loi = data[loi_label + ['Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)

# For further usage, we want to append the CharacterID as a column, which is saved with other
# information in the filename. Since we only want the digits, we remove all non-digit characters
# of the filename column and append the result to each data frame.
# The pattern is a raw string: '\D' inside a normal string literal is an invalid escape sequence.
df_emotion['Char_ID'] = df_emotion['Filename'].replace(r'\D+', '', regex=True)
df_affect['Char_ID'] = df_affect['Filename'].replace(r'\D+', '', regex=True)
df_loi['Char_ID'] = df_loi['Filename'].replace(r'\D+', '', regex=True)
df_ar_val['Char_ID'] = df_ar_val['Filename'].replace(r'\D+', '', regex=True)



## Let's load information about the speakers
Information about the speakers is saved in a single .csv file containing four important columns: ID, Age, Sex and Academic Status. The previously loaded OpenSMILE .csv files are named using the corresponding speaker index (e.g. the speaker with ID 0 has two files, 0_a.csv and 0_b.csv), so that a link between the two tables can be created.

In [3]:
# Load the speaker (character) table.
char_data = pd.read_csv("CHI_2019_CharacterData.csv")

# Join the tables above with the character table.

# To join data frames, both join columns must have the same data type, so cast them to int.
char_data['ID'] = char_data['ID'].astype(int)
df_ar_val['Char_ID'] = df_ar_val['Char_ID'].astype(int)
df_emotion['Char_ID'] = df_emotion['Char_ID'].astype(int)
df_affect['Char_ID'] = df_affect['Char_ID'].astype(int)
df_loi['Char_ID'] = df_loi['Char_ID'].astype(int)

# Save the joined data frames (a left join keeps every feature row).
df_ar_val_char = df_ar_val.merge(char_data, how='left', left_on='Char_ID', right_on='ID')
df_emotion_char = df_emotion.merge(char_data, how='left', left_on='Char_ID', right_on='ID')
df_affect_char = df_affect.merge(char_data, how='left', left_on='Char_ID', right_on='ID')
df_loi_char = df_loi.merge(char_data, how='left', left_on='Char_ID', right_on='ID')

# Now, we only want to have data containing information about the answers.
# For that we need to extract from the filename column whether the file was part of an answer:
# a = answer, p = presentation, q = question
# The sentence type should be the same for all tables, but it is derived per table just to be sure.
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace treats the pattern as a
# literal string by default, which would leave the digits in place and break the slice below.
arval_sentence_type = df_ar_val_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_ar_val_char['SentenceType'] = arval_sentence_type
emo_sentence_type = df_emotion_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_emotion_char['SentenceType'] = emo_sentence_type
aff_sentence_type = df_affect_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_affect_char['SentenceType'] = aff_sentence_type
loi_sentence_type = df_loi_char.Filename.str.replace(r'\d+', '', regex=True).str[3:-4]
df_loi_char['SentenceType'] = loi_sentence_type

# Now select only the rows that have SentenceType == 'p'.
# NOTE(review): the notes above talk about keeping the answers ('a'), but the filter keeps
# 'p' rows -- confirm which sentence type is intended.
# .copy() makes the filtered frames independent, so the column assignments further down do not
# trigger pandas' SettingWithCopyWarning.
df_ar_val_char = df_ar_val_char.loc[df_ar_val_char['SentenceType'] == 'p'].copy()
df_emotion_char = df_emotion_char.loc[df_emotion_char['SentenceType'] == 'p'].copy()
df_affect_char = df_affect_char.loc[df_affect_char['SentenceType'] == 'p'].copy()
df_loi_char = df_loi_char.loc[df_loi_char['SentenceType'] == 'p'].copy()

# For affect, we drop the intoxication column and re-normalize the remaining values
# (L1 norm: the absolute values of each row sum to 1 again).
# The label list is rebuilt instead of calling .remove(), so re-running this cell does not
# raise a ValueError once 'Intoxicated' is already gone.
affect_label = [label for label in affect_label if label != 'Intoxicated']
df_affect_char = df_affect_char.drop(['Intoxicated'], axis=1)
norm_test = pp.normalize(df_affect_char[affect_label], norm='l1')
df_affect_char[affect_label] = norm_test

# Merge 'Disinterest' and 'Normal' into a single 'Normal Interest' column.
df_loi_char['Normal Interest'] = df_loi_char['Disinterest'] + df_loi_char['Normal']
df_loi_char = df_loi_char.drop(['Disinterest', 'Normal'], axis=1)
loi_label = ['Normal Interest', 'High Interest']

# Now let's compensate for multiple samples of the same person.
# drop_duplicates() returns a new Series, so its result must be kept; otherwise the loop below
# runs once per row instead of once per person.
IDs = df_emotion_char['Char_ID'].drop_duplicates()

# For every person, replace each attribute column by the result of hp.constructMedianSeries
# (presumably the per-person median repeated for each of that person's rows -- verify in Helper).
# The loops are slow, but keep the Helper function as the single source of the aggregation.
for i in IDs:
    for frame, labels in (
        (df_emotion_char, emotion_label),
        (df_affect_char, affect_label),
        (df_loi_char, loi_label),
        (df_ar_val_char, ['Arousal', 'Valence']),
    ):
        # The row mask only depends on the person, so compute it once per frame.
        person_rows = frame.Char_ID == i
        for label in labels:
            frame.loc[person_rows, label] = hp.constructMedianSeries(frame.loc[person_rows, label])

# Now let's drop the duplicate rows, since we only need one row per person.
df_emotion_char.drop_duplicates(subset=['ID'], inplace=True)
df_affect_char.drop_duplicates(subset=['ID'], inplace=True)
df_loi_char.drop_duplicates(subset=['ID'], inplace=True)
df_ar_val_char.drop_duplicates(subset=['ID'], inplace=True)

## Chi-squared Test of Independence
We start with the characteristic sex. The null hypothesis states that the two categorical variables sex and e.g. emotion are independent.

Since we have float data and chi² needs integer data, such as observation counts, we have to convert our data. To illustrate how this is done, we'll look at a specific emotion, 'Anger'. We need to make sure that in our observation count, we do not have any cells with a value of less than 5, since this yields errors and may falsify the result. So we calculate the quartiles of our emotion 'Anger', which yields three thresholds against which to compare the float data. This way, we can count how many samples fall into the 1st, 2nd, 3rd or 4th quartile. We want to compare two (or more) groups, so we first sort only the female values into the quartiles, then the male values. This yields a 2x4 table. An example table is printed below. This table is used to calculate the chi² statistic. Note that the function 'calcFrequencyTable' takes in a pd.DataFrame, not a pd.Series, and returns an array of pd.DataFrames. This means that the function calculates these tables for all the different emotions defined in e.g. emotion_label.

In [4]:
# Example frequency table for the emotion 'Anger'.
# hp.calcFrequencyTable builds one table per entry of emotion_label; 'Anger' is the first
# label (see the declaration of emotion_label at the start), so its table sits at index 0.
sex_frequency_tables = hp.calcFrequencyTable(df_emotion_char, emotion_label, 'Sex')
anger_table = sex_frequency_tables[0]
anger_table

Unnamed: 0,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile
Male,34,31,28,27
Female,27,30,32,34


In [7]:
# Chi² tests between 'Sex' and every attribute group.
# The fourth positional argument (True) is forwarded to hp.chi2 unchanged
# (presumably a print/verbose switch -- verify in Helper).
# NOTE: the '*_sec_*' result names are kept as they are so later references keep working.
print('EMOTION\n')
emo_sex_chi2 = hp.chi2(df_emotion_char, emotion_label, 'Sex', True)

print('\nAFFECT\n')
aff_sec_chi2 = hp.chi2(df_affect_char, affect_label, 'Sex', True)

print('\nAROUSAL-VALENCE\n')
ar_val_sec_chi2 = hp.chi2(df_ar_val_char, ['Arousal', 'Valence'], 'Sex', True)

print('\nLEVEL OF INTEREST\n')
loi_sec_chi2 = hp.chi2(df_loi_char, ['Normal Interest', 'High Interest'], 'Sex', True)

# Show the residual table of one affect attribute; the caption and the displayed
# table share the same position in affect_label.
residual_position = 4
print('\nResiduals of: ' + affect_label[residual_position])
aff_sec_chi2[1][residual_position]

EMOTION

Chi square of Anger : 1.852862854858057 with p-value of: 0.6034994971411449
Chi square of Boredom : 5.071932726909235 with p-value of: 0.16660502111256176
Chi square of Disgust : 49.07317473010795 with p-value of: 1.2584955346502562e-10
Chi square of Fear : 8.41123950419832 with p-value of: 0.038234923728513295
Chi square of Happiness : 2.504328268692523 with p-value of: 0.4745093760896385
Chi square of Emo_Neutral : 5.919056377449019 with p-value of: 0.11561540839176661
Chi square of Sadness : 35.96953818472611 with p-value of: 7.600252668627796e-08

AFFECT

Chi square of Aggressiv : 26.394308276689326 with p-value of: 7.885894974468189e-06
Chi square of Cheerful : 9.985249900039983 with p-value of: 0.018691933731315072
Chi square of Nervous : 3.4979223310675724 with p-value of: 0.3210316874895997
Chi square of Aff_Neutral : 4.538518092762895 with p-value of: 0.20888018638911474
Chi square of Tired : 9.587375049980007 with p-value of: 0.022419775548845385

AROUSAL-VALENCE

Ch

Unnamed: 0,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile
Male,-0.332447,-2.403845,0.110205,2.626694
Female,0.332447,2.403845,-0.110205,-2.626694


If we take a look at the above p-values, we see significant differences in emotion for disgust, fear and sadness. Regarding the residuals, we see that females are more disgusted than males (4th quartile ~6.25), 1st and 2nd quartile for males: ~4.70 and ~2.03. Further looking at fear, we see that females tend to have more 'extreme' values, meaning that the 1st quartile and 4th quartile are positive (~2.11 and ~0.92). Looking at sadness, we see that (just as in the previous analysis) males tend to be more sad than females (3rd quartile ~1.3, 4th quartile ~4.70).

Looking at the p-values of affect, we can see significant difference for aggressive, cheerful, intoxicated and tired. Further analyzing the residuals, we can see that females tend to be more aggressive (4th quartile ~4.18) than males (1st quartile ~3.51, 2nd quartile ~1.44). Looking at cheerful we see that males tend to have higher values (4th quartile ~3.51) than females (1st to 3rd quartile positive). Regarding intoxication, we see that females tend to have values above the median (3rd and 4th quartile positive (~1.68 and ~2.70), while males tend to have values below the median (1st quartile ~2.92, 2nd quartile ~1.44). Lastly looking at tiredness, we see that males tend to be more tired than females, as males have positive values for the 3rd and 4th quartile, and females have positive values for the 1st and 2nd quartile.

If we take a look at the p-values for arousal valence, we see arousal and valence show significant differences between males and females. Looking at the residuals, we can see that females have higher values of arousal than males (1st quartile for males ~6.18, 3rd Q for females ~2.27 and 4th Q for females ~3.59). Regarding valence, we see that females tend to have values below the median (positive values for 1st Q ~3.88 and 2nd Q ~1.22), whereas males tend to have values above the median (3rd Q ~3.68; 4th Q ~1.44).

Lastly if we take a look at the p-values for level of interest, we can see significant differences for disinterest, normal and high interest. Further investigating the residuals tells us, that females tend to have lower values for disinterest than males (1st quartile ~3.88 for females). Furthermore females tend to have lower values for normal interest than males (1st quartile ~3.29 for females). This implies, that females will have higher values for high interest. By looking at the residuals we are able to confirm this: the only positive value for females is at the 4th quartile (~3.29).

Now move on to academic status, the hypothesis being that the variables academic status and e.g. emotion are independent.

In [8]:
# Chi² tests between 'Academic' (academic status) and every attribute group.
# The fourth positional argument (True) is forwarded to hp.chi2 unchanged
# (presumably a print/verbose switch -- verify in Helper).
arousal_valence_label = ['Arousal', 'Valence']

print('EMOTION\n')
emo_aca_chi2 = hp.chi2(df_emotion_char, emotion_label, 'Academic', True)
print('\nAFFECT\n')
aff_aca_chi2 = hp.chi2(df_affect_char, affect_label, 'Academic', True)
print('\nAROUSAL-VALENCE\n')
ar_val_aca_chi2 = hp.chi2(df_ar_val_char, arousal_valence_label, 'Academic', True)
print('\nLEVEL OF INTEREST\n')
loi_aca_chi2 = hp.chi2(df_loi_char, ['Normal Interest', 'High Interest'], 'Academic', True)

# The displayed table is taken from the arousal/valence result at position 1 ('Valence'),
# so the caption must be looked up in the same label list at the same position. The caption
# previously used affect_label[2] and therefore named an unrelated affect attribute.
residual_position = 1
print('\nResiduals of: ' + arousal_valence_label[residual_position])
ar_val_aca_chi2[1][residual_position]

EMOTION

Chi square of Anger : 3.4865219492385586 with p-value of: 0.3225143905851229
Chi square of Boredom : 14.325620179278717 with p-value of: 0.0024938227522766205
Chi square of Disgust : 4.059207439924119 with p-value of: 0.25514136264208975
Chi square of Fear : 5.0209254267292085 with p-value of: 0.17027127876206108
Chi square of Happiness : 5.921386334644762 with p-value of: 0.11549822467643807
Chi square of Emo_Neutral : 17.527621430060456 with p-value of: 0.0005503842074200204
Chi square of Sadness : 1.5291070302286158 with p-value of: 0.6755689192799226

AFFECT

Chi square of Aggressiv : 6.027650831798139 with p-value of: 0.1102726742789971
Chi square of Cheerful : 4.416317764082106 with p-value of: 0.2198771126758322
Chi square of Nervous : 2.199556369612484 with p-value of: 0.5320357578490797
Chi square of Aff_Neutral : 6.818836862543517 with p-value of: 0.0779017889860497
Chi square of Tired : 4.582277905224589 with p-value of: 0.20506765441019834

AROUSAL-VALENCE

Chi squ

Unnamed: 0,1st Quartile,2nd Quartile,3rd Quartile,4th Quartile
Grad Student,2.460444,-1.264283,0.52861,-1.714855
PhD,-2.460444,1.264283,-0.52861,1.714855


Looking at the p-values for emotion, we see significant differences between Grad Students and PhDs for boredom and neutral (emotion). By further looking at the residuals, we are able to see that PhDs tend to have lower values (1st quartile ~3.21) for boredom than grad students (2nd quartile ~2.76). Looking at neutral (emotion), we see that PhDs tend to be more 'extreme', meaning a value of ~3.21 for the 1st quartile and a value of ~1.42 for the 4th quartile.

If we take a look at the p-values for affect, we only see statistically significant differences in intoxication. Looking at the residuals, we see that grad students tend to have lower values for intoxication (1st Q ~2.03) but also have a value of ~0.52 for the 4th quartile. But we can say that the main difference in intoxication between grad students and PhDs lies between the 1st and 3rd quartile, where PhDs tend to be more intoxicated (3rd Q ~2.47).

Regarding the p-values of arousal-valence, we can only see a statistically significant difference between PhDs and grad students in valence. Looking at the residuals, we can say that grad students (1st Q ~2.46, 3rd Q ~0.53) tend to have smaller valence values than PhDs (2nd Q ~1.26, 4th Q ~1.71).

Now let's look at Native Speaker.

In [9]:
# Chi² tests between 'IsNativeSpeaker' and every attribute group.
# NOTE: the '*_age_*' result names are kept as they are, although the tested characteristic
# is 'IsNativeSpeaker', so that any later references keep working.
native_speaker_column = 'IsNativeSpeaker'

print('EMOTION\n')
emo_age_chi2 = hp.chi2(df_emotion_char, emotion_label, native_speaker_column, True)

print('\nAFFECT\n')
aff_age_chi2 = hp.chi2(df_affect_char, affect_label, native_speaker_column, True)

print('\nAROUSAL-VALENCE\n')
ar_val_age_chi2 = hp.chi2(df_ar_val_char, ['Arousal', 'Valence'], native_speaker_column, True)

print('\nLEVEL OF INTEREST\n')
loi_age_chi2 = hp.chi2(df_loi_char, ['Normal Interest', 'High Interest'], native_speaker_column, True)

EMOTION

Chi square of Anger : 6.4785850653572545 with p-value of: 0.37176381692808363
Chi square of Boredom : 11.755506837474051 with p-value of: 0.067650756540378
Chi square of Disgust : 11.626748842642147 with p-value of: 0.07083274759732817
Chi square of Fear : 4.80715759404284 with p-value of: 0.5687740020632577
Chi square of Happiness : 7.111307941155313 with p-value of: 0.31067613394676324
Chi square of Emo_Neutral : 5.94815807930562 with p-value of: 0.42902247507469005
Chi square of Sadness : 9.535061560624218 with p-value of: 0.14564680721979234

AFFECT

Chi square of Aggressiv : 18.43985561684923 with p-value of: 0.0052220394778411975
Chi square of Cheerful : 5.2922537478783935 with p-value of: 0.5069122775374593
Chi square of Nervous : 3.2009993701106736 with p-value of: 0.7832293493762938
Chi square of Aff_Neutral : 3.4366456932878644 with p-value of: 0.7523787429324115
Chi square of Tired : 7.619268479641573 with p-value of: 0.2673441330670959

AROUSAL-VALENCE

Chi square 

## Post-Hoc tests for age and native speaker, as they have three different groups

If a significant p-value for the category 'NativeSpeaker' is found, we do not yet know which groups differ significantly from each other, so post-hoc testing is done for this character feature.

In [10]:
# Post-hoc chi² tests for 'IsNativeSpeaker' with Bonferroni correction, one call per
# attribute group. Each call's result is unpacked into four values, named here as reject
# list, corrected p-values, group combinations and residuals.
print('EMOTION\n')
print('post-hoc emotions and different groups')
emo_reject_list, emo_corrected_p_vals, emo_combinations, emo_residuals = hp.chi2_post_hoc(df_emotion_char, emotion_label, 'IsNativeSpeaker', 'bonferroni', True, True)
print('\nAFFECT\n')
print('\n post-hoc affect and different groups')
# The affect results get their own aff_* names; they previously overwrote
# emo_corrected_p_vals and emo_combinations, which destroyed the emotion results above.
aff_reject_list, aff_corrected_p_vals, aff_combinations, aff_residuals = hp.chi2_post_hoc(df_affect_char, affect_label, 'IsNativeSpeaker', 'bonferroni', True, True)
print('\nAROUSAL-VALENCE\n')
print('\n post-hoc arousal-valence and different groups')
ar_val_reject_list, ar_val_corrected_p_vals, ar_val_combinations, ar_val_residuals = hp.chi2_post_hoc(df_ar_val_char, ['Arousal', 'Valence'], 'IsNativeSpeaker', 'bonferroni', True, True)
print('\nLEVEL OF INTEREST\n')
print('\n post-hoc level of intereset and different groups')
loi_reject_list, loi_corrected_p_vals, loi_combinations, loi_residuals = hp.chi2_post_hoc(df_loi_char, ['Normal Interest', 'High Interest'], 'IsNativeSpeaker', 'bonferroni', True, True)

EMOTION

post-hoc emotions and different groups
Anger
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [1.         1.         0.81143585]
Boredom
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [1.         0.43215994 0.56231893]
Disgust
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [0.12305687 0.96552376 1.        ]
Fear
Combinations: [('Asian Non-Native', 'Europ. Non-Native'), ('Asian Non-Native', 'Native Speaker'), ('Europ. Non-Native', 'Native Speaker')]
Reject List: [False False False]
Corrected p-values: [1.         1.         0.95569894]
Happiness
Combina