In [1]:
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns
import Helper as hp

# Load the OpenSMILE model outputs for the long (full-length) samples.
data = pd.read_csv("UIST2019_OpenSMILE.csv")

# Column names of the score columns, grouped by the model that produced them.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']


def _select_with_char_id(frame, score_columns, is_short):
    """Return an independent sub-frame with the given score columns plus bookkeeping columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source table; must contain ``score_columns`` and a ``'Filename'`` column.
    score_columns : list of str
        Names of the score columns to keep.
    is_short : float
        Value written to the ``'IsShort'`` column (1.0 for short samples, 0.0 for long ones).

    Returns
    -------
    pandas.DataFrame
        Columns in order: ``score_columns``, ``'Filename'``, ``'Char_ID'``, ``'IsShort'``.
    """
    # Deep copy so that adding columns below neither triggers a
    # SettingWithCopyWarning nor modifies the frame that was read from disk.
    subset = frame[[*score_columns, 'Filename']].copy(deep=True)
    # The character ID is stored in the filename together with other information.
    # Stripping every non-digit character leaves only the digits (kept as a string).
    # The pattern is a raw string: in a normal string literal '\D' is an invalid escape.
    subset['Char_ID'] = subset['Filename'].replace(r'\D+', '', regex=True)
    # Flag that records whether the row comes from a short or a long sample.
    subset['IsShort'] = is_short
    return subset


# Sub-frames for the long samples (IsShort = 0.0).
df_emotion = _select_with_char_id(data, emotion_label, 0.0)
df_affect = _select_with_char_id(data, affect_label, 0.0)
df_loi = _select_with_char_id(data, loi_label, 0.0)
df_ar_val = _select_with_char_id(data, ['Arousal', 'Valence'], 0.0)

# Same model outputs for the short samples (IsShort = 1.0).
data_short = pd.read_csv("UIST_2019_short_samples_OpenSMILE.csv")

df_emotion_s = _select_with_char_id(data_short, emotion_label, 1.0)
df_affect_s = _select_with_char_id(data_short, affect_label, 1.0)
df_loi_s = _select_with_char_id(data_short, loi_label, 1.0)
df_ar_val_s = _select_with_char_id(data_short, ['Arousal', 'Valence'], 1.0)

# Stack the long rows on top of the short rows. ignore_index=True gives the result a
# fresh 0..n-1 index; no `keys` are passed because they would be discarded together
# with the old index anyway.
df_emotion = pd.concat([df_emotion, df_emotion_s], ignore_index=True)
df_affect = pd.concat([df_affect, df_affect_s], ignore_index=True)
df_loi = pd.concat([df_loi, df_loi_s], ignore_index=True)
df_ar_val = pd.concat([df_ar_val, df_ar_val_s], ignore_index=True)

# Replace the space in the column name with an underscore.
# NOTE(review): presumably needed so the column can be referenced in a model formula -
# confirm against Helper.multiLogReg. df_loi_s and loi_label keep the original spelling.
df_loi = df_loi.rename(columns={'High Interest': 'High_Interest'})

## Let's start with the sample-length feature 'IsShort' (short vs. long samples)

In [3]:

# Fit one logistic regression per model-output table and print each summary.
# Every call passes 'IsShort' as the third argument; the saved output reports it as the
# dependent variable, although the result variables carry a `_sex` suffix.
# NOTE(review): the saved Emotion output shows `converged: False` together with very
# large coefficients and standard errors - worth checking the predictors for collinearity.


def _fit_and_report(heading, frame, model_name):
    """Print `heading`, fit `frame` via hp.multiLogReg, print the summary, return the fit.

    The trailing `True` is forwarded unchanged; per the original note it sets
    prohibitWarnings in the helper.
    """
    print(heading)
    fitted = hp.multiLogReg(frame, model_name, 'IsShort', True)
    print(fitted.summary())
    return fitted


logreg_emo_sex = _fit_and_report('Emotion', df_emotion, 'Emotion')
logreg_aff_sex = _fit_and_report('\nAffect', df_affect, 'Affect')
logreg_loi_sex = _fit_and_report('\nLevel of Interest', df_loi, 'LOI')
logreg_ar_val_sex = _fit_and_report('\nArousal-Valence', df_ar_val, 'Arousal-Valence')

Emotion
         Current function value: 0.614250
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:                IsShort   No. Observations:                  550
Model:                          Logit   Df Residuals:                      542
Method:                           MLE   Df Model:                            7
Date:                Wed, 21 Oct 2020   Pseudo R-squ.:                  0.1136
Time:                        14:37:30   Log-Likelihood:                -337.84
converged:                      False   LL-Null:                       -381.14
                                        LLR p-value:                 6.150e-16
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept     892.9634   1.22e+05      0.007      0.994   -2.39e+05    2.41e+05
Anger        -864.0617   1.22e+05     -0.007      0.99

