In [1]:
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns
import Helper as hp

#Load Data
# Two OpenSMILE exports: the regular (long) samples and the short samples.
data = pd.read_csv("UIST2019_OpenSMILE.csv")
data_short = pd.read_csv("UIST_2019_short_samples_OpenSMILE.csv")

#Set Labels
# Column names of each feature group in the CSV files.
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']
ar_val_label = ['Arousal', 'Valence']


def _select_features(source, feature_columns, is_short):
    """Build the per-feature-group frame used by the analyses below.

    Parameters
    ----------
    source : pandas.DataFrame
        Frame read from one of the OpenSMILE CSV files; must contain
        ``feature_columns`` and a ``Filename`` column.
    feature_columns : list of str
        Names of the feature columns to keep.
    is_short : float
        Value written to the ``IsShort`` column (1.0 = short sample,
        0.0 = long sample).

    Returns
    -------
    pandas.DataFrame
        Independent copy with the columns ``feature_columns``, ``Filename``,
        ``Char_ID`` and ``IsShort`` (in that order).
    """
    # Deep copy: the new columns are added to an independent frame, so `source`
    # stays untouched and pandas does not raise a SettingWithCopyWarning.
    subset = source[feature_columns + ['Filename']].copy(deep=True)
    # The character ID is stored inside the filename; strip every run of
    # non-digit characters to keep only the digits (result stays a string).
    # NOTE(review): *all* digits of the filename are concatenated - confirm that
    # the filenames contain no other digits (e.g. in a prefix or extension).
    subset['Char_ID'] = subset['Filename'].replace(r'\D+', '', regex=True)
    # Flag that records whether the row is a short or a long sample.
    subset['IsShort'] = is_short
    return subset


#Get specific data and save it into new data frames (short samples)
df_emotion_s = _select_features(data_short, emotion_label, 1.0)
df_affect_s = _select_features(data_short, affect_label, 1.0)
df_loi_s = _select_features(data_short, loi_label, 1.0)
df_ar_val_s = _select_features(data_short, ar_val_label, 1.0)

#Now stack the long samples (IsShort = 0.0) on top of the short ones (IsShort = 1.0).
# ignore_index=True gives the combined frame a fresh 0..n-1 index. The former
# keys=['Char_ID', 'IsShort'] argument was dropped: `keys` labels the *input frames*
# (not columns) and is discarded anyway when ignore_index=True.
df_emotion = pd.concat([_select_features(data, emotion_label, 0.0), df_emotion_s], ignore_index=True)
df_affect = pd.concat([_select_features(data, affect_label, 0.0), df_affect_s], ignore_index=True)
df_loi = pd.concat([_select_features(data, loi_label, 0.0), df_loi_s], ignore_index=True)
df_ar_val = pd.concat([_select_features(data, ar_val_label, 0.0), df_ar_val_s], ignore_index=True)

## Now let's use logistic regression to predict whether an audio file is a short or a long sample

In [2]:

# Fit one logistic regression per feature group. Each model predicts IsShort
# (1.0 = short sample, 0.0 = long sample) from that group's columns only.
# Note: sm.Logit does not add an intercept by itself and none is added here.
print('Emotion')
df_emo_X = df_emotion[emotion_label]
df_emo_Y = df_emotion['IsShort']
logreg_emo = sm.Logit(df_emo_Y, df_emo_X).fit()
print(logreg_emo.summary())

print('\nAffect')
df_aff_X = df_affect[affect_label]
df_aff_Y = df_affect['IsShort']
logreg_aff = sm.Logit(df_aff_Y, df_aff_X).fit()
print(logreg_aff.summary())

print('\nLevel of Interest')
df_loi_X = df_loi[loi_label]
df_loi_Y = df_loi['IsShort']
# Fit on the level-of-interest predictors. This used to pass df_aff_X, which
# silently produced a second affect model under the "Level of Interest" heading.
logreg_loi = sm.Logit(df_loi_Y, df_loi_X).fit()
print(logreg_loi.summary())

print('\nArousal-Valence')
# Dedicated names so the level-of-interest data above is not overwritten.
df_ar_val_X = df_ar_val[['Arousal', 'Valence']]
df_ar_val_Y = df_ar_val['IsShort']
logreg_ar_val = sm.Logit(df_ar_val_Y, df_ar_val_X).fit()
print(logreg_ar_val.summary())

Emotion
Optimization terminated successfully.
         Current function value: 0.614233
         Iterations 10
                           Logit Regression Results                           
Dep. Variable:                IsShort   No. Observations:                  550
Model:                          Logit   Df Residuals:                      543
Method:                           MLE   Df Model:                            6
Date:                Wed, 21 Oct 2020   Pseudo R-squ.:                  0.1136
Time:                        15:51:39   Log-Likelihood:                -337.83
converged:                       True   LL-Null:                       -381.14
                                        LLR p-value:                 1.521e-16
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Anger          28.9112     94.894      0.305      0.761    -157.078     214.901
Boredom         0

In [3]:
# Emotion-only model: predict IsShort from the emotion columns and show the
# full regression table.
print('Emotion')
df_emo_Y = df_emotion['IsShort']
df_emo_X = df_emotion[emotion_label]
emo_model = sm.Logit(endog=df_emo_Y, exog=df_emo_X)
logreg_emo = emo_model.fit()
print(logreg_emo.summary())

Emotion
Optimization terminated successfully.
         Current function value: 0.614233
         Iterations 10
                           Logit Regression Results                           
Dep. Variable:                IsShort   No. Observations:                  550
Model:                          Logit   Df Residuals:                      543
Method:                           MLE   Df Model:                            6
Date:                Wed, 21 Oct 2020   Pseudo R-squ.:                  0.1136
Time:                        16:41:50   Log-Likelihood:                -337.83
converged:                       True   LL-Null:                       -381.14
                                        LLR p-value:                 1.521e-16
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Anger          28.9112     94.894      0.305      0.761    -157.078     214.901
Boredom         0

In [None]:
# Refit the emotion model on a reduced predictor set: Anger, Boredom and
# Emo_Neutral are removed, the remaining emotion columns predict IsShort.
# Note: this overwrites logreg_emo with the reduced model.
print('Emotion')
df_emo_X = df_emotion[emotion_label]
# DataFrame.drop removes index labels (rows) by default, so the bare list raised
# a KeyError; columns= is required to drop the three predictors.
df_emo_X2 = df_emo_X.drop(columns=['Anger', 'Boredom', 'Emo_Neutral'])
df_emo_Y = df_emotion['IsShort']
logreg_emo = sm.Logit(df_emo_Y, df_emo_X2).fit()
print(logreg_emo.summary())