In [1]:
import numpy as np
import pandas as pd
from os import listdir
import matplotlib.pyplot as plt
import itertools as it
from statsmodels.sandbox.stats.multicomp import multipletests
import statsmodels.api as sm
#import nltk
import scipy.stats as st
import statsmodels.formula.api as smf
import seaborn as sns
import Helper as hp

#Load Data
data = pd.read_csv("UIST2019_OpenSMILE.csv")

#Set Labels 
emotion_label = ['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness']
affect_label = ['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired']
loi_label = ['Disinterest', 'Normal', 'High Interest']

#Get specific data and save it into new data frames
# We use the pandas .copy(deep=True) function to prevent the SettingWithCopyWarning we would otherwise get. Since we do
# not write, but only read from the data, the warning does not affect the data frames
df_emotion = data[['Anger', 'Boredom', 'Disgust', 'Fear', 'Happiness', 'Emo_Neutral', 'Sadness', 'Filename']].copy(deep=True)
df_affect = data[['Aggressiv', 'Cheerful', 'Intoxicated', 'Nervous', 'Aff_Neutral', 'Tired', 'Filename']].copy(deep=True)
df_loi = data[['Disinterest', 'Normal', 'High Interest', 'Filename']].copy(deep=True)
df_ar_val = data[['Arousal', 'Valence', 'Filename']].copy(deep=True)
#For further usage, we want to append the CharacterID as a column, which is saved with other information in the filename
#Since we only want the digits, we can remove all non-digit characters of the filename column and append the column to the df

df_emotion['Char_ID'] = df_emotion['Filename'].replace('\D+','', regex = True).copy(deep=True)
df_affect['Char_ID'] = df_affect['Filename'].replace('\D+','', regex = True).copy(deep=True)
df_loi['Char_ID'] = df_loi['Filename'].replace('\D+','', regex = True).copy(deep=True)
df_ar_val['Char_ID'] = df_ar_val['Filename'].replace('\D+','', regex = True).copy(deep=True)

#Load data about speakers
char_data = pd.read_csv("UIST2019_CharacterData.csv")  

#Join above tables and Character Tables

#To Join DataFrames we have to cast the column on which we want to join to int, so that both columns have the same data type
char_data['ID'] = char_data['ID'].astype(int)
df_ar_val['Char_ID'] = df_ar_val['Char_ID'].astype(int)
df_emotion['Char_ID'] = df_emotion['Char_ID'].astype(int)
df_affect['Char_ID'] = df_affect['Char_ID'].astype(int)
df_loi['Char_ID'] = df_loi['Char_ID'].astype(int)

#Safe new data frames
df_ar_val_char = df_ar_val.merge(char_data, how = 'left', left_on='Char_ID', right_on='ID')
df_emotion_char = df_emotion.merge(char_data, how = 'left', left_on='Char_ID', right_on= 'ID')
df_affect_char = df_affect.merge(char_data, how = 'left', left_on='Char_ID', right_on= 'ID')
df_loi_char = df_loi.merge(char_data, how = 'left', left_on='Char_ID', right_on= 'ID')

## Let's start with the character feature 'sex'

In [2]:
# Let's prepare the datasets containing spaces for logistic regression
temp_emotion_char = df_emotion_char.rename(columns = {'Academic Status' : 'Academic'})
temp_affect_char = df_affect_char.rename(columns = {'Academic Status' : 'Academic'})
temp_loi_char = df_loi_char.rename(columns = {'Academic Status' : 'Academic', 'High Interest': 'High_Interest'})
temp_arval_char = df_ar_val_char.rename(columns = {'Academic Status' : 'Academic'})

# Start with model and sex
print('Emotion')
logreg_emo_sex = hp.multiLogReg(temp_emotion_char, 'Emotion', 'Sex',True) #boolean set prohibitWarnings to True
print(logreg_emo_sex.summary())
logreg_aff_sex = hp.multiLogReg(temp_affect_char, 'Affect', 'Sex', True)
print('Affect')
print(logreg_aff_sex.summary())
logreg_loi_sex = hp.multiLogReg(temp_loi_char, 'LOI', 'Sex', True) 
print('Level of Interest')
print(logreg_loi_sex.summary())
logreg_ar_val_sex = hp.multiLogReg(temp_arval_char, 'Arousal-Valence', 'Sex', True) # Yields LinAlgError: Singular Matrix
print('Arousal-Valence')
print(logreg_ar_val_sex.summary())

Emotion
         Current function value: 0.366786
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:                    Sex   No. Observations:                  280
Model:                          Logit   Df Residuals:                      272
Method:                           MLE   Df Model:                            7
Date:                Tue, 29 Sep 2020   Pseudo R-squ.:                  0.2440
Time:                        13:37:52   Log-Likelihood:                -102.70
converged:                      False   LL-Null:                       -135.85
                                        LLR p-value:                 8.222e-12
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept     655.7292   2.44e+05      0.003      0.998   -4.77e+05    4.78e+05
Anger       -3490.1649   2.44e+05     -0.014      0.98



### Character feature = age

In [4]:
logreg_emo_age = hp.multiNomiLogReg(temp_emotion_char, 'Emotion', 'Age', True)
print('Emotion:')
print(logreg_emo_age.summary())
logreg_aff_age = hp.multiNomiLogReg(temp_affect_char, 'Affect', 'Age', True)
print('Affect: ')
print(logreg_aff_age.summary())
logreg_loi_age = hp.multiNomiLogReg(temp_loi_char, 'LOI', 'Age', True)
print('Level of Interest: ')
print(logreg_loi_age.summary())
logreg_ar_val_age = hp.multiNomiLogReg(temp_arval_char, 'Arousal-Valence', 'Age', True)
print('Arousal-Valence:')
print(logreg_ar_val_age.summary())
temp_emotion_char

Optimization terminated successfully.
         Current function value: nan
         Iterations 7
Emotion:
                          MNLogit Regression Results                          
Dep. Variable:                      y   No. Observations:                  156
Model:                        MNLogit   Df Residuals:                      140
Method:                           MLE   Df Model:                           14
Date:                Tue, 29 Sep 2020   Pseudo R-squ.:                     nan
Time:                        13:38:03   Log-Likelihood:                    nan
converged:                       True   LL-Null:                       -89.161
                                        LLR p-value:                       nan
  y=Age[Old]       coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept           nan        nan        nan        nan         nan         nan
Anger              

  return eXB/eXB.sum(1)[:,None]
  oldparams) > tol)):
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Unnamed: 0,Anger,Boredom,Disgust,Fear,Happiness,Emo_Neutral,Sadness,Filename,Char_ID,ID,VideoTitle,Name,Sex,Academic,Age,VideoID
0,0.000032,0.000016,0.335981,0.000006,0.000055,0.000004,0.663906,0_a.wav,0,0,3D printed Fabric: Techniques for Design and 3...,Haruki Takahashi,Male,PhD,Young,z07keSZOkO8
1,0.000004,0.000003,0.002605,0.000000,0.000000,0.000000,0.997388,0_a_a.wav,0,0,3D printed Fabric: Techniques for Design and 3...,Haruki Takahashi,Male,PhD,Young,z07keSZOkO8
2,0.001765,0.107574,0.003542,0.004003,0.002175,0.012042,0.868898,0_b_a.wav,0,0,3D printed Fabric: Techniques for Design and 3...,Haruki Takahashi,Male,PhD,Young,z07keSZOkO8
3,0.000005,0.000005,0.003151,0.000000,0.000000,0.000000,0.996839,100_a_q.wav,100,100,Question in Optimizing Portrait Lighting,Scott Klemmer,Male,PhD,Intermediate,
4,0.000334,0.000106,0.089250,0.000015,0.000135,0.000040,0.910122,101_a_q.wav,101,101,Question in Optimizing Portrait Lighting,,Male,,,
5,0.000011,0.000004,0.035261,0.000001,0.000004,0.000000,0.964719,102_a_a.wav,102,102,Proxino: Enabling Prototyping of Virtual Circu...,Te-Yen Wu,Male,Grad Student,Young,_Klvg8YSrJQ
6,0.000000,0.000000,0.000001,0.000000,0.000000,0.000000,0.999998,102_b_a.wav,102,102,Proxino: Enabling Prototyping of Virtual Circu...,Te-Yen Wu,Male,Grad Student,Young,_Klvg8YSrJQ
7,0.000183,0.000227,0.022739,0.000009,0.000012,0.000015,0.976814,103_a_q.wav,103,103,Question in Proxino,Andrea Bianchi,Male,PhD,Intermediate,
8,0.000101,0.000015,0.610082,0.000005,0.000053,0.000001,0.389744,104_a_q.wav,104,104,Question in Proxino,,Female,,,
9,0.000414,0.000673,0.047032,0.000035,0.000233,0.000078,0.951536,105_a_a.wav,105,105,PseudoBend: Producing Haptic Illusions of Stre...,Seongkook Heo,Male,PhD,Intermediate,rCYeWPUcMZU


### Character feature = academic status

In [5]:
#logreg_emo_aca = hp.multiLogReg(temp_emotion_char, 'Emotion', 'Academic', True) # Raises LinAlg Singular Matrix Error
print('Emotions: ')
#print(logreg_emo_aca.summary())
logreg_aff_aca = hp.multiLogReg(temp_affect_char, 'Affect', 'Academic', True) 
print('Affect: ')
print(logreg_aff_aca.summary())
logreg_loi_aca = hp.multiLogReg(temp_loi_char, 'LOI', 'Academic', True)
print('Level of Interest: ')
print(logreg_loi_aca.summary())
logreg_ar_val_aca = hp.multiLogReg(temp_arval_char, 'Arousal-Valence', 'Academic', True)
print('Arousal-Valence: ')
print(logreg_ar_val_aca.summary())

         Current function value: inf
         Iterations: 35


  return np.sum(np.log(self.cdf(q*np.dot(X,params))))


LinAlgError: Singular matrix