In [5]:
import pandas as pd
import glob
import numpy as np

# Helper Functions

In [77]:
def merge_text_hume_data(df_facial, df_text):

    df_text = df_text.groupby('start_patch')['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral'].mean().reset_index()
    df_facial = df_facial.groupby('start_patch')['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral'].mean().reset_index()
    df_merged = pd.merge(df_text, df_facial, on=['start_patch'], how='inner').drop('start_patch', axis=1)
    return df_merged

In [78]:
def prepare_text_data(path):
    df_text = pd.read_csv('../data/emotion_from_transcript/emotion-english-distilroberta-base/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv', index_col=0)
    df_text['start'] = np.floor(((df_text['start']/60)/5))*5
    df_text.drop(['end', 'segment_id'], axis=1, inplace=True)
    df_text.columns = ['start_patch', 'text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral']
    return df_text

In [72]:
def prepare_hume_data(path):
    df_facial = pd.read_csv(path, index_col=0)
    df_facial.drop('start', axis=1, inplace=True)
    df_facial.columns = ['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral', 'start_patch']
    return df_facial
    

In [104]:
def create_merged_binary_emotion_data(df_merged):
    df_binary=pd.DataFrame(df_merged[['facial_anger', 'facial_disgust','facial_fear', 'facial_sadness']].mean(axis=1), columns=['facial_agg_negative'])
    df_binary['facial_agg_positive'] = df_merged[['facial_neutral', 'facial_surprise','facial_joy']].mean(axis=1)
    df_binary['text_agg_negative'] = df_merged[['text_anger', 'text_disgust','text_fear', 'text_sadness']].mean(axis=1)
    df_binary['text_agg_positive'] = df_merged[['text_neutral', 'text_surprise','text_joy']].mean(axis=1)

    return df_binary

# Hume/Text correlations

In [81]:
df_facial_vocals_morning = prepare_hume_data('../data/hume/processed/Cuban_vocals_morning.csv')
df_text_vocals_morning = prepare_text_data('../data/emotion_from_transcript/emotion-english-distilroberta-base/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv')

df_merged = merge_text_hume_data(df_facial_vocals_morning, df_text_vocals_morning)
df_merged.head()

  df_text = df_text.groupby('start_patch')['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral'].mean().reset_index()
  df_facial = df_facial.groupby('start_patch')['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral'].mean().reset_index()


Unnamed: 0,text_anger,text_disgust,text_fear,text_joy,text_sadness,text_surprise,text_neutral,facial_anger,facial_disgust,facial_fear,facial_joy,facial_sadness,facial_surprise,facial_neutral
0,0.061651,0.367261,0.031829,0.005832,0.496214,0.022023,0.01519,0.073753,0.132861,0.16554,0.128168,0.181144,0.103265,0.220051
1,0.022763,0.054074,0.026508,0.105159,0.552026,0.033015,0.206455,0.077141,0.138249,0.166908,0.121519,0.192415,0.098344,0.223063
2,0.031876,0.049549,0.013774,0.007627,0.696988,0.015867,0.18432,0.081929,0.108667,0.144073,0.165099,0.142597,0.108096,0.212238
3,0.012535,0.029529,0.004095,0.21381,0.620025,0.012278,0.107727,0.085003,0.111863,0.150903,0.151444,0.153683,0.110996,0.226161
4,0.133437,0.247246,0.01839,0.006704,0.418027,0.017145,0.159051,0.08119,0.119679,0.144456,0.144174,0.167758,0.100934,0.229378


### Calculate correlation

In [82]:
df_vocals_morning_corr = df_merged.corr().drop(['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral'], axis=1)
df_vocals_morning_corr = df_vocals_morning_corr.drop(['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral'],axis=0)
df_vocals_morning_corr

Unnamed: 0,facial_anger,facial_disgust,facial_fear,facial_joy,facial_sadness,facial_surprise,facial_neutral
text_anger,0.398178,-0.182749,0.089047,0.097939,-0.212418,0.35895,0.015163
text_disgust,-0.118656,0.246331,0.161601,-0.12043,0.143068,-0.061674,0.134199
text_fear,0.185939,-0.020421,0.284355,-0.139052,0.019175,0.091603,0.33448
text_joy,-0.17279,0.211575,-0.183323,-0.102115,0.299441,-0.386459,0.283491
text_sadness,0.049356,-0.030553,0.076205,0.02166,-0.050236,0.038512,-0.132765
text_surprise,-0.122753,-0.136894,-0.296842,0.226442,-0.183378,-0.034425,-0.062296
text_neutral,0.077761,-0.15176,0.107488,-0.028667,-0.112817,0.245388,-0.184968


### Correlation aggregated

In [84]:
df_facial_vocals_morning = prepare_hume_data('../data/hume/processed/Cuban_vocals_morning.csv')
df_text_vocals_morning = prepare_text_data('../data/emotion_from_transcript/emotion-english-distilroberta-base/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv')

In [105]:
df_merged = merge_text_hume_data(df_facial_vocals_morning, df_text_vocals_morning)


  df_text = df_text.groupby('start_patch')['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral'].mean().reset_index()
  df_facial = df_facial.groupby('start_patch')['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral'].mean().reset_index()


In [106]:
df_facial_text_binary

Unnamed: 0,facial_agg_negative,facial_agg_positive,text_agg_positive,text_agg_negative
0,0.138325,0.150494,0.014348,0.239239
1,0.143678,0.147642,0.114876,0.163843
2,0.119316,0.161811,0.069271,0.198046
3,0.125363,0.162867,0.111272,0.166546
4,0.128271,0.158162,0.060967,0.204275
5,0.125741,0.158316,0.206487,0.095134
6,0.13079,0.154684,0.166232,0.125326
7,0.124946,0.159837,0.083405,0.187446
8,0.134568,0.153506,0.126548,0.155089
9,0.12266,0.162746,0.034249,0.224313


In [110]:
df_facial_text_binary = create_merged_binary_emotion_data(df_merged)
df_facial_text_binary_corr = df_facial_text_binary.corr().drop(['text_agg_positive','text_agg_negative'], axis=1)
df_facial_text_binary_corr = df_facial_text_binary_corr.drop(['facial_agg_positive','facial_agg_negative'], axis=0)
df_facial_text_binary.corr()

Unnamed: 0,facial_agg_negative,facial_agg_positive,text_agg_negative,text_agg_positive
facial_agg_negative,1.0,-0.818848,0.0663,-0.0663
facial_agg_positive,-0.818848,1.0,0.009714,-0.009714
text_agg_negative,0.0663,0.009714,1.0,-1.0
text_agg_positive,-0.0663,-0.009714,-1.0,1.0


# Binary Sentiment data to facial correlation

In [115]:
df_facial_vocals_morning = prepare_hume_data('../data/hume/processed/Cuban_vocals_morning.csv')
df = pd.read_csv('../data/emotion_from_transcript/binary_sentiment/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv', index_col=0)
df

Unnamed: 0,segment_id,start,end,positive,negative
0,1,129,153,0,1
1,3,259,270,0,1
2,4,273,284,1,0
3,5,312,325,1,0
4,6,411,424,0,1
...,...,...,...,...,...
61,105,9308,9328,1,0
62,108,9683,9701,1,0
63,109,9707,9718,1,0
64,110,9720,9748,1,0
