In [71]:
import pandas as pd
import numpy as np
from scipy.stats import pointbiserialr

# Helper Functions

In [72]:
def merge_text_hume_data(df_facial, df_text):
    """Average text and facial emotion scores per `start_patch` and join them.

    Parameters
    ----------
    df_facial : pandas.DataFrame
        Must contain `start_patch` and the seven `facial_*` emotion columns.
    df_text : pandas.DataFrame
        Must contain `start_patch` and the seven `text_*` emotion columns.

    Returns
    -------
    pandas.DataFrame
        One row per `start_patch` value present in BOTH inputs (inner join),
        with the `text_*` columns followed by the `facial_*` columns. The
        `start_patch` key itself is dropped from the result.
    """
    text_cols = ['text_anger', 'text_disgust', 'text_fear', 'text_joy',
                 'text_sadness', 'text_surprise', 'text_neutral']
    facial_cols = ['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy',
                   'facial_sadness', 'facial_surprise', 'facial_neutral']

    # Select columns with a list: indexing a groupby with a bare tuple of names
    # is deprecated and raises in pandas >= 2.0.
    text_means = df_text.groupby('start_patch')[text_cols].mean().reset_index()
    facial_means = df_facial.groupby('start_patch')[facial_cols].mean().reset_index()

    df_merged = pd.merge(text_means, facial_means, on=['start_patch'], how='inner')
    return df_merged.drop('start_patch', axis=1)

In [73]:
def prepare_text_data(path):
    """Load transcript emotion scores and bin each segment into a 5-unit patch.

    Parameters
    ----------
    path : str or path-like
        CSV file to read; its first column is used as the index. The file must
        contain `start`, `end` and `segment_id` columns plus seven emotion
        score columns.

    Returns
    -------
    pandas.DataFrame
        Columns `start_patch`, `text_anger`, `text_disgust`, `text_fear`,
        `text_joy`, `text_sadness`, `text_surprise`, `text_neutral`.
    """
    # Read the file the caller asked for (previously a hard-coded path was
    # always loaded and `path` was silently ignored).
    df_text = pd.read_csv(path, index_col=0)

    # start / 60, then floored to a multiple of 5 -- presumably seconds turned
    # into 5-minute patches; confirm against the units of the source file.
    df_text['start'] = np.floor((df_text['start'] / 60) / 5) * 5
    df_text = df_text.drop(['end', 'segment_id'], axis=1)

    # NOTE(review): the rename is positional. It assumes the remaining columns
    # are `start` followed by the emotions in exactly this order -- verify
    # against the CSV header.
    df_text.columns = ['start_patch', 'text_anger', 'text_disgust', 'text_fear',
                       'text_joy', 'text_sadness', 'text_surprise', 'text_neutral']
    return df_text

In [74]:
def prepare_hume_data(path):
    """Load facial emotion scores from `path` and prefix the emotion columns.

    The CSV's first column becomes the index and its `start` column is
    discarded. The remaining eight columns are renamed by position to the
    seven `facial_*` emotion names followed by `start_patch`.
    """
    renamed_columns = [
        'facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy',
        'facial_sadness', 'facial_surprise', 'facial_neutral', 'start_patch',
    ]
    facial_scores = pd.read_csv(path, index_col=0).drop(columns='start')
    # Positional rename: relies on the file's column order matching the list above.
    facial_scores.columns = renamed_columns
    return facial_scores
    

In [75]:
def create_merged_binary_emotion_data(df_merged):
    """Collapse the per-emotion scores into negative/positive averages.

    For both the `facial_*` and `text_*` columns of `df_merged`:
    the "negative" score is the row-wise mean of anger, disgust, fear and
    sadness; the "positive" score is the row-wise mean of neutral, surprise
    and joy.

    Returns a new DataFrame (same index as `df_merged`) with the columns
    `facial_agg_negative`, `facial_agg_positive`, `text_agg_negative`,
    `text_agg_positive`, in that order.
    """
    negative_emotions = ['anger', 'disgust', 'fear', 'sadness']
    positive_emotions = ['neutral', 'surprise', 'joy']

    aggregated = {}
    for modality in ('facial', 'text'):
        negative_cols = [f'{modality}_{emotion}' for emotion in negative_emotions]
        positive_cols = [f'{modality}_{emotion}' for emotion in positive_emotions]
        aggregated[f'{modality}_agg_negative'] = df_merged[negative_cols].mean(axis=1)
        aggregated[f'{modality}_agg_positive'] = df_merged[positive_cols].mean(axis=1)

    return pd.DataFrame(aggregated)

In [76]:
def facial_to_text_interval_merge(df_text, df_facial, interval):
    """Attach averaged facial emotion scores to every text segment.

    For each row of `df_text`, the rows of `df_facial` whose `start` lies in
    the half-open window `[text start, text start + interval)` are averaged
    column-wise. Windows that match no facial rows yield NaN scores.

    Parameters
    ----------
    df_text : pandas.DataFrame
        Must contain a `start` column; all of its columns are kept.
    df_facial : pandas.DataFrame
        Must contain `start` and `start_patch` plus seven emotion columns, in
        the order anger, disgust, fear, joy, sadness, surprise, neutral
        (the scores are labelled by position).
    interval : number
        Window length, in the same units as the `start` columns.

    Returns
    -------
    pandas.DataFrame
        `df_text` with seven additional `facial_*` columns, one row per text row.
    """
    facial_columns = ['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy',
                      'facial_sadness', 'facial_surprise', 'facial_neutral']

    facial_start = df_facial['start']
    interval_emotions = [
        df_facial.loc[(facial_start >= time) & (facial_start < (time + interval))]
        .mean(axis=0)
        .drop(['start', 'start_patch'])
        .to_numpy()
        for time in df_text['start'].to_numpy()
    ]

    # Reuse df_text's index so the column-wise concat lines rows up one-to-one
    # even when df_text does not carry a default 0..n-1 index.
    df_interval_emotions = pd.DataFrame(interval_emotions, columns=facial_columns,
                                        index=df_text.index)

    # Concatenate with the `df_text` argument (previously a notebook-level
    # global was used here, so the parameter had no effect on the left side).
    df_interval_merge = pd.concat([df_text, df_interval_emotions], axis=1)
    return df_interval_merge

# Hume/Text correlations

In [77]:
# Load the prepared facial (Hume) scores and the prepared transcript emotion
# scores; both paths point at the Cuban vocals morning files.
df_facial_vocals_morning = prepare_hume_data('../data/hume/processed/Cuban_vocals_morning.csv')
df_text_vocals_morning = prepare_text_data('../data/emotion_from_transcript/emotion-english-distilroberta-base/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv')

# Average each source per `start_patch`, inner-join them, and preview the result.
df_merged = merge_text_hume_data(df_facial_vocals_morning, df_text_vocals_morning)
df_merged.head()

  df_text = df_text.groupby('start_patch')['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral'].mean().reset_index()
  df_facial = df_facial.groupby('start_patch')['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral'].mean().reset_index()


Unnamed: 0,text_anger,text_disgust,text_fear,text_joy,text_sadness,text_surprise,text_neutral,facial_anger,facial_disgust,facial_fear,facial_joy,facial_sadness,facial_surprise,facial_neutral
0,0.061651,0.367261,0.031829,0.005832,0.496214,0.022023,0.01519,0.073753,0.132861,0.16554,0.128168,0.181144,0.103265,0.220051
1,0.022763,0.054074,0.026508,0.105159,0.552026,0.033015,0.206455,0.077141,0.138249,0.166908,0.121519,0.192415,0.098344,0.223063
2,0.031876,0.049549,0.013774,0.007627,0.696988,0.015867,0.18432,0.081929,0.108667,0.144073,0.165099,0.142597,0.108096,0.212238
3,0.012535,0.029529,0.004095,0.21381,0.620025,0.012278,0.107727,0.085003,0.111863,0.150903,0.151444,0.153683,0.110996,0.226161
4,0.133437,0.247246,0.01839,0.006704,0.418027,0.017145,0.159051,0.08119,0.119679,0.144456,0.144174,0.167758,0.100934,0.229378


### Calculate correlation

In [78]:
# Keep only the text-vs-facial quadrant of the full correlation matrix:
# rows are the text emotions, columns are the facial emotions.
text_emotion_rows = ['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral']
facial_emotion_cols = ['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral']
df_vocals_morning_corr = df_merged.corr().loc[text_emotion_rows, facial_emotion_cols]
df_vocals_morning_corr

Unnamed: 0,facial_anger,facial_disgust,facial_fear,facial_joy,facial_sadness,facial_surprise,facial_neutral
text_anger,0.398178,-0.182749,0.089047,0.097939,-0.212418,0.35895,0.015163
text_disgust,-0.118656,0.246331,0.161601,-0.12043,0.143068,-0.061674,0.134199
text_fear,0.185939,-0.020421,0.284355,-0.139052,0.019175,0.091603,0.33448
text_joy,-0.17279,0.211575,-0.183323,-0.102115,0.299441,-0.386459,0.283491
text_sadness,0.049356,-0.030553,0.076205,0.02166,-0.050236,0.038512,-0.132765
text_surprise,-0.122753,-0.136894,-0.296842,0.226442,-0.183378,-0.034425,-0.062296
text_neutral,0.077761,-0.15176,0.107488,-0.028667,-0.112817,0.245388,-0.184968


### Correlation of aggregated positive/negative emotions

In [79]:
# Reload the prepared facial and text frames; these are the same two calls,
# with the same paths, as in the "Hume/Text correlations" section.
df_facial_vocals_morning = prepare_hume_data('../data/hume/processed/Cuban_vocals_morning.csv')
df_text_vocals_morning = prepare_text_data('../data/emotion_from_transcript/emotion-english-distilroberta-base/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv')

In [80]:
# Per-`start_patch` means of both sources, inner-joined on `start_patch`.
df_merged = merge_text_hume_data(df_facial_vocals_morning, df_text_vocals_morning)


  df_text = df_text.groupby('start_patch')['text_anger', 'text_disgust', 'text_fear', 'text_joy', 'text_sadness', 'text_surprise', 'text_neutral'].mean().reset_index()
  df_facial = df_facial.groupby('start_patch')['facial_anger', 'facial_disgust', 'facial_fear', 'facial_joy', 'facial_sadness', 'facial_surprise', 'facial_neutral'].mean().reset_index()


In [81]:
# Aggregate the emotions into negative/positive scores, then keep only the
# text-vs-facial quadrant of the correlation matrix (text rows, facial columns).
df_facial_text_binary = create_merged_binary_emotion_data(df_merged)
df_facial_text_binary_corr = df_facial_text_binary.corr().loc[
    ['text_agg_negative', 'text_agg_positive'],
    ['facial_agg_negative', 'facial_agg_positive'],
]
df_facial_text_binary_corr

Unnamed: 0,facial_agg_negative,facial_agg_positive
text_agg_negative,0.0663,0.009714
text_agg_positive,-0.0663,-0.009714


In [82]:
# Show the aggregated negative/positive scores that the correlation above was computed from.
df_facial_text_binary

Unnamed: 0,facial_agg_negative,facial_agg_positive,text_agg_negative,text_agg_positive
0,0.138325,0.150494,0.239239,0.014348
1,0.143678,0.147642,0.163843,0.114876
2,0.119316,0.161811,0.198046,0.069271
3,0.125363,0.162867,0.166546,0.111272
4,0.128271,0.158162,0.204275,0.060967
5,0.125741,0.158316,0.095134,0.206487
6,0.13079,0.154684,0.125326,0.166232
7,0.124946,0.159837,0.187446,0.083405
8,0.134568,0.153506,0.155089,0.126548
9,0.12266,0.162746,0.224313,0.034249


# Correlation between binary text sentiment and facial emotions

In [83]:
# Read the processed facial CSV and the binary-sentiment transcript CSV directly,
# without the prepare_* helpers.
# NOTE(review): this rebinds `df_facial_vocals_morning` and `df_text_vocals_morning`,
# which earlier cells used for the prepared (renamed) frames.
df_facial_vocals_morning = pd.read_csv('../data/hume/processed/Cuban_vocals_morning.csv', index_col=0)
df_text_vocals_morning = pd.read_csv('../data/emotion_from_transcript/binary_sentiment/jazzaar-20230417-morning-JFLB-Cuban-Vocals-video Philipp.csv', index_col=0)

In [84]:
# Re-reads the same facial CSV as the previous cell and displays it.
df_facial_vocals_morning = pd.read_csv('../data/hume/processed/Cuban_vocals_morning.csv', index_col=0)
df_facial_vocals_morning

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,start_patch,start
0,0.039957,0.114220,0.126544,0.169541,0.137182,0.147788,0.157061,0.0,0.1
1,0.037922,0.104910,0.110700,0.190294,0.127883,0.129474,0.150551,0.0,0.2
2,0.044073,0.118693,0.129124,0.167583,0.158697,0.130753,0.167633,0.0,0.3
3,0.041380,0.134487,0.140038,0.133327,0.198558,0.126955,0.179034,0.0,0.4
4,0.047777,0.118812,0.136461,0.141578,0.157510,0.128101,0.191475,0.0,0.5
...,...,...,...,...,...,...,...,...,...
119525,0.066564,0.165001,0.129338,0.157906,0.196070,0.089844,0.211994,160.0,9712.4
119526,0.047728,0.143472,0.094047,0.257106,0.128997,0.098761,0.148710,160.0,9712.5
119527,0.064104,0.144483,0.124494,0.160216,0.188909,0.095583,0.197851,160.0,9712.6
119528,0.064802,0.112709,0.161865,0.105411,0.165901,0.067118,0.224333,155.0,9581.7


In [112]:
# For every text segment, average the facial scores whose `start` falls within
# `interval` of the segment's `start`; segments with no matching facial rows get NaN.
# NOTE(review): interval=1 is in the units of the `start` columns -- presumably
# seconds; confirm. The segment's `end` is not used for the window.
df_vocals_morning_intervals = facial_to_text_interval_merge(df_text_vocals_morning, df_facial_vocals_morning, interval=1)
df_vocals_morning_intervals

Unnamed: 0,segment_id,start,end,positive,negative,facial_anger,facial_disgust,facial_fear,facial_joy,facial_sadness,facial_surprise,facial_neutral
0,1,129,153,0,1,0.068622,0.135376,0.181759,0.095179,0.202428,0.105425,0.233831
1,3,259,270,0,1,0.093623,0.114262,0.143456,0.138475,0.155632,0.092835,0.257007
2,4,273,284,1,0,0.095924,0.112696,0.160754,0.140884,0.148142,0.114377,0.226874
3,5,312,325,1,0,0.077797,0.154243,0.155600,0.106920,0.203486,0.093257,0.240097
4,6,411,424,0,1,0.070274,0.125345,0.160910,0.150853,0.181846,0.098128,0.229262
...,...,...,...,...,...,...,...,...,...,...,...,...
61,105,9308,9328,1,0,,,,,,,
62,108,9683,9701,1,0,0.115525,0.113191,0.173748,0.106511,0.184771,0.102814,0.249508
63,109,9707,9718,1,0,0.074407,0.112912,0.151828,0.183636,0.142351,0.124457,0.208415
64,110,9720,9748,1,0,0.087869,0.089916,0.232810,0.104099,0.119345,0.250630,0.164615


In [113]:
# Keep the `positive` label and the facial scores, discard segments without
# facial data, then append the aggregated positive/negative facial scores.
facial_positive_cols = ['facial_neutral', 'facial_surprise', 'facial_joy']
facial_negative_cols = ['facial_anger', 'facial_disgust', 'facial_fear', 'facial_sadness']
df_vocals_morning_intervals_cleaned = (
    df_vocals_morning_intervals
    .drop(columns=['segment_id', 'start', 'end', 'negative'])
    .dropna(axis=0)
    .assign(
        facial_agg_positive=lambda scores: scores[facial_positive_cols].mean(axis=1),
        facial_agg_negative=lambda scores: scores[facial_negative_cols].mean(axis=1),
    )
)

In [114]:
# Point-biserial correlation between the binary `positive` text label and the
# continuous `facial_neutral` score.
# NOTE(review): the aggregated `facial_agg_positive` column is computed in the
# previous cell but not used here -- confirm `facial_neutral` is the intended variable.
binary_positive_label = df_vocals_morning_intervals_cleaned['positive']
facial_neutral_score = df_vocals_morning_intervals_cleaned['facial_neutral']
correlation_coefficient, p_value = pointbiserialr(binary_positive_label, facial_neutral_score)

print("Point-Biserial Correlation Coefficient:", correlation_coefficient)
print("p-value:", p_value)

Point-Biserial Correlation Coefficient: -0.004665334243413859
p-value: 0.9722736270842054


In [116]:
# Load the processed FER output for Cuban_vocals_morning (first CSV column as index) and display it.
df_fer = pd.read_csv('../data/FER_Output/processed/Cuban_vocals_morning.csv', index_col=0)
df_fer

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,neutral,start_patch
0,0.214425,0.000394,0.131845,0.240804,0.236482,0.008039,0.166783,0
1,0.315261,0.00039,0.131789,0.141539,0.214871,0.010243,0.184813,5
2,0.229541,0.00038,0.113441,0.211516,0.202054,0.014216,0.227643,10
3,0.185469,0.001125,0.187967,0.199728,0.263882,0.029651,0.130991,15
4,0.103115,0.000189,0.11005,0.193321,0.323976,0.019419,0.248989,20
5,0.128038,0.000704,0.101215,0.31028,0.254838,0.010516,0.193009,25
6,0.188894,0.001633,0.123617,0.222357,0.28928,0.00996,0.163117,30
7,0.14786,0.002095,0.12986,0.24062,0.30666,0.01492,0.15656,35
8,0.210137,0.001401,0.11564,0.172801,0.288479,0.011405,0.199185,40
9,0.188937,0.000605,0.13158,0.1624,0.30705,0.019716,0.188749,45
