In [11]:
import pandas as pd
import numpy as np

## SAMM

In [12]:
# Load the SAMM metadata workbook into `df`; the next cell reads its
# 'EstimatedEmotionConcised' and 'RaceConcised' columns.
path = '/home/hq/Documents/WorkingRepos/AU_Localization/CD6ME_Ethnic/SAMMMetaEmotionConcised.xlsx'
df = pd.read_excel(path)

In [13]:
# Drop the catch-all 'Other' class, then count SAMM samples per
# (concised emotion, concised race) pair.
df = df.loc[df['EstimatedEmotionConcised'].ne('Other')]
emotion_ethnic_distribution = pd.crosstab(
    index=df['EstimatedEmotionConcised'],
    columns=df['RaceConcised'],
)

# Show the resulting contingency table
print(emotion_ethnic_distribution)

RaceConcised              Asian  Non-Asian
EstimatedEmotionConcised                  
Negative                     36         56
Positive                      1         25
Surprise                      5         10


## CASME 2

### Preprocessing

In [14]:
def categorize_emotion(emotion):
    """Collapse a fine-grained emotion label into a coarse group.

    Matching ignores letter case and surrounding whitespace, so 'Happiness'
    and ' others ' are grouped the same way as 'happiness' and 'others'.

    Args:
        emotion: The emotion label, normally a string.

    Returns:
        'Positive' for happiness, 'Surprise' for surprise, 'Other' for
        'other'/'others', and 'Negative' for every other value.
    """
    if not isinstance(emotion, str):
        # Non-string labels (e.g. NaN) keep the historical fall-through result.
        return 'Negative'

    groups = {
        'happiness': 'Positive',
        'surprise': 'Surprise',
        'others': 'Other',
        'other': 'Other',  # singular spelling, treated like 'others'
    }
    return groups.get(emotion.strip().lower(), 'Negative')

In [15]:
# Source files: the CASME2 coding sheet and the per-subject demographic table.
casme2_annotation = '/home/hq/Documents/data/CASME2/CASME2-coding-updated.xlsx'
path = '/home/hq/Documents/WorkingRepos/AU_Localization/CD6ME_Ethnic/results_casme2.csv'
casme2_annotation_df = pd.read_excel(casme2_annotation)
ethnic_labelled_df = pd.read_csv(path)

# Drop the first three characters of each subject id and keep the remainder
# as an integer; this is the key used for the merge below.
ethnic_labelled_df['Subject'] = ethnic_labelled_df['Subject'].map(
    lambda subject_id: int(subject_id[3:])
)
# Race codes of 2 and above are grouped as 'Asian', everything else as 'Non-Asian'.
ethnic_labelled_df['RaceConcised'] = ethnic_labelled_df['Race'].map(
    lambda race_code: 'Asian' if race_code >= 2 else 'Non-Asian'
)

# Attach the demographic columns to every annotated clip of the same subject,
# then add the coarse emotion group.
merged_df = casme2_annotation_df.merge(
    ethnic_labelled_df[['Subject', 'Age', 'Gender', 'Race', 'RaceConcised']],
    on='Subject',
    how='left',
)
merged_df['EstimatedEmotionConcised'] = merged_df['Estimated Emotion'].map(categorize_emotion)

# Discard the columns whose names start with "Unnamed"
merged_df = merged_df.loc[:, ~merged_df.columns.str.match('Unnamed')]




In [None]:
# Write the merged CASME2 table to a workbook in the working directory,
# without the DataFrame index. The "Getting insights" cell reads a file of
# this same name back in.
path = 'CASME2MetaEmotionConcised.xlsx'
merged_df.to_excel(path, index=False)

In [16]:
# Inspect the merged CASME2 table (annotations plus demographic columns).
merged_df

Unnamed: 0,Subject,Filename,OnsetFrame,ApexFrame,OffsetFrame,Action Units,Estimated Emotion,Age,Gender,Race,RaceConcised,EstimatedEmotionConcised
0,1,EP02_01f,46,59,86,12,happiness,2,0,2,Asian,Positive
1,1,EP03_02,131,139,161,18,others,2,0,2,Asian,Other
2,1,EP04_02,21,54,76,4,others,2,0,2,Asian,Other
3,1,EP04_03,31,41,56,4,others,2,0,2,Asian,Other
4,1,EP04_04,23,49,66,4,others,2,0,2,Asian,Other
...,...,...,...,...,...,...,...,...,...,...,...,...
251,26,EP18_46,31,46,101,17,others,1,1,2,Asian,Other
252,26,EP18_47,6,49,86,4,disgust,1,1,2,Asian,Negative
253,26,EP18_49,16,54,80,4,disgust,1,1,2,Asian,Negative
254,26,EP18_50,78,99,161,4,disgust,1,1,2,Asian,Negative


In [42]:
# List the column names of merged_df.
merged_df.columns

Index(['Subject', 'Filename', 'OnsetFrame', 'ApexFrame', 'OffsetFrame',
       'Action Units', 'Estimated Emotion', 'Age', 'Gender', 'Race',
       'RaceConcised', 'EstimatedEmotionConcised'],
      dtype='object')

### Getting insights

In [17]:
# Workbooks holding the per-dataset metadata with concised emotion/race labels.
casme2_concised_path = 'CASME2MetaEmotionConcised.xlsx'
samm_concised_path = 'SAMMMetaEmotionConcised.xlsx'

columns = ['Subject', 'Filename', 'Action Units', 'Estimated Emotion', 'RaceConcised', 'EstimatedEmotionConcised', 'Race']
# .copy() gives independent frames, so the column assignments below do not
# write into a selection of the loaded tables (SettingWithCopyWarning).
casme2_concised_df = pd.read_excel(casme2_concised_path)[columns].copy()
samm_concised_df = pd.read_excel(samm_concised_path)[columns].copy()

# Zero-pad subject ids (2 characters for CASME2, 3 for SAMM) to try to match
# the subject strings used in the AU metadata csv, and tag each row with its
# source dataset.
casme2_concised_df['Subject'] = casme2_concised_df['Subject'].astype(str).str.zfill(2)
casme2_concised_df['Dataset'] = 'casme2'
samm_concised_df['Subject'] = samm_concised_df['Subject'].astype(str).str.zfill(3)
samm_concised_df['Dataset'] = 'samm'

# Stack both datasets, lower-case the emotion labels so the two sources agree,
# and drop the catch-all 'other'/'others' class.
concat_df = pd.concat([casme2_concised_df, samm_concised_df], ignore_index=True)
concat_df['Estimated Emotion'] = concat_df['Estimated Emotion'].str.lower()
is_other = concat_df['Estimated Emotion'].isin(['other', 'others'])
# Copy again: later cells assign columns on concat_df.
concat_df = concat_df.loc[~is_other].copy()

### Export to xlsx/csv

In [18]:
def _with_sub_prefix(subject):
    """Prefix a two-character subject id with 'sub'; return any other id unchanged."""
    return 'sub' + subject if len(subject) == 2 else subject


# Raw annotation sheets of both datasets.
samm_annotations = pd.read_excel('/home/hq/Documents/data/SAMM/SAMM_annotation.xlsx')
casme2_annotation = pd.read_excel('/home/hq/Documents/data/CASME2/CASME2-coding-updated.xlsx')

# Normalise subject ids: SAMM zero-padded to 3 characters, CASME2 zero-padded
# to 2 characters and then prefixed with 'sub'.
samm_annotations['Subject'] = samm_annotations['Subject'].astype(str).str.zfill(3)
casme2_annotation['Subject'] = (
    casme2_annotation['Subject'].astype(str).str.zfill(2).map(_with_sub_prefix)
)

# Apply the same prefix on a derived copy instead of rewriting concat_df in
# place: concat_df was built in an earlier cell and is used again by later
# cells, so this cell should not change it as a side effect.
concat_df_prefixed = concat_df.assign(Subject=concat_df['Subject'].map(_with_sub_prefix))
# One row per subject is enough to look up the race group and source dataset.
concat_df_only_ethnic = concat_df_prefixed.drop_duplicates(subset='Subject')
joint_annotation = pd.concat([casme2_annotation, samm_annotations])

# Attach race group and dataset to every annotation row, then drop the rows
# of subjects that have no race label in concat_df.
merged_df = pd.merge(joint_annotation, concat_df_only_ethnic[['Subject', 'RaceConcised', 'Dataset']], on='Subject', how='left')
merged_df = merged_df.dropna(subset=['RaceConcised'])


In [21]:
# Distinct race groups present in the combined frame.
concat_df['RaceConcised'].unique()

array(['Asian', 'Non-Asian'], dtype=object)

In [24]:
# Sample counts per concised emotion group (rows) and raw Race code (columns)
concat_df_distribution = pd.crosstab(
    index=concat_df['EstimatedEmotionConcised'],
    columns=concat_df['Race'],
)

# Show the table
print(concat_df_distribution)

Race                       0  1    2  3  4
EstimatedEmotionConcised                  
Negative                  52  4  122  5  9
Positive                  25  0   33  0  0
Surprise                  10  0   28  0  2


In [213]:
# Give two-character subject ids the 'sub' prefix (ids of any other length
# pass through unchanged), then write the combined table in both formats.
concat_df['Subject'] = concat_df['Subject'].map(
    lambda subject_id: subject_id if len(subject_id) != 2 else 'sub' + subject_id
)
concat_df.to_excel('JointDB_MetaEmotionConcised.xlsx', index=False)
concat_df.to_csv('JointDB_MetaEmotionConcised.csv', index=False)

In [214]:
# Inspect the combined CASME2 + SAMM metadata frame.
concat_df

Unnamed: 0,Subject,Filename,Action Units,Estimated Emotion,RaceConcised,EstimatedEmotionConcised,Dataset
0,sub01,EP02_01f,12,happiness,Asian,Positive,casme2
7,sub01,EP19_05f,4+L10,disgust,Asian,Negative,casme2
8,sub01,EP19_06f,4+5+L10,disgust,Asian,Negative,casme2
9,sub02,EP01_11f,15,repression,Asian,Negative,casme2
10,sub02,EP02_04f,12+15,repression,Asian,Negative,casme2
...,...,...,...,...,...,...,...
408,035,035_6_3,R20B,fear,Asian,Negative,samm
409,035,035_7_1,A1B+A2C,surprise,Asian,Surprise,samm
410,035,035_7_2,R14A or 17A or 24A,contempt,Asian,Negative,samm
411,036,036_7_3,R10A+25+26,disgust,Asian,Negative,samm


In [91]:
# Inspect the CASME2 part of the metadata.
casme2_concised_df

Unnamed: 0,Subject,Filename,Action Units,Estimated Emotion,RaceConcised,EstimatedEmotionConcised
0,01,EP02_01f,12,happiness,Asian,Positive
1,01,EP03_02,18,others,Asian,Other
2,01,EP04_02,4,others,Asian,Other
3,01,EP04_03,4,others,Asian,Other
4,01,EP04_04,4,others,Asian,Other
...,...,...,...,...,...,...
251,26,EP18_46,17,others,Asian,Other
252,26,EP18_47,4,disgust,Asian,Negative
253,26,EP18_49,4,disgust,Asian,Negative
254,26,EP18_50,4,disgust,Asian,Negative


In [79]:
# Sample counts per fine-grained emotion label (rows) and race group (columns)
concat_df_distribution = pd.crosstab(
    index=concat_df['Estimated Emotion'],
    columns=concat_df['RaceConcised'],
)

# Show the table
print(concat_df_distribution)

RaceConcised       Asian  Non-Asian
Estimated Emotion                  
anger                 20         37
contempt               6          6
disgust               67          6
fear                   6          4
happiness             33         25
repression            27          0
sadness               10          3
surprise              30         10


In [26]:
# Sample counts per concised emotion group (rows) and race group (columns)
concat_df_distribution = pd.crosstab(
    index=concat_df['EstimatedEmotionConcised'],
    columns=concat_df['RaceConcised'],
)

# Show the table
print(concat_df_distribution)

RaceConcised              Asian  Non-Asian
EstimatedEmotionConcised                  
Negative                    136         56
Positive                     33         25
Surprise                     30         10


### Happiness AU Data Distribution

In [95]:
# Load the AU table and keep only the rows of the two datasets analysed here.
au_csv = pd.read_csv('cross_dataset_seq.csv')
aus = ['Subject', 'Filename', "AU1", "AU2", "AU4", "AU5", "AU6", "AU7", "AU9", "AU10", "AU12", "AU14", "AU15", "AU17"]
au_selected = au_csv.loc[au_csv['dataset'].isin(['casme2', 'samm'])]
# Expose 'subject'/'material' under the key names used by the metadata frame.
# assign() returns a new frame, so nothing is written into a slice of the
# loaded table (which triggers SettingWithCopyWarning).
au_selected = au_selected.assign(
    Subject=lambda frame: frame['subject'],
    Filename=lambda frame: frame['material'],
)
au_csv = au_selected[aus]


In [115]:
# Join the metadata with the AU columns on (Subject, Filename).
# Earlier cells may already have rewritten CASME2 subject ids in concat_df to
# the 'subNN' form. Strip a leading 'sub' from the join key on both sides so
# that ids match whether or not that prefix has been applied; the merged
# frame carries the bare id.
meta_keys = concat_df.assign(
    Subject=concat_df['Subject'].str.replace(r'^sub', '', regex=True)
)
au_keys = au_csv.assign(
    Subject=au_csv['Subject'].str.replace(r'^sub', '', regex=True)
)
merged_df = pd.merge(meta_keys, au_keys, on=['Subject', 'Filename'], how='left')

# Subset used by the happiness AU analysis.
happiness_df = merged_df.loc[merged_df['Estimated Emotion'] == 'happiness']

In [116]:
# Inspect the metadata joined with the AU columns.
merged_df

Unnamed: 0,Subject,Filename,Action Units,Estimated Emotion,RaceConcised,EstimatedEmotionConcised,AU1,AU2,AU4,AU5,AU6,AU7,AU9,AU10,AU12,AU14,AU15,AU17
0,01,EP02_01f,12,happiness,Asian,Positive,0,0,0,0.0,0,0,0,0,1,0,0,0
1,01,EP19_05f,4+L10,disgust,Asian,Negative,0,0,1,0.0,0,0,0,1,0,0,0,0
2,01,EP19_06f,4+5+L10,disgust,Asian,Negative,0,0,1,1.0,0,0,0,1,0,0,0,0
3,02,EP01_11f,15,repression,Asian,Negative,0,0,0,0.0,0,0,0,0,0,0,1,0
4,02,EP02_04f,12+15,repression,Asian,Negative,0,0,0,0.0,0,0,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,035,035_6_3,R20B,fear,Asian,Negative,0,0,0,0.0,0,0,0,0,0,0,0,0
286,035,035_7_1,A1B+A2C,surprise,Asian,Surprise,1,1,0,0.0,0,0,0,0,0,0,0,0
287,035,035_7_2,R14A or 17A or 24A,contempt,Asian,Negative,0,0,0,0.0,0,0,0,0,0,1,0,0
288,036,036_7_3,R10A+25+26,disgust,Asian,Negative,0,0,0,0.0,0,0,0,1,0,0,0,0


In [None]:
# Names of all columns of the happiness subset that contain 'AU'
au_columns = [column for column in happiness_df.columns if 'AU' in column]

# Count, per race group, how often each observed combination of AU values occurs
au_value_series = [happiness_df[au] for au in au_columns]
concat_df_distribution = pd.crosstab(
    index=happiness_df['RaceConcised'],
    columns=au_value_series,
)

# Show the table
print(concat_df_distribution)

AU1            0                                          1
AU2            0                                      1   0
AU4            0                                  1   0   0
AU5          0.0                                0.0 0.0 0.0
AU6            0                        1         0   0   0
AU7            0                     1  0     1   0   0   0
AU9            0                     0  0     0   0   0   0
AU10           0                  1  0  0     0   0   0   0
AU12           0         1        1  1  1     1   1   1   1
AU14           0  1      0     1  0  0  0     0   0   0   0
AU15           1  0  1   0  1  0  0  0  0     0   0   0   0
AU17           0  0  0   0  0  0  0  0  0  1  0   0   0   0
RaceConcised                                               
Asian          1  3  1  16  1  1  0  0  6  1  1   1   0   1
Non-Asian      0  0  0  17  2  0  1  3  1  0  0   0   1   0


In [127]:
# Mean of every AU column within each race group of the happiness subset
average_au_scores = happiness_df.groupby('RaceConcised')[au_columns].agg('mean')

# Pull out one row of means per group
asian_au_scores = average_au_scores.loc['Asian']
non_asian_au_scores = average_au_scores.loc['Non-Asian']

# Place the two groups side by side, one row per AU, and show the result
comparison_df = pd.DataFrame(
    {
        'Asian': asian_au_scores,
        'Non-Asian': non_asian_au_scores,
    }
)
print(comparison_df)

         Asian  Non-Asian
AU1   0.030303       0.00
AU2   0.000000       0.04
AU4   0.030303       0.00
AU5   0.000000       0.00
AU6   0.242424       0.04
AU7   0.030303       0.12
AU9   0.000000       0.00
AU10  0.000000       0.04
AU12  0.848485       1.00
AU14  0.151515       0.00
AU15  0.090909       0.08
AU17  0.030303       0.00


In [128]:
# (rows, columns) of the happiness-only subset.
happiness_df.shape

(58, 18)