In [205]:
import numpy as np
from scipy.stats import chi2_contingency


import pandas as pd

In [206]:
csv_filename = 'JointDB_MetaEmotionConcised.csv'
data = pd.read_csv(csv_filename)
data = data.dropna()

In [207]:
# Discretize 'EstimatedEmotionConcised' and 'RaceConcised' into a new column
emotion_mapping = {'Positive': 0, 'Negative': 1, 'Surprise': 2}  # Example mapping
race_mapping = {'Asian': 0, 'Non-Asian': 1}  # Example mapping

data['Emotion_Discretized'] = data['EstimatedEmotionConcised'].map(emotion_mapping)
data['Race_Discretized'] = data['RaceConcised'].map(race_mapping)

# Label AU12 into a binary column
# Assuming 'AUs' is a column containing a list or string of AU codes
data['AU6_Label'] = data['Action Units'].apply(lambda x: 1 if '6' in str(x) else 0)

data['AU12_Label'] = data['Action Units'].apply(lambda x: 1 if '12' in str(x) else 0)


data['AU1_Label'] = data['Action Units'].apply(lambda x: 1 if '1' in str(x) else 0)
data['AU2_Label'] = data['Action Units'].apply(lambda x: 1 if '2' in str(x) else 0)
data['AU5_Label'] = data['Action Units'].apply(lambda x: 1 if '5' in str(x) else 0)
data['AU26_Label'] = data['Action Units'].apply(lambda x: 1 if '26' in str(x) else 0)


In [208]:
data

Unnamed: 0,Subject,Filename,Action Units,Estimated Emotion,RaceConcised,EstimatedEmotionConcised,Dataset,Emotion_Discretized,Race_Discretized,AU6_Label,AU12_Label,AU1_Label,AU2_Label,AU5_Label,AU26_Label
0,sub01,EP02_01f,12,happiness,Asian,Positive,casme2,0,0,0,1,1,1,0,0
1,sub01,EP19_05f,4+L10,disgust,Asian,Negative,casme2,1,0,0,0,1,0,0,0
2,sub01,EP19_06f,4+5+L10,disgust,Asian,Negative,casme2,1,0,0,0,1,0,1,0
3,sub02,EP01_11f,15,repression,Asian,Negative,casme2,1,0,0,0,1,0,1,0
4,sub02,EP02_04f,12+15,repression,Asian,Negative,casme2,1,0,0,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,035,035_6_3,R20B,fear,Asian,Negative,samm,1,0,0,0,0,1,0,0
286,035,035_7_1,A1B+A2C,surprise,Asian,Surprise,samm,2,0,0,0,1,1,0,0
287,035,035_7_2,R14A or 17A or 24A,contempt,Asian,Negative,samm,1,0,0,0,1,1,0,0
288,036,036_7_3,R10A+25+26,disgust,Asian,Negative,samm,1,0,1,0,1,1,1,1


In [209]:
data.loc[data['Emotion_Discretized'] == 0]['AU6_Label'].value_counts()


AU6_Label
0    46
1    12
Name: count, dtype: int64

In [210]:
data.loc[data['Emotion_Discretized'] == 0]['AU12_Label'].value_counts()


AU12_Label
1    53
0     5
Name: count, dtype: int64

In [211]:
#data.loc[data['Emotion_Discretized'] == 0]

In [212]:
# negative_active_df = data.loc[data['Emotion_Discretized'] == 0]

In [213]:
# au12_active_df = data.loc[data['AU12_Label'] == 1]
positive_active_df = data.loc[data['Emotion_Discretized'] == 0]
au12_active_df_non_asian = positive_active_df.loc[positive_active_df['Race_Discretized'] == 1]
au12_active_df_asian = positive_active_df.loc[positive_active_df['Race_Discretized'] == 0]

# Calculate AU12 and AU6 co-occurrence for Asian
asian_positive_au12_au6 = au12_active_df_asian[
    (au12_active_df_asian['AU12_Label'] == 1) & (au12_active_df_asian['AU6_Label'] == 1)
]
asian_positive_au12_au6_count = len(asian_positive_au12_au6)

# Calculate AU12 and AU6 co-occurrence for Non-Asian
non_asian_positive_au12_au6 = au12_active_df_non_asian[
    (au12_active_df_non_asian['AU12_Label'] == 1) & (au12_active_df_non_asian['AU6_Label'] == 1)
]
non_asian_positive_au12_au6_count = len(non_asian_positive_au12_au6)

co_occurrence_au6_au12 = pd.concat([asian_positive_au12_au6, non_asian_positive_au12_au6], axis=0)

In [None]:
au12_active_df_asian.shape

(33, 15)

In [None]:
# au12_active_df = data.loc[data['AU12_Label'] == 1]
surprise_active_df = data.loc[data['Emotion_Discretized'] == 2]
au26_active_df_non_asian = surprise_active_df.loc[surprise_active_df['Race_Discretized'] == 1]
au26_active_df_asian = surprise_active_df.loc[surprise_active_df['Race_Discretized'] == 0]

# # Calculate AU12 and AU6 co-occurrence for Asian
# asian_positive_au12_au6 = au12_active_df_asian[
#     (au12_active_df_asian['AU12_Label'] == 1) & (au12_active_df_asian['AU6_Label'] == 1)
# ]
# asian_positive_au12_au6_count = len(asian_positive_au12_au6)

# # Calculate AU12 and AU6 co-occurrence for Non-Asian
# non_asian_positive_au12_au6 = au12_active_df_non_asian[
#     (au12_active_df_non_asian['AU12_Label'] == 1) & (au12_active_df_non_asian['AU6_Label'] == 1)
# ]
# non_asian_positive_au12_au6_count = len(non_asian_positive_au12_au6)



In [None]:
print('Number of Co-occurring AU6 and AU12 for Asians: {}'.format(asian_positive_au12_au6_count))
print('Number of Co-occurring AU6 and AU12 for Non-asians: {}'.format(non_asian_positive_au12_au6_count))

Number of Co-occurring AU6 and AU12 for Asians: 8
Number of Co-occurring AU6 and AU12 for Non-asians: 4


In [None]:
# au26_df = data.loc[data['AU12_Label'] == 1]
surprise_active_df = data.loc[data['Emotion_Discretized'] == 2]
au26_active_df_non_asian = surprise_active_df.loc[surprise_active_df['Race_Discretized'] == 1]
au26_active_df_asian = surprise_active_df.loc[surprise_active_df['Race_Discretized'] == 0]

# Calculate AU12 and AU6 co-occurrence for Asian
asian_positive_au_active = au26_active_df_asian[
    ((au26_active_df_asian['AU1_Label'] == 1) | 
    (au26_active_df_asian['AU2_Label'] == 1) | 
    (au26_active_df_asian['AU5_Label'] == 1)) & 
    (au26_active_df_asian['AU26_Label'] == 1)
]
asian_positive_au26_count = len(asian_positive_au_active)

# Calculate AU12 and AU6 co-occurrence for Non-Asian
non_asian_positive_au_active = au26_active_df_non_asian[
    ((au26_active_df_non_asian['AU1_Label'] == 1) | 
    (au26_active_df_non_asian['AU2_Label'] == 1) | 
    (au26_active_df_non_asian['AU5_Label'] == 1)) & 
    (au26_active_df_non_asian['AU26_Label'] == 1)
]
non_asian_positive_au26_count = len(non_asian_positive_au_active)



In [None]:
# asian_positive_au_active

In [None]:
print('Number of Co-occurring AU1 2 5 and AU26 for Asians: {}'.format(asian_positive_au26_count))
print('Number of Co-occurring AU1 2 5 and AU26 for Non-asians: {}'.format(non_asian_positive_au26_count))

Number of Co-occurring AU1 2 5 and AU26 for Asians: 1
Number of Co-occurring AU1 2 5 and AU26 for Non-asians: 2


In [232]:
# #columns_to_chi_squared = ['Emotion_Discretized', 'AU12_Label']
# columns_to_chi_squared = ['Race_Discretized', 'AU6_Label']
# positive_au12_df = au12_active_df_asian[columns_to_chi_squared]
# positive_au12_non_asian_df = au12_active_df_non_asian[columns_to_chi_squared]
# positive_au12_df = pd.concat([positive_au12_df, positive_au12_non_asian_df], axis=0)

# # columns_to_chi_squared = ['Race_Discretized', 'AU26_Label']
# # surprise_au26_df = au26_active_df_asian[columns_to_chi_squared]
# # surprise_au26_non_asian_df = au26_active_df_non_asian[columns_to_chi_squared]
# # surprise_au26_df = pd.concat([surprise_au26_df, surprise_au26_non_asian_df], axis=0)

columns_to_chi_squared = ['AU6_Label', 'AU12_Label']
positive_cooccurrence = au12_active_df_non_asian[columns_to_chi_squared]

In [233]:
positive_cooccurrence.shape

(25, 2)

In [234]:
contingency_table = positive_cooccurrence.groupby('AU12_Label')['AU6_Label'].value_counts().unstack(fill_value=0)

# Ensure 'AU12_Label = 0' is included with all values set to 0
if 0 not in contingency_table.index:
    contingency_table.loc[0] = [0] * len(contingency_table.columns)

#contingency_table_col1 = pd.DataFrame([0, 1])
#contingency_table = pd.concat([contingency_table_col1, contingency_table], axis=1)

#contingency_table_nonasian = positive_au12_non_asian_df.groupby('Emotion_Discretized')['AU12_Label'].value_counts().unstack(fill_value=0)
# contingency_table_col1 = pd.DataFrame([0, 1, 2])
#contingency_table_nonasian = pd.concat([contingency_table_col1, contingency_table_nonasian], axis=1)


In [235]:
contingency_table

AU6_Label,0,1
AU12_Label,Unnamed: 1_level_1,Unnamed: 2_level_1
1,21,4
0,0,0


In [223]:
from scipy.stats import fisher_exact

In [231]:
#import numpy as np
#from scipy.stats import chi2_contingency

# Generate random data for a, b, c, and d
a, b, c, d = np.random.randint(10, 100, size=4)  # Random integers between 10 and 100

# Create the contingency table
data = np.array([[a, b],
                 [c, d]])
print(data.shape)

data = contingency_table + 1

# Perform the Chi-Square test
chi2, p, dof, ex = chi2_contingency(data)

# Print the results
#print("Contingency Table:\n", data)
print("Chi-Square Statistic:", chi2)
print("Degrees of Freedom:", dof)
print("p-value:", p)
print("Expected Frequencies:\n", ex)


# Run Fisher's Exact Test
oddsratio, p_value = fisher_exact(data)

print("Odds Ratio:", oddsratio)
print("p-value:", p_value)

(2, 2)
Chi-Square Statistic: 0.13720017636684298
Degrees of Freedom: 1
p-value: 0.7110804347751484
Expected Frequencies:
 [[ 5.10810811  1.89189189]
 [21.89189189  8.10810811]]
Odds Ratio: 2.5714285714285716
p-value: 0.6471409955755306


In [236]:
#import numpy as np
#from scipy.stats import chi2_contingency

# Generate random data for a, b, c, and d
a, b, c, d = np.random.randint(10, 100, size=4)  # Random integers between 10 and 100

# Create the contingency table
data = np.array([[a, b],
                 [c, d]])
print(data.shape)

data = contingency_table + 1

# Perform the Chi-Square test
chi2, p, dof, ex = chi2_contingency(data)

# Print the results
#print("Contingency Table:\n", data)
print("Chi-Square Statistic:", chi2)
print("Degrees of Freedom:", dof)
print("p-value:", p)
print("Expected Frequencies:\n", ex)

# Run Fisher's Exact Test
oddsratio, p_value = fisher_exact(data)

print("Odds Ratio:", oddsratio)
print("p-value:", p_value)

(2, 2)
Chi-Square Statistic: 0.02432232957595276
Degrees of Freedom: 1
p-value: 0.8760675630594825
Expected Frequencies:
 [[21.4137931  5.5862069]
 [ 1.5862069  0.4137931]]
Odds Ratio: 4.4
p-value: 0.37684729064039413
