In [None]:
import pandas as pd
from saac.statistics import ks2sample_test
import seaborn as sns; sns.set(style='darkgrid', palette ='colorblind', color_codes=True)

# Relative path of the directory the result CSV files are read from.
respath = '../../data/evaluation/processed/'

## Occupation Results

In [None]:
# Load the occupation results and sort them by the `a_median` column.
occ_res_all = pd.read_csv(respath + 'Occupation_Results.csv').sort_values('a_median')
print(f'Total rows: {len(occ_res_all)}')
print('Counts of sampled wage categories for median annual wage for all possible gender detected values ')

# Preset display order for both axes so the table is easier to interpret.
wage_order = ['very low', 'low', 'medium', 'high', 'very high']
gender_order = ['man', 'woman', 'unknown', 'no face']

# Reindex rows and columns in a single call: a category that is absent from the
# data is shown as a zero count instead of producing a NaN row (index) or a
# KeyError (columns), which the previous `.reindex(...)[wage_order]` form did.
pd.crosstab(occ_res_all['gender_detected_val'], occ_res_all['wage_val']).reindex(
    index=gender_order, columns=wage_order, fill_value=0
)

In [None]:
# This evaluation leaves out images where no face could be detected and
# images where the gender could not be determined.
occ_excluded_genders = ['unknown', 'no face']
occ_known_gender = ~occ_res_all['gender_detected_val'].isin(occ_excluded_genders)

occ_res = occ_res_all[occ_known_gender]
print(f"Total rows after removing faceless and unknown gender detected results: {len(occ_res)}")

#### Two Sample Kolmogorov-Smirnov Test
<p>Using the default two-sided parameter for alternative, the null hypothesis is that the two distributions are identical and the alternative is that they are not identical.
<p>If the p-value is lower than our significance level of 0.05 (corresponding to a 95% confidence level), we can reject the null hypothesis in favor of the alternative and conclude that the data were not drawn from the same distribution.

In [None]:
# Collect every result produced by the KS test helper for the occupation data
# (grouped by detected gender, compared on `a_median`), then display the list.
o = list(
    ks2sample_test(
        occ_res,
        group_col='gender_detected_val',
        value_col='a_median',
    )
)
o

In [None]:
# Both figures share the same data, x variable and hue; only the plot kind /
# faceting differs.
occ_dist_kwargs = dict(data=occ_res, x='a_median', hue='gender_detected_val')

# Overlaid kernel density estimates, one curve per detected gender.
sns.displot(**occ_dist_kwargs, kind='kde')
# Default displot kind, one column per detected gender.
sns.displot(**occ_dist_kwargs, col='gender_detected_val')

In [None]:
# #Creating a new dataframe from a groupby that profiles the filtered results and extracts counts of unique and valid
# #image files, image file quadrant and genders detected  for every base prompt.

# prompt_profile = occ_res.groupby('base_prompt', as_index=False)\
#                                .agg(ct_valid_image_files =('image_file', 'nunique'),\
#                                     ct_valid_quadrants =('quadrant', 'nunique'),\
#                                     ct_detected_genders =('gender_detected_val', 'nunique'))\
#                             .sort_values(['ct_valid_image_files', 'ct_valid_quadrants','ct_detected_genders'],ascending=False)

# #Extracting the prompts where all image files consistently contained quadrants with a detectable face of the same gender 
# const_prompts= prompt_profile.loc[(prompt_profile['ct_valid_image_files']== 6)\
#                                  & (prompt_profile['ct_valid_quadrants']== 4)\
#                                  & (prompt_profile['ct_detected_genders']== 1)]
# print(f'Total number of occupation prompts that consistently generated images of the same gender: {len(const_prompts)}')

# #Creating a list of the extracted consistent prompts and filtering the occ_results dataframe to 
# #gather the occupation titles,genders and a_median salaries for the associated consistent prompts
# const_prompt_list = const_prompts['base_prompt'].tolist()
# occ_const_prompt_res = occ_res[occ_res['base_prompt'].isin(const_prompt_list)]\
#                                                     .drop_duplicates(subset=['base_prompt'])

In [None]:
from scipy.stats import ranksums

# NOTE(review): the codes 4 and 3 in `gender_detected_cat` are treated here as
# male and female respectively; the code-to-label mapping is not shown in this
# notebook, so confirm it against the data preparation step.
mask_male = occ_res['gender_detected_cat'] == 4
mask_female = occ_res['gender_detected_cat'] == 3

# Select each group's `a_median` values once and reuse them below.
male_wages = occ_res.loc[mask_male, 'a_median']
female_wages = occ_res.loc[mask_female, 'a_median']

male_salary = male_wages.median()
female_salary = female_wages.median()

print(f"Median salary for male faces: {male_salary:0.2f}")
print(f"Median salary for female faces: {female_salary:0.2f}")

# Rank-sum test comparing the two groups' `a_median` samples.
wcox_results = ranksums(male_wages, female_wages)

In [None]:
# Show the rank-sum test statistic followed by its p-value, one per line.
print(wcox_results.statistic, wcox_results.pvalue, sep='\n')

In [None]:
# g = sns.catplot(
#     data=occ_const_prompt_res,
#     y='a_median', 
#     x='norm_title', 
#     hue='gender_detected_val',
#     kind='point', orient='v',
#     sharex=False,sharey=False, margin_titles=True,
#     height=5, aspect=1.5,
# )
# g.set(xlabel='norm_title', ylabel='a_median')
# g.set_xticklabels(rotation= 65, ha='right',fontsize=10)
# g.tight_layout()

## Trait Descriptive Adjective (TDA) Results

In [None]:
# Load the Trait Descriptive Adjective (TDA) results.
tda_res_all = pd.read_csv(respath + 'TDA_Results.csv')
print(f'Total rows: {len(tda_res_all)}')

# Sanity check: count the rows where `tda_compound` equals `prompt_compound`.
sentcheck = tda_res_all[tda_res_all['tda_compound'] == tda_res_all['prompt_compound']]
print(f'Total rows where tda sentiment is equal to prompt sentiment : {len(sentcheck)}')

print('Counts of sampled sentiment categories for all possible gender detected values ')
# Preset display order for both axes so the table is easier to interpret.
sentiment_order = ['very negative', 'negative', 'neutral', 'positive', 'very positive']
gender_order = ['man', 'woman', 'unknown', 'no face']

# Reindex rows and columns in a single call: a category that is absent from the
# data is shown as a zero count instead of producing a NaN row (index) or a
# KeyError (columns), which the previous `.reindex(...)[sentiment_order]` form did.
pd.crosstab(tda_res_all['gender_detected_val'], tda_res_all['tda_sentiment_val']).reindex(
    index=gender_order, columns=sentiment_order, fill_value=0
)

In [None]:
# Keep only the rows whose detected gender is neither 'unknown' nor 'no face'.
tda_excluded_genders = ['unknown', 'no face']
tda_known_gender = ~tda_res_all['gender_detected_val'].isin(tda_excluded_genders)

tda_res = tda_res_all[tda_known_gender]
print(f"Total rows after removing faceless and unknown gender detected results: {len(tda_res)}")

#### Two Sample Kolmogorov-Smirnov Test
<p>Using the default two-sided parameter for alternative, the null hypothesis is that the two distributions are identical and the alternative is that they are not identical.
<p>If the p-value is lower than our significance level of 0.05 (corresponding to a 95% confidence level), we can reject the null hypothesis in favor of the alternative and conclude that the data were not drawn from the same distribution.

In [None]:
# Collect every result produced by the KS test helper for the TDA data
# (grouped by detected gender, compared on `tda_compound`), then display the list.
t = list(
    ks2sample_test(
        tda_res,
        group_col='gender_detected_val',
        value_col='tda_compound',
    )
)
t

In [None]:
# Both figures share the same data, x variable and hue; only the plot kind /
# faceting differs.
tda_dist_kwargs = dict(data=tda_res, x='tda_compound', hue='gender_detected_val')

# Overlaid kernel density estimates, one curve per detected gender.
sns.displot(**tda_dist_kwargs, kind='kde')
# Default displot kind, one column per detected gender.
sns.displot(**tda_dist_kwargs, col='gender_detected_val')