In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from statistics import mean

In [14]:
# Number of per-seed result files aggregated per table (five `run_<seed>` CSVs are
# loaded in each section); presumably the n intended for the std / sqrt(n) formulas.
number_of_runs=5
# Multiplier applied to std / sqrt(n) to obtain the confidence-interval half-width.
significance_level=1.96 # for 95% ci
# NOTE(review): the three values below are not referenced in the visible cells —
# presumably plot-styling settings consumed elsewhere; confirm before removing.
height = 6
font_size=14
rotation_degree =15

In [15]:
# Directory the per-run subgroup CSVs are read from; the sex/age/race summary CSVs
# are written back into the same directory.
Subgroup_FPR="./FPR/SubGroup_FPR"

## Sex

In [16]:
# One per-sex FPR/FNR table per run; the first CSV column is used as the index.
seed_19_sex, seed_31_sex, seed_38_sex, seed_47_sex, seed_77_sex = (
    pd.read_csv(f"{Subgroup_FPR}/run_{seed}FPR_FNR_NF_sex.csv", index_col=0)
    for seed in (19, 31, 38, 47, 77)
)
seed_77_sex.head(5)

Unnamed: 0,diseases,#M,FPR_M,FNR_M,#F,FPR_F,FNR_F
0,No Finding,9579,0.063,0.184,6672,0.08,0.189


In [17]:
# Stack the per-run tables, then summarise every numeric column across the runs.
sex_dataframes = [
    seed_19_sex,
    seed_31_sex,
    seed_38_sex,
    seed_47_sex,
    seed_77_sex,
]
result_sex = pd.concat(sex_dataframes)
result_sex_df = result_sex.describe()

In [18]:
print("FPR and FNR distribiution in sexes")
# Across-run mean of every column, rounded to three decimals.
print(result_sex_df.loc['mean'].round(3))

FPR and FNR distribiution in sexes
#M       9579.000
FPR_M       0.059
FNR_M       0.193
#F       6672.000
FPR_F       0.073
FNR_F       0.202
Name: mean, dtype: float64


In [19]:
print("FPR and FNR distribiution in sex Confidence interval")
# CI half-width of the across-run mean: z * std / sqrt(n). n comes from the
# notebook-level `number_of_runs` rather than a repeated literal 5.
round(significance_level * result_sex_df.loc['std'] / np.sqrt(number_of_runs),3)

FPR and FNR distribiution in sex Confidence interval


#M       0.000
FPR_M    0.004
FNR_M    0.013
#F       0.000
FPR_F    0.006
FNR_F    0.011
Name: std, dtype: float64

In [20]:
# NOTE(review): this label says "CheXpert" while the corresponding age cell prints
# "MIMIC" for tables read from the same directory — confirm which dataset name is right.
print("CheXpert: Mean FPR distribiution over sexes")
# Unweighted average of the two per-sex mean FPRs (subgroup sizes are not used).
mean([result_sex_df.loc['mean']["FPR_F"], result_sex_df.loc['mean']["FPR_M"]])

CheXpert: Mean FPR distribiution over sexes


0.0658

In [21]:
# Row labels of the per-sex summary; each is the suffix of the '#', 'FPR_' and
# 'FNR_' columns looked up by FiveRunSubgroup.
sex = ['M', 'F']
fpr_fnr_sex_df = pd.DataFrame({"sex": sex})

In [22]:

def FiveRunSubgroup(factors, df_in, df_out, z_score=None):
    """Summarise subgroup size, FPR and FNR across runs, with CI half-widths.

    Parameters
    ----------
    factors : iterable of str
        Subgroup suffixes. For each ``f`` the columns ``'#' + f``,
        ``'FPR_' + f`` and ``'FNR_' + f`` are read from ``df_in``.
    df_in : pandas.DataFrame
        Summary table with ``'mean'`` and ``'std'`` rows (as produced by
        ``DataFrame.describe()``). When a ``'count'`` row is present it is
        used as the per-column number of runs; otherwise 5 is assumed.
    df_out : pandas.DataFrame
        Frame with one row per entry of ``factors``, in the same order.
        It is modified in place and also returned.
    z_score : float, optional
        Multiplier for the standard error. When omitted, the notebook-level
        ``significance_level`` is used.

    Returns
    -------
    pandas.DataFrame
        ``df_out`` with the columns ``'#'``, ``'FPR'``, ``'CI_FPR'``,
        ``'FNR'`` and ``'CI_FNR'`` set, every value rounded to 3 decimals.
    """
    if z_score is None:
        z_score = significance_level

    factors = list(factors)  # iterated several times below
    mean_row = df_in.loc['mean']

    # Use the observed number of runs per column instead of a hard-coded 5, so
    # the interval stays correct when the number of stacked runs changes.
    n_runs = df_in.loc['count'] if 'count' in df_in.index else 5
    half_width = z_score * df_in.loc['std'] / np.sqrt(n_runs)

    def _rounded(row, prefix):
        # One rounded value per subgroup, looked up by column label.
        return [round(row[prefix + fact], 3) for fact in factors]

    # Column insertion order is part of the output layout; keep it stable.
    df_out['#'] = _rounded(mean_row, '#')
    df_out['FPR'] = _rounded(mean_row, 'FPR_')
    df_out['CI_FPR'] = _rounded(half_width, 'FPR_')
    df_out['FNR'] = _rounded(mean_row, 'FNR_')
    df_out['CI_FNR'] = _rounded(half_width, 'FNR_')

    return df_out


In [23]:
# Fill the per-sex summary, persist it next to the inputs, then display it.
fpr_fnr_sex_df = FiveRunSubgroup(
    factors=sex,
    df_in=result_sex_df,
    df_out=fpr_fnr_sex_df,
)
fpr_fnr_sex_df.to_csv(Subgroup_FPR+'/Subgroup_FNR_FPR_Sex.csv')
fpr_fnr_sex_df

Unnamed: 0,sex,#,FPR,CI_FPR,FNR,CI_FNR
0,M,9579.0,0.059,0.004,0.193,0.013
1,F,6672.0,0.073,0.006,0.202,0.011


## Age

In [24]:
# One per-age-band FPR/FNR table per run; the first CSV column is used as the index.
seed_19_age, seed_31_age, seed_38_age, seed_47_age, seed_77_age = (
    pd.read_csv(f"{Subgroup_FPR}/run_{seed}FPR_FNR_NF_age.csv", index_col=0)
    for seed in (19, 31, 38, 47, 77)
)
seed_77_age.head(5)

Unnamed: 0,diseases,#60-80,FPR_60-80,FNR_60-80,#40-60,FPR_40-60,FNR_40-60,#20-40,FPR_20-40,FNR_20-40,#80+,FPR_80+,FNR_80+,#0-20,FPR_0-20,FNR_0-20
0,No Finding,6526,0.062,0.205,4770,0.072,0.162,1797,0.086,0.113,3033,0.074,0.349,125,0.064,0.146


In [25]:
age_dataframes = [
    seed_19_age,
    seed_31_age,
    seed_38_age,
    seed_47_age,
    seed_77_age,
]
# Stack the runs and summarise in one chain, so the name is bound only once.
result_age_df = pd.concat(age_dataframes).describe()

In [26]:
print("FPR distribiution in ages")
# Across-run mean of every column (counts, FPR and FNR), rounded to three decimals.
print(result_age_df.loc['mean'].round(3))

FPR distribiution in ages
#60-80       6526.000
FPR_60-80       0.058
FNR_60-80       0.221
#40-60       4770.000
FPR_40-60       0.068
FNR_40-60       0.167
#20-40       1797.000
FPR_20-40       0.081
FNR_20-40       0.121
#80+         3033.000
FPR_80+         0.063
FNR_80+         0.370
#0-20         125.000
FPR_0-20        0.064
FNR_0-20        0.161
Name: mean, dtype: float64


In [27]:
print("MIMIC: Mean FPR distribiution over ages")
# Unweighted average of the per-age-band mean FPRs.
mean([
    result_age_df.loc['mean'][f"FPR_{band}"]
    for band in ("60-80", "40-60", "20-40", "0-20", "80+")
])

MIMIC: Mean FPR distribiution over ages


0.06676

In [28]:
# CI half-width per column: z * std / sqrt(n), with n read from `number_of_runs`
# instead of a repeated literal 5.
round(significance_level* result_age_df.loc['std'] / np.sqrt(number_of_runs),3)

#60-80       0.000
FPR_60-80    0.004
FNR_60-80    0.015
#40-60       0.000
FPR_40-60    0.004
FNR_40-60    0.007
#20-40       0.000
FPR_20-40    0.006
FNR_20-40    0.009
#80+         0.000
FPR_80+      0.009
FNR_80+      0.022
#0-20        0.000
FPR_0-20     0.005
FNR_0-20     0.019
Name: std, dtype: float64

In [29]:
# Age bands in display order; each is the suffix of the columns read from result_age_df.
age = ['0-20', '20-40', '40-60', '60-80', '80+']

fpr_fnr_age_df = FiveRunSubgroup(age, result_age_df, pd.DataFrame({"Age": age}))
# NOTE(review): "Subgrounp" looks like a typo of "Subgroup" (the sex and race files
# use the latter); kept as-is because other code may read this exact file name.
fpr_fnr_age_df.to_csv(Subgroup_FPR+'/Subgrounp_FNR_FPR_Age.csv')
fpr_fnr_age_df

Unnamed: 0,Age,#,FPR,CI_FPR,FNR,CI_FNR
0,0-20,125.0,0.064,0.005,0.161,0.019
1,20-40,1797.0,0.081,0.006,0.121,0.009
2,40-60,4770.0,0.068,0.004,0.167,0.007
3,60-80,6526.0,0.058,0.004,0.221,0.015
4,80+,3033.0,0.063,0.009,0.37,0.022


## Race

In [30]:
# One per-race FPR/FNR table per run; the first CSV column is used as the index.
seed_19_race, seed_31_race, seed_38_race, seed_47_race, seed_77_race = (
    pd.read_csv(f"{Subgroup_FPR}/run_{seed}FPR_FNR_NF_race.csv", index_col=0)
    for seed in (19, 31, 38, 47, 77)
)
seed_77_race.head(3)

Unnamed: 0,diseases,#White,FPR_White,FNR_White,#Black,FPR_Black,FNR_Black,#Hisp,FPR_Hisp,FNR_Hisp,#Other,FPR_Other,FNR_Other,#Asian,FPR_Asian,FNR_Asian,#American,FPR_American,FNR_American
0,No Finding,10621,0.066,0.209,938,0.091,0.15,406,0.042,0.161,2209,0.08,0.139,1863,0.07,0.177,214,0.131,0.145


In [31]:
# Stack the per-run tables, then summarise every numeric column across the runs.
race_dataframes = [
    seed_19_race,
    seed_31_race,
    seed_38_race,
    seed_47_race,
    seed_77_race,
]
result_race = pd.concat(race_dataframes)
result_race_df = result_race.describe()

In [32]:
print("FPR distribiution in races")
# Across-run mean of every column (counts, FPR and FNR), rounded to three decimals.
print(result_race_df.loc['mean'].round(3))

FPR distribiution in races
#White          10621.000
FPR_White           0.061
FNR_White           0.222
#Black            938.000
FPR_Black           0.084
FNR_Black           0.159
#Hisp             406.000
FPR_Hisp            0.036
FNR_Hisp            0.157
#Other           2209.000
FPR_Other           0.075
FNR_Other           0.146
#Asian           1863.000
FPR_Asian           0.065
FNR_Asian           0.190
#American         214.000
FPR_American        0.113
FNR_American        0.148
Name: mean, dtype: float64


In [33]:
# CI half-width per column: z * std / sqrt(n), with n read from `number_of_runs`
# instead of a repeated literal 5.
round(significance_level * result_race_df.loc['std'] / np.sqrt(number_of_runs),3)

#White          0.000
FPR_White       0.005
FNR_White       0.013
#Black          0.000
FPR_Black       0.006
FNR_Black       0.013
#Hisp           0.000
FPR_Hisp        0.005
FNR_Hisp        0.017
#Other          0.000
FPR_Other       0.005
FNR_Other       0.007
#Asian          0.000
FPR_Asian       0.004
FNR_Asian       0.012
#American       0.000
FPR_American    0.015
FNR_American    0.005
Name: std, dtype: float64

In [34]:
# Race labels in display order; each is the suffix of the columns read from result_race_df.
race = ['White', 'Black', 'Hisp', 'Other', 'Asian', 'American']

# NOTE(review): the name `fpr_fpr_race_df` is probably meant to be `fpr_fnr_race_df`
# (compare the sex/age frames); kept unchanged in case other cells refer to it.
fpr_fpr_race_df = FiveRunSubgroup(race, result_race_df, pd.DataFrame({"Race": race}))
fpr_fpr_race_df.to_csv(Subgroup_FPR+'/Subgroup_FNR_FPR_Race.csv')
fpr_fpr_race_df

Unnamed: 0,Race,#,FPR,CI_FPR,FNR,CI_FNR
0,White,10621.0,0.061,0.005,0.222,0.013
1,Black,938.0,0.084,0.006,0.159,0.013
2,Hisp,406.0,0.036,0.005,0.157,0.017
3,Other,2209.0,0.075,0.005,0.146,0.007
4,Asian,1863.0,0.065,0.004,0.19,0.012
5,American,214.0,0.113,0.015,0.148,0.005


# Two Group Intersectional identity

## Age-Sex

In [35]:
# Directory the per-run two-attribute intersection CSVs are read from; the
# Inter_*.csv summaries are written back into it.
TwoGroup_FPR="./FPR/Two_Group_Intersection_FPR"

In [37]:
# NOTE(review): unlike the subgroup loads, no index_col is given here, so the saved
# row index comes back as an "Unnamed: 0" column and is summarised with the FPRs.
seed_19_agesex, seed_31_agesex, seed_38_agesex, seed_47_agesex, seed_77_agesex = (
    pd.read_csv(f"{TwoGroup_FPR}/run_{seed}FP_AgeSex.csv")
    for seed in (19, 31, 38, 47, 77)
)

# Per-age-band summary statistics across the runs, built in a single chain.
fp_agesex = (
    pd.concat([
        seed_19_agesex,
        seed_31_agesex,
        seed_38_agesex,
        seed_47_agesex,
        seed_77_agesex,
    ])
    .groupby("Age")
    .describe()
)

fp_agesex

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_M,FPR_M,FPR_M,FPR_M,FPR_M,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Age,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0-20,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.0608,...,0.066,0.079,5.0,0.0694,0.011502,0.061,0.061,0.061,0.082,0.082
20-40,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.09,...,0.098,0.098,5.0,0.0666,0.003435,0.061,0.066,0.068,0.068,0.07
40-60,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.066,...,0.07,0.07,5.0,0.0714,0.004615,0.065,0.068,0.074,0.075,0.075
60-80,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0496,...,0.052,0.052,5.0,0.0704,0.006025,0.062,0.067,0.071,0.076,0.076
80+,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.0472,...,0.053,0.053,5.0,0.0858,0.01781,0.067,0.07,0.084,0.104,0.104


In [38]:
print("Age-Sex over male")
# Across-run mean male FPR per age band.
fp_agesex['FPR_M']['mean'].round(3)

Age-Sex over male


Age
0-20     0.061
20-40    0.090
40-60    0.066
60-80    0.050
80+      0.047
Name: mean, dtype: float64

In [39]:
# CI half-width of the male FPR per age band: z * std / sqrt(n), with n read from
# `number_of_runs` instead of a repeated literal 5.
round(significance_level * fp_agesex['FPR_M']["std"] / np.sqrt(number_of_runs),3)

Age
0-20     0.010
20-40    0.008
40-60    0.004
60-80    0.003
80+      0.005
Name: std, dtype: float64

In [40]:
# Unweighted average, over age bands, of the rounded per-band male FPR means.
print("mean: ", fp_agesex['FPR_M']['mean'].round(3).mean())

mean:  0.0628


In [41]:
# Across-run mean female FPR per age band.
fp_agesex['FPR_F']['mean'].round(3)

Age
0-20     0.069
20-40    0.067
40-60    0.071
60-80    0.070
80+      0.086
Name: mean, dtype: float64

In [42]:
# CI half-width of the female FPR per age band: z * std / sqrt(n), with n read from
# `number_of_runs` instead of a repeated literal 5.
round(significance_level * fp_agesex['FPR_F']["std"] / np.sqrt(number_of_runs),3)

Age
0-20     0.010
20-40    0.003
40-60    0.004
60-80    0.005
80+      0.016
Name: std, dtype: float64

In [43]:
# Same female-FPR half-width as displayed above, kept in a variable; n is read from
# `number_of_runs` instead of a repeated literal 5.
# NOTE(review): `CI` is not read by any visible cell — confirm it is still needed.
CI = round(significance_level * fp_agesex['FPR_F']["std"] / np.sqrt(number_of_runs),3)

In [44]:
def FiveRun(factors, output_df, df, z_score=None):
    """Add rounded across-run means and CI half-widths for each factor.

    Parameters
    ----------
    factors : iterable of str
        Top-level column names of ``df`` to summarise (e.g. ``'FPR_F'``).
    output_df : pandas.DataFrame
        Frame that receives the results; modified in place and returned.
        Values are matched to its rows **by position**: the i-th group of
        ``df`` (in ``df``'s index order) lands in the row labelled ``i``, so
        the caller's label column must list the groups in that same order.
    df : pandas.DataFrame
        Grouped summary (``groupby(...).describe()``): ``df[factor]`` must
        expose ``'mean'`` and ``'std'`` columns. When ``'count'`` is present
        it is used as the per-group number of runs; otherwise 5 is assumed.
    z_score : float, optional
        Multiplier for the standard error. When omitted, the notebook-level
        ``significance_level`` is used.

    Returns
    -------
    pandas.DataFrame
        ``output_df`` with, per factor, a ``factor`` column (mean) and a
        ``'CI_' + factor`` column (half-width), both rounded to 3 decimals.
    """
    if z_score is None:
        z_score = significance_level

    for factor in factors:
        stats = df[factor]
        # Per-group number of non-missing runs instead of a hard-coded 5, so
        # groups that lack a value in some run get the right standard error.
        n_runs = stats['count'] if 'count' in stats.columns else 5

        fpr = stats['mean'].round(3)
        confI = (z_score * stats['std'] / np.sqrt(n_runs)).round(3)

        # Drop the group labels so assignment aligns on 0..k-1 row positions.
        output_df[factor] = pd.Series(fpr.values)
        output_df['CI_' + factor] = pd.Series(confI.values)

    return output_df

In [45]:
factors = ['FPR_F', 'FPR_M']
# FiveRun fills rows by position, so the labels must follow fp_agesex's index order.
age = ['0-20', '20-40', '40-60', '60-80', '80+']

agesex_df = FiveRun(factors, pd.DataFrame({"Age": age}), fp_agesex)
agesex_df.to_csv(TwoGroup_FPR+'/Inter_AgeSex.csv')
agesex_df

Unnamed: 0,Age,FPR_F,CI_FPR_F,FPR_M,CI_FPR_M
0,0-20,0.069,0.01,0.061,0.01
1,20-40,0.067,0.003,0.09,0.008
2,40-60,0.071,0.004,0.066,0.004
3,60-80,0.07,0.005,0.05,0.003
4,80+,0.086,0.016,0.047,0.005


## Race-Sex

In [46]:
# One race-by-sex FPR table per run (no index_col, so the saved row index is
# read back as an ordinary column).
(seed_19_race_sex, seed_31_race_sex, seed_38_race_sex,
 seed_47_race_sex, seed_77_race_sex) = (
    pd.read_csv(f"{TwoGroup_FPR}/run_{seed}FP_RaceSex.csv")
    for seed in (19, 31, 38, 47, 77)
)

# Per-race summary statistics across the runs, built in a single chain.
fp_race_sex = (
    pd.concat([
        seed_19_race_sex,
        seed_31_race_sex,
        seed_38_race_sex,
        seed_47_race_sex,
        seed_77_race_sex,
    ])
    .groupby("race")
    .describe()
)

fp_race_sex

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_M,FPR_M,FPR_M,FPR_M,FPR_M,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
race,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AMERICAN INDIAN/ALASKA NATIVE,5.0,5.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,0.099,...,0.103,0.103,5.0,0.1248,0.027545,0.094,0.111,0.111,0.154,0.154
ASIAN,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.0608,...,0.067,0.067,5.0,0.0726,0.00305,0.068,0.071,0.074,0.075,0.075
BLACK/AFRICAN AMERICAN,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.0848,...,0.09,0.09,5.0,0.0816,0.008735,0.074,0.074,0.078,0.091,0.091
HISPANIC/LATINO,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.0342,...,0.039,0.039,5.0,0.0396,0.009127,0.027,0.034,0.041,0.048,0.048
OTHER,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.0658,...,0.071,0.071,5.0,0.0858,0.005586,0.079,0.081,0.087,0.091,0.091
WHITE,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0548,...,0.058,0.058,5.0,0.0692,0.007396,0.06,0.063,0.071,0.076,0.076


In [47]:
factors = ['FPR_F', 'FPR_M']
# FiveRun fills rows by position, so the labels must follow fp_race_sex's index order.
race = [
    'AMERICAN INDIAN/ALASKA NATIVE',
    'ASIAN',
    'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO',
    'OTHER',
    'WHITE',
]

RaceSex_df = FiveRun(factors, pd.DataFrame({"race": race}), fp_race_sex)
RaceSex_df.to_csv(TwoGroup_FPR+'/Inter_RaceSex.csv')
RaceSex_df

Unnamed: 0,race,FPR_F,CI_FPR_F,FPR_M,CI_FPR_M
0,AMERICAN INDIAN/ALASKA NATIVE,0.125,0.024,0.099,0.005
1,ASIAN,0.073,0.003,0.061,0.005
2,BLACK/AFRICAN AMERICAN,0.082,0.008,0.085,0.006
3,HISPANIC/LATINO,0.04,0.008,0.034,0.004
4,OTHER,0.086,0.005,0.066,0.006
5,WHITE,0.069,0.006,0.055,0.004


## Race-Age

In [48]:
# One race-by-age FPR table per run (no index_col, so the saved row index is
# read back as an ordinary column).
(seed_19_race_age, seed_31_race_age, seed_38_race_age,
 seed_47_race_age, seed_77_race_age) = (
    pd.read_csv(f"{TwoGroup_FPR}/run_{seed}FP_RaceAge.csv")
    for seed in (19, 31, 38, 47, 77)
)

# Per-age-band summary statistics across the runs, built in a single chain.
fp_race_age = (
    pd.concat([
        seed_19_race_age,
        seed_31_race_age,
        seed_38_race_age,
        seed_47_race_age,
        seed_77_race_age,
    ])
    .groupby("age")
    .describe()
)
fp_race_age

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_White,FPR_White,...,FPR_Asian,FPR_Asian,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
age,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0-20,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.0236,...,0.0,0.0,5.0,0.4666,0.182939,0.333,0.333,0.333,0.667,0.667
20-40,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.0854,...,0.064,0.064,5.0,0.1226,0.008764,0.113,0.113,0.129,0.129,0.129
40-60,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.068,...,0.074,0.076,5.0,0.1074,0.010407,0.096,0.096,0.115,0.115,0.115
60-80,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0508,...,0.078,0.078,5.0,0.107,0.020964,0.081,0.093,0.105,0.128,0.128
80+,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.0616,...,0.052,0.052,5.0,0.0364,0.049843,0.0,0.0,0.0,0.091,0.091


In [49]:
# CI half-width of FPR_White per age band: z * std / sqrt(n), with n read from
# `number_of_runs` instead of a repeated literal 5.
round(significance_level * fp_race_age['FPR_White']["std"] / np.sqrt(number_of_runs),3)

age
0-20     0.011
20-40    0.007
40-60    0.003
60-80    0.003
80+      0.009
Name: std, dtype: float64

In [50]:
# One FPR column per race, in the order they should appear in the output table.
factors = [
    f"FPR_{label}"
    for label in ('White', 'Black', 'Hisp', 'Other', 'Asian', 'American')
]
# FiveRun fills rows by position, so the labels must follow fp_race_age's index order.
age = ['0-20', '20-40', '40-60', '60-80', '80+']

RaceAge_df = FiveRun(factors, pd.DataFrame({"age": age}), fp_race_age)
RaceAge_df.to_csv(TwoGroup_FPR+'/Inter_RaceAge.csv')
RaceAge_df

Unnamed: 0,age,FPR_White,CI_FPR_White,FPR_Black,CI_FPR_Black,FPR_Hisp,CI_FPR_Hisp,FPR_Other,CI_FPR_Other,FPR_Asian,CI_FPR_Asian,FPR_American,CI_FPR_American
0,0-20,0.024,0.011,0.0,0.0,0.333,0.0,0.333,0.0,0.0,0.0,0.467,0.16
1,20-40,0.085,0.007,0.081,0.006,0.019,0.0,0.09,0.005,0.061,0.004,0.123,0.008
2,40-60,0.068,0.003,0.103,0.01,0.045,0.012,0.055,0.005,0.073,0.002,0.107,0.009
3,60-80,0.051,0.003,0.068,0.004,0.024,0.003,0.079,0.005,0.074,0.004,0.107,0.018
4,80+,0.062,0.009,0.087,0.008,0.056,0.011,0.109,0.009,0.038,0.011,0.036,0.044


# Three Group Intersectional identity

## Race, Age and Sex

In [51]:
# Directory holding the per-run three-attribute intersection CSVs.
# NOTE(review): unlike the other two directory constants this one ends with '/',
# and the cells below append "/run_..." again, producing a doubled slash in the path.
ThreeGroup_FPR ='./FPR/Three_Group_Intersection_FPR/'

In [52]:
# One race-by-age-by-sex FPR table per run.
RaceAgeSex_19, RaceAgeSex_31, RaceAgeSex_38, RaceAgeSex_47, RaceAgeSex_77 = (
    pd.read_csv(f"{ThreeGroup_FPR}/run_{seed}FP_RaceAgeSex.csv")
    for seed in (19, 31, 38, 47, 77)
)

In [53]:

# Per-race summary statistics across the runs, built in a single chain.
fp_race_age_sex_df = (
    pd.concat([
        RaceAgeSex_19,
        RaceAgeSex_31,
        RaceAgeSex_38,
        RaceAgeSex_47,
        RaceAgeSex_77,
    ])
    .groupby("race")
    .describe()
)
fp_race_age_sex_df

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_60_80_M,FPR_60_80_M,...,FPR_80+_F,FPR_80+_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
race,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AMERICAN INDIAN/ALASKA NATIVE,5.0,5.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,0.0818,...,0.167,0.167,5.0,0.2,0.273861,0.0,0.0,0.0,0.5,0.5
ASIAN,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.0672,...,0.046,0.046,0.0,,,,,,,
BLACK/AFRICAN AMERICAN,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.0588,...,0.079,0.079,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HISPANIC/LATINO,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.0136,...,0.125,0.125,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OTHER,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.0518,...,0.159,0.159,5.0,0.75,0.0,0.75,0.75,0.75,0.75,0.75
WHITE,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0458,...,0.111,0.111,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
# All age-band columns for males first, then the same bands for females.
factors = [
    f"FPR_{band}_{sex_code}"
    for sex_code in ("M", "F")
    for band in ("60_80", "40_60", "20_40", "80+", "0_20")
]
# FiveRun fills rows by position, so the labels must follow fp_race_age_sex_df's index order.
race = [
    'AMERICAN INDIAN/ALASKA NATIVE',
    'ASIAN',
    'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO',
    'OTHER',
    'WHITE',
]

RaceAgeSex_df = FiveRun(factors, pd.DataFrame({"race": race}), fp_race_age_sex_df)

RaceAgeSex_df.to_csv(ThreeGroup_FPR+'/Inter_RaceAgeSex.csv')
RaceAgeSex_df

Unnamed: 0,race,FPR_60_80_M,CI_FPR_60_80_M,FPR_40_60_M,CI_FPR_40_60_M,FPR_20_40_M,CI_FPR_20_40_M,FPR_80+_M,CI_FPR_80+_M,FPR_0_20_M,...,FPR_60_80_F,CI_FPR_60_80_F,FPR_40_60_F,CI_FPR_40_60_F,FPR_20_40_F,CI_FPR_20_40_F,FPR_80+_F,CI_FPR_80+_F,FPR_0_20_F,CI_FPR_0_20_F
0,AMERICAN INDIAN/ALASKA NATIVE,0.082,0.018,0.114,0.014,0.088,0.012,0.0,0.0,1.0,...,0.116,0.023,0.1,0.02,0.19,0.0,0.067,0.08,0.2,0.24
1,ASIAN,0.067,0.004,0.068,0.001,0.058,0.005,0.037,0.015,0.0,...,0.087,0.003,0.083,0.004,0.064,0.007,0.04,0.006,,
2,BLACK/AFRICAN AMERICAN,0.059,0.007,0.093,0.011,0.122,0.009,0.139,0.011,0.0,...,0.077,0.005,0.116,0.011,0.051,0.005,0.063,0.013,0.0,0.0
3,HISPANIC/LATINO,0.014,0.007,0.05,0.007,0.022,0.0,0.04,0.013,0.5,...,0.036,0.0,0.04,0.017,0.0,0.0,0.125,0.0,0.0,0.0
4,OTHER,0.052,0.004,0.058,0.008,0.097,0.007,0.084,0.006,0.125,...,0.111,0.007,0.05,0.003,0.077,0.003,0.145,0.018,0.75,0.0
5,WHITE,0.046,0.003,0.065,0.003,0.1,0.01,0.042,0.004,0.053,...,0.059,0.005,0.073,0.003,0.063,0.003,0.09,0.018,0.0,0.0
