In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from statistics import mean

In [2]:
number_of_runs=5
significance_level=1.96 # for 95% ci
height = 6
font_size=14
rotation_degree =15

In [3]:
sub_group_FPR = './FPR/SubGroup_FPR'

## Sex

In [4]:
seed_19_sex = pd.read_csv(sub_group_FPR+"/run_19FPR_FNR_NF_sex.csv",index_col=0)
seed_31_sex = pd.read_csv(sub_group_FPR+"/run_31FPR_FNR_NF_sex.csv",index_col=0)
seed_38_sex = pd.read_csv(sub_group_FPR+"/run_38FPR_FNR_NF_sex.csv",index_col=0)
seed_47_sex = pd.read_csv(sub_group_FPR+"/run_47FPR_FNR_NF_sex.csv",index_col=0)
seed_77_sex = pd.read_csv(sub_group_FPR+"/run_77FPR_FNR_NF_sex.csv",index_col=0)
seed_77_sex.head(5)

Unnamed: 0,diseases,#M,FPR_M,FNR_M,#F,FPR_F,FNR_F
0,No Finding,18025,0.103,0.261,13001,0.126,0.215


In [5]:
sex_dataframes = [seed_19_sex, seed_31_sex, seed_38_sex, seed_47_sex, seed_77_sex]
result_sex = pd.concat(sex_dataframes)

result_sex_df = result_sex.describe()

In [6]:
print("FPR and FNR distribiution in sexes")
print(round(result_sex_df.loc['mean'],3))

FPR and FNR distribiution in sexes
#M       18025.000
FPR_M        0.108
FNR_M        0.248
#F       13001.000
FPR_F        0.133
FNR_F        0.200
Name: mean, dtype: float64


In [7]:
print("FPR and FNR distribiution in sex Confidence interval")
round(significance_level * result_sex_df.loc['std'] / np.sqrt(5),3)

FPR and FNR distribiution in sex Confidence interval


#M       0.000
FPR_M    0.004
FNR_M    0.007
#F       0.000
FPR_F    0.005
FNR_F    0.008
Name: std, dtype: float64

In [8]:
print("ALL: Mean FPR distribiution over sexes")
mean([result_sex_df.loc['mean']["FPR_F"], result_sex_df.loc['mean']["FPR_M"]])

ALL: Mean FPR distribiution over sexes


0.12070000000000002

In [9]:
sex = ['M','F' ]
fpr_fnr_sex_df = pd.DataFrame(sex, columns=["sex"])

In [10]:

def FiveRunSubgroup(factors, df_in, df_out):
    fpr = []
    fnr = []
    percent = []
    ci_fpr =[]
    ci_fnr =[]
    confI = significance_level * df_in.loc['std'] / np.sqrt(5)

    for fact in factors:
        percent.append(round(df_in.loc['mean']['#'+fact],3))
        fpr.append(round(df_in.loc['mean']['FPR_'+fact],3))
        fnr.append(round(df_in.loc['mean']['FNR_'+fact],3))
        ci_fpr.append(round(confI.loc['FPR_'+fact],3))
        ci_fnr.append(round(confI.loc['FNR_'+fact],3))

    df_out['#'] = percent
    df_out['FPR']=fpr
    df_out['CI_FPR']=ci_fpr

    df_out['FNR']=fnr
    df_out['CI_FNR']=ci_fnr

    return df_out


In [11]:
fpr_fnr_sex_df=FiveRunSubgroup(sex, result_sex_df, fpr_fnr_sex_df)
fpr_fnr_sex_df.to_csv(sub_group_FPR+'/Subgroup_FNR_FPR_Sex.csv')
fpr_fnr_sex_df

Unnamed: 0,sex,#,FPR,CI_FPR,FNR,CI_FNR
0,M,18025.0,0.108,0.004,0.248,0.007
1,F,13001.0,0.133,0.005,0.2,0.008


## Age

In [12]:
seed_19_age = pd.read_csv(sub_group_FPR+"/run_19FPR_FNR_NF_age.csv",index_col=0)
seed_31_age = pd.read_csv(sub_group_FPR+"/run_31FPR_FNR_NF_age.csv",index_col=0)
seed_38_age = pd.read_csv(sub_group_FPR+"/run_38FPR_FNR_NF_age.csv",index_col=0)
seed_47_age = pd.read_csv(sub_group_FPR+"/run_47FPR_FNR_NF_age.csv",index_col=0)
seed_77_age = pd.read_csv(sub_group_FPR+"/run_77FPR_FNR_NF_age.csv",index_col=0)
seed_77_age.head(5)

Unnamed: 0,diseases,#60-80,FPR_60-80,FNR_60-80,#40-60,FPR_40-60,FNR_40-60,#20-40,FPR_20-40,FNR_20-40,#80+,FPR_80+,FNR_80+,#0-20,FPR_0-20,FNR_0-20
0,No Finding,6526,0.061,0.234,4770,0.08,0.16,1797,0.092,0.111,3033,0.061,0.374,125,0.096,0.122


In [13]:
age_dataframes = [seed_19_age, seed_31_age, seed_38_age, seed_47_age, seed_77_age]
result_age_df= pd.concat(age_dataframes)

result_age_df = result_age_df.describe()

In [14]:
print("FPR distribiution in ages")
print(round(result_age_df.loc['mean'],3))

FPR distribiution in ages
#60-80       6526.000
FPR_60-80       0.061
FNR_60-80       0.216
#40-60       4770.000
FPR_40-60       0.076
FNR_40-60       0.159
#20-40       1797.000
FPR_20-40       0.091
FNR_20-40       0.111
#80+         3033.000
FPR_80+         0.061
FNR_80+         0.362
#0-20         125.000
FPR_0-20        0.082
FNR_0-20        0.141
Name: mean, dtype: float64


In [15]:
print("ALL: Mean FPR distribiution over ages")
mean([result_age_df.loc['mean']["FPR_60-80"], result_age_df.loc['mean']["FPR_40-60"],
      result_age_df.loc['mean']["FPR_20-40"], result_age_df.loc['mean']["FPR_0-20"],
      result_age_df.loc['mean']["FPR_80+"]])

ALL: Mean FPR distribiution over ages


0.07416

In [16]:
round(significance_level* result_age_df.loc['std'] / np.sqrt(5),3)

#60-80       0.000
FPR_60-80    0.004
FNR_60-80    0.016
#40-60       0.000
FPR_40-60    0.005
FNR_40-60    0.009
#20-40       0.000
FPR_20-40    0.004
FNR_20-40    0.006
#80+         0.000
FPR_80+      0.006
FNR_80+      0.025
#0-20        0.000
FPR_0-20     0.013
FNR_0-20     0.018
Name: std, dtype: float64

In [17]:
age = ['0-20','20-40','40-60','60-80','80+' ]
fpr_fnr_age_df = pd.DataFrame(age, columns=["Age"])

fpr_fnr_age_df=FiveRunSubgroup(age, result_age_df, fpr_fnr_age_df)
fpr_fnr_age_df.to_csv(sub_group_FPR+'/Subgrounp_FNR_FPR_Age.csv')
fpr_fnr_age_df

Unnamed: 0,Age,#,FPR,CI_FPR,FNR,CI_FNR
0,0-20,125.0,0.082,0.013,0.141,0.018
1,20-40,1797.0,0.091,0.004,0.111,0.006
2,40-60,4770.0,0.076,0.005,0.159,0.009
3,60-80,6526.0,0.061,0.004,0.216,0.016
4,80+,3033.0,0.061,0.006,0.362,0.025


## Race

In [18]:
seed_19_race = pd.read_csv(sub_group_FPR+"/run_19FPR_FNR_NF_race.csv",index_col=0)
seed_31_race = pd.read_csv(sub_group_FPR+"/run_31FPR_FNR_NF_race.csv",index_col=0)
seed_38_race = pd.read_csv(sub_group_FPR+"/run_38FPR_FNR_NF_race.csv",index_col=0)
seed_47_race = pd.read_csv(sub_group_FPR+"/run_47FPR_FNR_NF_race.csv",index_col=0)
seed_77_race = pd.read_csv(sub_group_FPR+"/run_77FPR_FNR_NF_race.csv",index_col=0)
seed_77_race.head(3)

Unnamed: 0,diseases,#White,FPR_White,FNR_White,#Black,FPR_Black,FNR_Black,#Hisp,FPR_Hisp,FNR_Hisp,#Other,FPR_Other,FNR_Other,#Asian,FPR_Asian,FNR_Asian,#American,FPR_American,FNR_American
0,No Finding,20900,0.105,0.267,3225,0.178,0.193,1256,0.157,0.186,2941,0.091,0.19,2361,0.089,0.207,343,0.125,0.305


In [19]:
race_dataframes = [seed_19_race, seed_31_race, seed_38_race, seed_47_race, seed_77_race]
result_race= pd.concat(race_dataframes)

result_race_df =result_race.describe()

In [20]:
print("FPR distribiution in races")
print(round(result_race_df.loc['mean'],3))

FPR distribiution in races
#White          20900.000
FPR_White           0.111
FNR_White           0.252
#Black           3225.000
FPR_Black           0.188
FNR_Black           0.176
#Hisp            1256.000
FPR_Hisp            0.167
FNR_Hisp            0.166
#Other           2941.000
FPR_Other           0.096
FNR_Other           0.186
#Asian           2361.000
FPR_Asian           0.093
FNR_Asian           0.195
#American         343.000
FPR_American        0.123
FNR_American        0.289
Name: mean, dtype: float64


In [21]:
round(significance_level * result_race_df.loc['std'] / np.sqrt(5),3)

#White          0.000
FPR_White       0.004
FNR_White       0.007
#Black          0.000
FPR_Black       0.006
FNR_Black       0.009
#Hisp           0.000
FPR_Hisp        0.007
FNR_Hisp        0.013
#Other          0.000
FPR_Other       0.004
FNR_Other       0.007
#Asian          0.000
FPR_Asian       0.004
FNR_Asian       0.009
#American       0.000
FPR_American    0.005
FNR_American    0.018
Name: std, dtype: float64

In [22]:
race = ['White','Black','Hisp','Other','Asian','American' ]
fpr_fpr_race_df = pd.DataFrame(race, columns=["Race"])

fpr_fpr_race_df=FiveRunSubgroup(race, result_race_df, fpr_fpr_race_df)
fpr_fpr_race_df.to_csv(sub_group_FPR+'/Subgroup_FNR_FPR_Race.csv')
fpr_fpr_race_df

Unnamed: 0,Race,#,FPR,CI_FPR,FNR,CI_FNR
0,White,20900.0,0.111,0.004,0.252,0.007
1,Black,3225.0,0.188,0.006,0.176,0.009
2,Hisp,1256.0,0.167,0.007,0.166,0.013
3,Other,2941.0,0.096,0.004,0.186,0.007
4,Asian,2361.0,0.093,0.004,0.195,0.009
5,American,343.0,0.123,0.005,0.289,0.018


# Two group Intersectional identity

In [23]:
two_group_FPR = './FPR/Two_Group_Intersection_FPR/'

## Age-Sex

In [24]:
seed_19_agesex = pd.read_csv(two_group_FPR+"/run_19FP_AgeSex.csv")
seed_31_agesex= pd.read_csv(two_group_FPR+"/run_31FP_AgeSex.csv")
seed_38_agesex = pd.read_csv(two_group_FPR+"/run_38FP_AgeSex.csv")
seed_47_agesex = pd.read_csv(two_group_FPR+"/run_47FP_AgeSex.csv")
seed_77_agesex = pd.read_csv(two_group_FPR+"/run_77FP_AgeSex.csv")

fp_agesex =pd.concat([seed_19_agesex, seed_31_agesex,seed_38_agesex, seed_47_agesex,seed_77_agesex])
fp_agesex =fp_agesex.groupby("Age")
fp_agesex = fp_agesex.describe()

fp_agesex

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_M,FPR_M,FPR_M,FPR_M,FPR_M,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Age,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0-20,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.079,...,0.079,0.105,5.0,0.0858,0.022186,0.061,0.082,0.082,0.082,0.122
20-40,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.1006,...,0.103,0.108,5.0,0.0762,0.005675,0.068,0.073,0.078,0.08,0.082
40-60,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.0732,...,0.077,0.078,5.0,0.0812,0.005404,0.075,0.076,0.083,0.085,0.087
60-80,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0546,...,0.057,0.06,5.0,0.0714,0.006348,0.064,0.068,0.071,0.073,0.081
80+,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.046,...,0.045,0.055,5.0,0.0814,0.011014,0.07,0.077,0.077,0.084,0.099


In [25]:
print("Age-Sex over male")
round(fp_agesex['FPR_M']['mean'],3)

Age-Sex over male


Age
0-20     0.079
20-40    0.101
40-60    0.073
60-80    0.055
80+      0.046
Name: mean, dtype: float64

In [26]:
round(significance_level * fp_agesex['FPR_M']["std"] / np.sqrt(5),3)

Age
0-20     0.014
20-40    0.004
40-60    0.005
60-80    0.003
80+      0.005
Name: std, dtype: float64

In [27]:
print("mean: ", round(fp_agesex['FPR_M']['mean'],3).mean() )

mean:  0.0708


In [28]:
round(fp_agesex['FPR_F']['mean'],3)

Age
0-20     0.086
20-40    0.076
40-60    0.081
60-80    0.071
80+      0.081
Name: mean, dtype: float64

In [29]:
round(significance_level * fp_agesex['FPR_F']["std"] / np.sqrt(5),3)

Age
0-20     0.019
20-40    0.005
40-60    0.005
60-80    0.006
80+      0.010
Name: std, dtype: float64

In [30]:
CI = round(significance_level * fp_agesex['FPR_F']["std"] / np.sqrt(5),3)

In [31]:
def FiveRun(factors,output_df,df):
  for factor in factors:
    fpr = round(df[factor]['mean'],3)
    confI = round(significance_level * df[factor]["std"] / np.sqrt(5),3)
    output_df[factor] = pd.DataFrame(fpr.values.tolist(),columns =[factor])
    output_df['CI_'+factor] = pd.DataFrame(confI.values.tolist(),columns =['CI_'+factor])

  return output_df

In [32]:
factors = ['FPR_F', 'FPR_M']
age =['0-20', '20-40', '40-60', '60-80','80+']
agesex_df = pd.DataFrame(age, columns=["Age"])

agesex_df = FiveRun(factors,agesex_df,fp_agesex)
agesex_df.to_csv(two_group_FPR+'/Inter_AgeSex.csv')
agesex_df

Unnamed: 0,Age,FPR_F,CI_FPR_F,FPR_M,CI_FPR_M
0,0-20,0.086,0.019,0.079,0.014
1,20-40,0.076,0.005,0.101,0.004
2,40-60,0.081,0.005,0.073,0.005
3,60-80,0.071,0.006,0.055,0.003
4,80+,0.081,0.01,0.046,0.005


## Race- Sex

In [33]:
seed_19_race_sex = pd.read_csv(two_group_FPR+"/run_19FP_RaceSex.csv")
seed_31_race_sex= pd.read_csv(two_group_FPR+"/run_31FP_RaceSex.csv")
seed_38_race_sex = pd.read_csv(two_group_FPR+"/run_38FP_RaceSex.csv")
seed_47_race_sex = pd.read_csv(two_group_FPR+"/run_47FP_RaceSex.csv")
seed_77_race_sex = pd.read_csv(two_group_FPR+"/run_77FP_RaceSex.csv")

fp_race_sex =pd.concat([seed_19_race_sex, seed_31_race_sex,seed_38_race_sex,
                       seed_47_race_sex,seed_77_race_sex])

fp_race_sex =fp_race_sex.groupby("race")
fp_race_sex = fp_race_sex.describe()

fp_race_sex

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_M,FPR_M,FPR_M,FPR_M,FPR_M,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F,FPR_F
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
race,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AMERICAN INDIAN/ALASKA NATIVE,5.0,5.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,0.1194,...,0.123,0.129,5.0,0.1268,0.007823,0.116,0.122,0.128,0.134,0.134
ASIAN,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.0796,...,0.083,0.086,5.0,0.1174,0.004506,0.113,0.115,0.115,0.12,0.124
BLACK/AFRICAN AMERICAN,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.16,...,0.165,0.172,5.0,0.216,0.006124,0.206,0.216,0.216,0.221,0.221
HISPANIC/LATINO,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.152,...,0.159,0.161,5.0,0.1888,0.008955,0.177,0.183,0.191,0.193,0.2
OTHER,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.085,...,0.085,0.094,5.0,0.1086,0.007162,0.102,0.105,0.105,0.111,0.12
WHITE,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.1046,...,0.106,0.11,5.0,0.1202,0.00563,0.112,0.117,0.122,0.125,0.125


In [34]:
factors = ['FPR_F', 'FPR_M']
race =['AMERICAN INDIAN/ALASKA NATIVE', 'ASIAN', 'BLACK/AFRICAN AMERICAN',
       'HISPANIC/LATINO','OTHER','WHITE']
RaceSex_df = pd.DataFrame(race, columns=["race"])

RaceSex_df = FiveRun(factors,RaceSex_df,fp_race_sex)
RaceSex_df.to_csv(two_group_FPR+'/Inter_RaceSex.csv')
RaceSex_df

Unnamed: 0,race,FPR_F,CI_FPR_F,FPR_M,CI_FPR_M
0,AMERICAN INDIAN/ALASKA NATIVE,0.127,0.007,0.119,0.006
1,ASIAN,0.117,0.004,0.08,0.004
2,BLACK/AFRICAN AMERICAN,0.216,0.005,0.16,0.008
3,HISPANIC/LATINO,0.189,0.008,0.152,0.007
4,OTHER,0.109,0.006,0.085,0.005
5,WHITE,0.12,0.005,0.105,0.003


## Race-Age

In [35]:
seed_19_race_age = pd.read_csv(two_group_FPR+"/run_19FP_RaceAge.csv")
seed_31_race_age= pd.read_csv(two_group_FPR+"/run_31FP_RaceAge.csv")
seed_38_race_age = pd.read_csv(two_group_FPR+"/run_38FP_RaceAge.csv")
seed_47_race_age = pd.read_csv(two_group_FPR+"/run_47FP_RaceAge.csv")
seed_77_race_age = pd.read_csv(two_group_FPR+"/run_77FP_RaceAge.csv")

fp_race_age =pd.concat([seed_19_race_age, seed_31_race_age,seed_38_race_age,
                       seed_47_race_age,seed_77_race_age])

fp_race_age =fp_race_age.groupby("age")
fp_race_age = fp_race_age.describe()
fp_race_age

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_White,FPR_White,...,FPR_Asian,FPR_Asian,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American,FPR_American
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
age,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0-20,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.047,...,0.0,0.0,5.0,0.4666,0.182939,0.333,0.333,0.333,0.667,0.667
20-40,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.099,...,0.07,0.08,5.0,0.145,0.019596,0.113,0.145,0.145,0.161,0.161
40-60,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.0788,...,0.082,0.089,5.0,0.1152,0.013791,0.096,0.115,0.115,0.115,0.135
60-80,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0546,...,0.085,0.088,5.0,0.1044,0.016441,0.081,0.093,0.116,0.116,0.116
80+,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.0596,...,0.036,0.038,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
round(significance_level * fp_race_age['FPR_White']["std"] / np.sqrt(5),3)

age
0-20     0.017
20-40    0.005
40-60    0.005
60-80    0.004
80+      0.006
Name: std, dtype: float64

In [37]:
factors = ['FPR_White', 'FPR_Black','FPR_Hisp','FPR_Other','FPR_Asian','FPR_American']
age =['0-20', '20-40', '40-60', '60-80','80+']
RaceAge_df = pd.DataFrame(age, columns=["age"])


RaceAge_df = FiveRun(factors,RaceAge_df,fp_race_age)
RaceAge_df.to_csv(two_group_FPR+'/Inter_RaceAge.csv')
RaceAge_df

Unnamed: 0,age,FPR_White,CI_FPR_White,FPR_Black,CI_FPR_Black,FPR_Hisp,CI_FPR_Hisp,FPR_Other,CI_FPR_Other,FPR_Asian,CI_FPR_Asian,FPR_American,CI_FPR_American
0,0-20,0.047,0.017,0.0,0.0,0.333,0.0,0.383,0.04,0.0,0.0,0.467,0.16
1,20-40,0.099,0.005,0.08,0.006,0.019,0.0,0.096,0.004,0.07,0.006,0.145,0.017
2,40-60,0.079,0.005,0.108,0.008,0.043,0.007,0.06,0.005,0.079,0.006,0.115,0.012
3,60-80,0.055,0.004,0.072,0.006,0.021,0.004,0.078,0.005,0.08,0.006,0.104,0.014
4,80+,0.06,0.006,0.087,0.015,0.056,0.011,0.106,0.009,0.034,0.003,0.0,0.0


# Three group Intersectional identity

## Race , age and sex

In [38]:
three_group_FPR = './FPR/Three_Group_Intersection_FPR/'


In [39]:
RaceAgeSex_19 = pd.read_csv(three_group_FPR+"/run_19FP_RaceAgeSex.csv")
RaceAgeSex_31 = pd.read_csv(three_group_FPR+"/run_31FP_RaceAgeSex.csv")
RaceAgeSex_38 = pd.read_csv(three_group_FPR+"/run_38FP_RaceAgeSex.csv")
RaceAgeSex_47 = pd.read_csv(three_group_FPR+"/run_47FP_RaceAgeSex.csv")
RaceAgeSex_77 = pd.read_csv(three_group_FPR+"/run_77FP_RaceAgeSex.csv")

In [40]:

fp_race_age_sex_df =pd.concat([RaceAgeSex_19, RaceAgeSex_31,RaceAgeSex_38,
                       RaceAgeSex_47,RaceAgeSex_77])

fp_race_age_sex_df =fp_race_age_sex_df.groupby("race")
fp_race_age_sex_df = fp_race_age_sex_df.describe()
fp_race_age_sex_df

Unnamed: 0_level_0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,Unnamed: 0,FPR_60_80_M,FPR_60_80_M,...,FPR_80+_F,FPR_80+_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F,FPR_0_20_F
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
race,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AMERICAN INDIAN/ALASKA NATIVE,5.0,5.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,0.091,...,0.0,0.0,5.0,0.2,0.273861,0.0,0.0,0.0,0.5,0.5
ASIAN,5.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,5.0,0.0734,...,0.04,0.046,0.0,,,,,,,
BLACK/AFRICAN AMERICAN,5.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,5.0,0.0692,...,0.079,0.089,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HISPANIC/LATINO,5.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,5.0,0.0,...,0.125,0.125,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
OTHER,5.0,3.0,0.0,3.0,3.0,3.0,3.0,3.0,5.0,0.0504,...,0.136,0.182,5.0,0.9,0.136931,0.75,0.75,1.0,1.0,1.0
WHITE,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0514,...,0.087,0.102,5.0,0.0052,0.011628,0.0,0.0,0.0,0.0,0.026


In [41]:
factors = ['FPR_60_80_M', 'FPR_40_60_M','FPR_20_40_M','FPR_80+_M','FPR_0_20_M','FPR_60_80_F',
           'FPR_40_60_F','FPR_20_40_F','FPR_80+_F','FPR_0_20_F']
race = ['AMERICAN INDIAN/ALASKA NATIVE','ASIAN','BLACK/AFRICAN AMERICAN','HISPANIC/LATINO','OTHER','WHITE']
RaceAgeSex_df = pd.DataFrame(race, columns=["race"])

RaceAgeSex_df = FiveRun(factors,RaceAgeSex_df,fp_race_age_sex_df)

RaceAgeSex_df.to_csv(three_group_FPR+'/Inter_RaceAgeSex.csv')
RaceAgeSex_df

Unnamed: 0,race,FPR_60_80_M,CI_FPR_60_80_M,FPR_40_60_M,CI_FPR_40_60_M,FPR_20_40_M,CI_FPR_20_40_M,FPR_80+_M,CI_FPR_80+_M,FPR_0_20_M,...,FPR_60_80_F,CI_FPR_60_80_F,FPR_40_60_F,CI_FPR_40_60_F,FPR_20_40_F,CI_FPR_20_40_F,FPR_80+_F,CI_FPR_80+_F,FPR_0_20_F,CI_FPR_0_20_F
0,AMERICAN INDIAN/ALASKA NATIVE,0.091,0.0,0.114,0.014,0.107,0.019,0.0,0.0,1.0,...,0.109,0.019,0.117,0.016,0.219,0.023,0.0,0.0,0.2,0.24
1,ASIAN,0.073,0.005,0.073,0.007,0.054,0.007,0.03,0.002,0.0,...,0.094,0.007,0.092,0.006,0.085,0.01,0.04,0.004,,
2,BLACK/AFRICAN AMERICAN,0.069,0.006,0.085,0.011,0.126,0.014,0.143,0.017,0.0,...,0.075,0.008,0.136,0.005,0.046,0.006,0.062,0.019,0.0,0.0
3,HISPANIC/LATINO,0.0,0.0,0.059,0.005,0.022,0.0,0.04,0.013,0.5,...,0.044,0.009,0.026,0.011,0.0,0.0,0.125,0.0,0.0,0.0
4,OTHER,0.05,0.005,0.065,0.004,0.102,0.005,0.078,0.004,0.125,...,0.11,0.006,0.053,0.006,0.085,0.006,0.145,0.018,0.9,0.12
5,WHITE,0.051,0.003,0.074,0.005,0.117,0.004,0.043,0.005,0.1,...,0.06,0.005,0.085,0.005,0.072,0.007,0.085,0.01,0.005,0.01
