## INSTALL PACKAGES AND READ CSV FILES FROM GITHUB

In [1]:
library(data.table)

In [2]:
library(stringr)

In [3]:
library(comprehenr) # for list comprehension; to_list

In [4]:
library(tibble)

In [5]:
library(readr)

In [6]:
cross_country <- read_csv("https://raw.github.com/BBLinus/pharmgenomics/main/compute_CROSS_COUNTRY.csv")

[1mRows: [22m[34m41[39m [1mColumns: [22m[34m7[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): POLYMORPHISM
[32mdbl[39m (6): ALL (n = 5008), EUR (n = 1006), AFR (n = 1322), AMR (n = 694), EAS ...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


### create folder for haplotype frequency data

In [7]:
dir.create('./hapFreq')

“'./hapFreq' already exists”


### define function to compute percentage

In [8]:
prop <- function(col_name, pop_total) {
    the_list <- list()
    for (i in col_name) {
        x <-  (i * 100)/pop_total
        x <- format(round(x, 2), nsmall = 2)
        y <- paste (as.character(i), ' (', as.character(x), ')', sep = '')
        the_list <- append(the_list, y)
    }
    the_list
        }

In [9]:
cross_country

POLYMORPHISM,ALL (n = 5008),EUR (n = 1006),AFR (n = 1322),AMR (n = 694),EAS (n = 1008),SAS (n = 978)
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CYP2D6*2,1798,345,731,227,141,354
CYP2D6*3,28,19,3,4,0,2
CYP2D6*4,466,187,80,90,2,107
CYP2D6*6,24,20,1,2,0,1
CYP2D6*9,36,26,1,9,0,0
CYP2D6*10,1178,202,146,103,567,160
CYP2D6*14,10,0,0,0,10,0
CYP2D6*17,295,2,287,6,0,0
CYP2D6*41,317,94,24,42,38,119
CYP2C19*2,1108,146,225,72,315,350


In [10]:
polymorphisms <- cross_country$POLYMORPHISM

In [11]:
ALL = prop(cross_country$'ALL (n = 5008)', 5008)
EUR = prop(cross_country$'EUR (n = 1006)', 1006)
AFR = prop(cross_country$'AFR (n = 1322)', 1322)
AMR = prop(cross_country$'AMR (n = 694)', 694)
EAS = prop(cross_country$'EAS (n = 1008)', 1008)
SAS = prop(cross_country$'SAS (n = 978)', 978)

cross_country_list <- list (polymorphisms = polymorphisms,
                            'ALL (n = 5008)' = ALL, 
                            'EUR (n = 1006)' = EUR, 
                            'AFR (n = 1322)' = AFR, 
                            'AMR (n = 694)' = AMR,
                            'EAS (n = 1008)' = EAS,
                            'SAS (n = 978)' = SAS)

cross_country_prop <- as.data.frame(do.call(cbind, cross_country_list)) 
fwrite(cross_country_prop, file ="./hapFreq/all.csv")
cross_country_prop

polymorphisms,ALL (n = 5008),EUR (n = 1006),AFR (n = 1322),AMR (n = 694),EAS (n = 1008),SAS (n = 978)
<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,1798 (35.90),345 (34.29),731 (55.30),227 (32.71),141 (13.99),354 (36.20)
CYP2D6*3,28 (0.56),19 (1.89),3 (0.23),4 (0.58),0 (0.00),2 (0.20)
CYP2D6*4,466 (9.31),187 (18.59),80 (6.05),90 (12.97),2 (0.20),107 (10.94)
CYP2D6*6,24 (0.48),20 (1.99),1 (0.08),2 (0.29),0 (0.00),1 (0.10)
CYP2D6*9,36 (0.72),26 (2.58),1 (0.08),9 (1.30),0 (0.00),0 (0.00)
CYP2D6*10,1178 (23.52),202 (20.08),146 (11.04),103 (14.84),567 (56.25),160 (16.36)
CYP2D6*14,10 (0.20),0 (0.00),0 (0.00),0 (0.00),10 (0.99),0 (0.00)
CYP2D6*17,295 (5.89),2 (0.20),287 (21.71),6 (0.86),0 (0.00),0 (0.00)
CYP2D6*41,317 (6.33),94 (9.34),24 (1.82),42 (6.05),38 (3.77),119 (12.17)
CYP2C19*2,1108 (22.12),146 (14.51),225 (17.02),72 (10.37),315 (31.25),350 (35.79)


In [12]:
european <- read_csv("https://raw.github.com/BBLinus/pharmgenomics/main/european_SNP_format.csv")

[1mRows: [22m[34m41[39m [1mColumns: [22m[34m8[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): POLYMORPHISM
[32mdbl[39m (7): ALL (n = 5008), EUR (n = 1006), CEU (n = 198), TSI (n = 214), FIN (...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [13]:
european

POLYMORPHISM,ALL (n = 5008),EUR (n = 1006),CEU (n = 198),TSI (n = 214),FIN (n = 198),GBR (n = 182),IBS (n = 214)
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CYP2D6*2,1798,345,64,80,64,56,81
CYP2D6*3,28,19,4,0,7,6,2
CYP2D6*4,466,187,45,40,27,42,31
CYP2D6*6,24,20,4,4,9,2,1
CYP2D6*9,36,26,4,7,3,7,5
CYP2D6*10,1178,202,47,44,29,45,37
CYP2D6*14,10,0,0,0,0,0,0
CYP2D6*17,295,2,0,0,0,0,2
CYP2D6*41,317,94,24,31,7,13,19
CYP2C19*2,1108,146,26,20,43,26,31


In [14]:
ALL = prop(european$'ALL (n = 5008)', 5008)
EUR = prop(european$'EUR (n = 1006)', 1006)
CEU = prop(european$'CEU (n = 198)', 198)
TSI = prop(european$'TSI (n = 214)', 214)
FIN = prop(european$'FIN (n = 198)', 198)
GBR = prop(european$'GBR (n = 182)', 182)
IBS = prop(european$'IBS (n = 214)', 214)

european_list <- list (polymorphisms = polymorphisms,
                       'ALL (n = 5008)' = ALL, 
                       'EUR (n = 1006)' = EUR, 
                       'CEU (n = 198)' = CEU, 
                       'TSI (n = 214)' = TSI,
                       'FIN (n = 198)' = FIN,
                       'GBR (n = 182)' = GBR, 
                       'IBS (n = 214)' = IBS)

european_prop <- as.data.frame(do.call(cbind, european_list)) 
fwrite(european_prop, file ="./hapFreq/European.csv")
european_prop

polymorphisms,ALL (n = 5008),EUR (n = 1006),CEU (n = 198),TSI (n = 214),FIN (n = 198),GBR (n = 182),IBS (n = 214)
<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,1798 (35.90),345 (34.29),64 (32.32),80 (37.38),64 (32.32),56 (30.77),81 (37.85)
CYP2D6*3,28 (0.56),19 (1.89),4 (2.02),0 (0.00),7 (3.54),6 (3.30),2 (0.93)
CYP2D6*4,466 (9.31),187 (18.59),45 (22.73),40 (18.69),27 (13.64),42 (23.08),31 (14.49)
CYP2D6*6,24 (0.48),20 (1.99),4 (2.02),4 (1.87),9 (4.55),2 (1.10),1 (0.47)
CYP2D6*9,36 (0.72),26 (2.58),4 (2.02),7 (3.27),3 (1.52),7 (3.85),5 (2.34)
CYP2D6*10,1178 (23.52),202 (20.08),47 (23.74),44 (20.56),29 (14.65),45 (24.73),37 (17.29)
CYP2D6*14,10 (0.20),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*17,295 (5.89),2 (0.20),0 (0.00),0 (0.00),0 (0.00),0 (0.00),2 (0.93)
CYP2D6*41,317 (6.33),94 (9.34),24 (12.12),31 (14.49),7 (3.54),13 (7.14),19 (8.88)
CYP2C19*2,1108 (22.12),146 (14.51),26 (13.13),20 (9.35),43 (21.72),26 (14.29),31 (14.49)


In [15]:
intra_africa <- read_csv("https://raw.github.com/BBLinus/pharmgenomics/main/compute_INTRA_AFRICA.csv")

[1mRows: [22m[34m41[39m [1mColumns: [22m[34m10[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): POLYMORPHISM
[32mdbl[39m (9): ALL (n = 5008), AFR (n = 1322), YRI (n = 216), LWK (n = 198), GWD (...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [16]:
intra_africa

POLYMORPHISM,ALL (n = 5008),AFR (n = 1322),YRI (n = 216),LWK (n = 198),GWD (n = 226),MSL (n = 170),ESN (n = 198),ASW (n = 122),ACB (n = 192)
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CYP2D6*2,1798,731,121,128,120,104,112,51,95
CYP2D6*3,28,3,0,0,0,0,0,2,1
CYP2D6*4,466,80,12,5,9,7,15,15,16
CYP2D6*6,24,1,0,0,0,0,0,1,0
CYP2D6*9,36,1,0,0,0,0,0,1,0
CYP2D6*10,1178,146,23,7,26,27,18,19,26
CYP2D6*14,10,0,0,0,0,0,0,0,0
CYP2D6*17,295,287,55,38,37,47,52,18,40
CYP2D6*41,317,24,2,6,1,1,3,2,9
CYP2C19*2,1108,225,36,42,30,30,41,17,29


In [17]:
ALL = prop(intra_africa$'ALL (n = 5008)', 5008)
AFR = prop(intra_africa$'AFR (n = 1322)', 1322)
YRI = prop(intra_africa$'YRI (n = 216)', 216)
LWK = prop(intra_africa$'LWK (n = 198)', 198)
GWD = prop(intra_africa$'GWD (n = 226)', 226)
MSL = prop(intra_africa$'MSL (n = 170)', 170)
ESN = prop(intra_africa$'ESN (n = 198)', 198)
ASW = prop(intra_africa$'ASW (n = 122)', 122)
ACB = prop(intra_africa$'ACB (n = 192)', 192)

africa_list <- list (polymorphisms = polymorphisms,
                     'ALL (n = 5008)' = ALL, 
                     'AFR (n = 1006)' = AFR, 
                     'YRI (n = 216)' = YRI, 
                     'LWK (n = 198)' = LWK,
                     'GWD (n = 226)' = GWD,
                     'MSL (n = 170)' = MSL, 
                     'ESN (n = 198)' = ESN,
                     'ASW (n = 122)' = ASW,
                     'ACB (n = 192)' = ACB)


africa_prop <- as.data.frame(do.call(cbind, africa_list)) 
fwrite(africa_prop, file ="./hapFreq/African.csv")
africa_prop

polymorphisms,ALL (n = 5008),AFR (n = 1006),YRI (n = 216),LWK (n = 198),GWD (n = 226),MSL (n = 170),ESN (n = 198),ASW (n = 122),ACB (n = 192)
<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,1798 (35.90),731 (55.30),121 (56.02),128 (64.65),120 (53.10),104 (61.18),112 (56.57),51 (41.80),95 (49.48)
CYP2D6*3,28 (0.56),3 (0.23),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),2 (1.64),1 (0.52)
CYP2D6*4,466 (9.31),80 (6.05),12 (5.56),5 (2.53),9 (3.98),7 (4.12),15 (7.58),15 (12.30),16 (8.33)
CYP2D6*6,24 (0.48),1 (0.08),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),1 (0.82),0 (0.00)
CYP2D6*9,36 (0.72),1 (0.08),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),1 (0.82),0 (0.00)
CYP2D6*10,1178 (23.52),146 (11.04),23 (10.65),7 (3.54),26 (11.50),27 (15.88),18 (9.09),19 (15.57),26 (13.54)
CYP2D6*14,10 (0.20),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*17,295 (5.89),287 (21.71),55 (25.46),38 (19.19),37 (16.37),47 (27.65),52 (26.26),18 (14.75),40 (20.83)
CYP2D6*41,317 (6.33),24 (1.82),2 (0.93),6 (3.03),1 (0.44),1 (0.59),3 (1.52),2 (1.64),9 (4.69)
CYP2C19*2,1108 (22.12),225 (17.02),36 (16.67),42 (21.21),30 (13.27),30 (17.65),41 (20.71),17 (13.93),29 (15.10)


In [18]:
ad_mixed_american <- read_csv("https://raw.github.com/BBLinus/pharmgenomics/main/ad_mixed_american_SNP_format.csv")

[1mRows: [22m[34m41[39m [1mColumns: [22m[34m7[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): POLYMORPHISM
[32mdbl[39m (6): ALL (n = 5008), AMR (n = 694), MXL (n = 128), PUR (n = 208), CLM (n...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [19]:
ad_mixed_american

POLYMORPHISM,ALL (n = 5008),AMR (n = 694),MXL (n = 128),PUR (n = 208),CLM (n = 188),PEL (n = 170)
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CYP2D6*2,1798,227,33,81,56,55
CYP2D6*3,28,4,0,1,3,0
CYP2D6*4,466,90,16,32,31,11
CYP2D6*6,24,2,0,0,2,0
CYP2D6*9,36,9,1,5,3,0
CYP2D6*10,1178,103,19,37,35,12
CYP2D6*14,10,0,0,0,0,0
CYP2D6*17,295,6,0,2,2,2
CYP2D6*41,317,42,2,25,14,1
CYP2C19*2,1108,72,16,26,20,10


In [20]:
ALL = prop(ad_mixed_american$'ALL (n = 5008)', 5008)
AMR = prop(ad_mixed_american$'AMR (n = 694)', 694)
MXL = prop(ad_mixed_american$'MXL (n = 128)', 128)
PUR = prop(ad_mixed_american$'PUR (n = 208)', 208)
CLM = prop(ad_mixed_american$'CLM (n = 188)', 188)
PEL = prop(ad_mixed_american$'PEL (n = 170)', 170)


ad_mixed_american_list <- list (polymorphisms = polymorphisms,
                                'ALL (n = 5008)' = ALL, 
                                'AMR (n = 694)' = AMR, 
                                'MXL (n = 128)' = MXL, 
                                'PUR (n = 208)' = PUR,
                                'CLM (n = 188)' = CLM,
                                'PEL (n = 170)' = PEL)


ad_mixed_american_prop <- as.data.frame(do.call(cbind, ad_mixed_american_list)) 
fwrite(ad_mixed_american_prop, file ="./hapFreq/admixedAmerican.csv")
ad_mixed_american_prop

polymorphisms,ALL (n = 5008),AMR (n = 694),MXL (n = 128),PUR (n = 208),CLM (n = 188),PEL (n = 170)
<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,1798 (35.90),227 (32.71),33 (25.78),81 (38.94),56 (29.79),55 (32.35)
CYP2D6*3,28 (0.56),4 (0.58),0 (0.00),1 (0.48),3 (1.60),0 (0.00)
CYP2D6*4,466 (9.31),90 (12.97),16 (12.50),32 (15.38),31 (16.49),11 (6.47)
CYP2D6*6,24 (0.48),2 (0.29),0 (0.00),0 (0.00),2 (1.06),0 (0.00)
CYP2D6*9,36 (0.72),9 (1.30),1 (0.78),5 (2.40),3 (1.60),0 (0.00)
CYP2D6*10,1178 (23.52),103 (14.84),19 (14.84),37 (17.79),35 (18.62),12 (7.06)
CYP2D6*14,10 (0.20),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*17,295 (5.89),6 (0.86),0 (0.00),2 (0.96),2 (1.06),2 (1.18)
CYP2D6*41,317 (6.33),42 (6.05),2 (1.56),25 (12.02),14 (7.45),1 (0.59)
CYP2C19*2,1108 (22.12),72 (10.37),16 (12.50),26 (12.50),20 (10.64),10 (5.88)


In [21]:
east_asian <- read_csv("https://raw.github.com/BBLinus/pharmgenomics/main/east_asian_SNP_format.csv")

[1mRows: [22m[34m41[39m [1mColumns: [22m[34m8[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): POLYMORPHISM
[32mdbl[39m (7): ALL (n = 5008), EAS (n = 1008), CHB (n = 206), JPT (n = 208), CHS (...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [22]:
east_asian

POLYMORPHISM,ALL (n = 5008),EAS (n = 1008),CHB (n = 206),JPT (n = 208),CHS (n = 210),CDX (n = 186),KHV (n = 198)
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CYP2D6*2,1798,141,34,29,28,24,26
CYP2D6*3,28,0,0,0,0,0,0
CYP2D6*4,466,2,1,0,0,0,1
CYP2D6*6,24,0,0,0,0,0,0
CYP2D6*9,36,0,0,0,0,0,0
CYP2D6*10,1178,567,122,75,127,115,128
CYP2D6*14,10,10,1,1,0,3,5
CYP2D6*17,295,0,0,0,0,0,0
CYP2D6*41,317,38,7,1,10,15,5
CYP2C19*2,1108,315,69,67,74,49,56


In [23]:
ALL = prop(east_asian$'ALL (n = 5008)', 5008)
EAS = prop(east_asian$'EAS (n = 1008)', 1008)
CHB = prop(east_asian$'CHB (n = 206)', 206)
JPT = prop(east_asian$'JPT (n = 208)', 208)
CHS = prop(east_asian$'CHS (n = 210)', 210)
CDX = prop(east_asian$'CDX (n = 186)', 186)
KHV = prop(east_asian$'KHV (n = 198)', 198)

east_asian_list <- list (polymorphisms = polymorphisms,
                         'ALL (n = 5008)' = ALL, 
                         'EAS (n = 1008)' = EAS, 
                         'CHB (n = 206)' = CHB, 
                         'JPT (n = 208)' = JPT,
                         'CHS (n = 210)' = CHS,             
                         'CDX (n = 186)' = CDX,
                         'KHV (n = 198)' = KHV)


east_asian_prop <- as.data.frame(do.call(cbind, east_asian_list)) 
fwrite(east_asian_prop, file ="./hapFreq/eastAsian.csv")
east_asian_prop

polymorphisms,ALL (n = 5008),EAS (n = 1008),CHB (n = 206),JPT (n = 208),CHS (n = 210),CDX (n = 186),KHV (n = 198)
<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,1798 (35.90),141 (13.99),34 (16.50),29 (13.94),28 (13.33),24 (12.90),26 (13.13)
CYP2D6*3,28 (0.56),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*4,466 (9.31),2 (0.20),1 (0.49),0 (0.00),0 (0.00),0 (0.00),1 (0.51)
CYP2D6*6,24 (0.48),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*9,36 (0.72),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*10,1178 (23.52),567 (56.25),122 (59.22),75 (36.06),127 (60.48),115 (61.83),128 (64.65)
CYP2D6*14,10 (0.20),10 (0.99),1 (0.49),1 (0.48),0 (0.00),3 (1.61),5 (2.53)
CYP2D6*17,295 (5.89),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*41,317 (6.33),38 (3.77),7 (3.40),1 (0.48),10 (4.76),15 (8.06),5 (2.53)
CYP2C19*2,1108 (22.12),315 (31.25),69 (33.50),67 (32.21),74 (35.24),49 (26.34),56 (28.28)


In [24]:
south_asian <- read_csv("https://raw.github.com/BBLinus/pharmgenomics/main/south_asian_SNP_format.csv")

[1mRows: [22m[34m41[39m [1mColumns: [22m[34m8[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (1): POLYMORPHISM
[32mdbl[39m (7): ALL (n = 5008), SAS (n = 978), GIH (n = 206), PJL (n = 192), BEB (n...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [25]:
south_asian

POLYMORPHISM,ALL (n = 5008),SAS (n = 978),GIH (n = 206),PJL (n = 192),BEB (n = 172),STU (n = 204),ITU (n = 204)
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CYP2D6*2,1798,354,91,61,43,77,82
CYP2D6*3,28,2,1,0,1,0,0
CYP2D6*4,466,107,26,15,21,22,23
CYP2D6*6,24,1,0,1,0,0,0
CYP2D6*9,36,0,0,0,0,0,0
CYP2D6*10,1178,160,31,20,43,31,35
CYP2D6*17,295,0,0,0,0,0,0
CYP2D6*41,317,119,30,21,16,24,28
CYP2C19*2,1108,350,68,66,56,84,76
CYP2C19*3,71,12,1,3,4,3,1


In [26]:
ALL = prop(south_asian$'ALL (n = 5008)', 5008)
SAS = prop(south_asian$'SAS (n = 978)', 978)
GIH = prop(south_asian$'GIH (n = 206)', 206)
PJL = prop(south_asian$'PJL (n = 192)', 192)
BEB = prop(south_asian$'BEB (n = 172)', 172)
STU = prop(south_asian$'STU (n = 204)', 204)
ITU = prop(south_asian$'ITU (n = 204)', 204)

south_asian_list <- list (polymorphisms = polymorphisms,
                          'ALL (n = 5008)' = ALL, 
                          'SAS (n = 978)' = SAS, 
                          'GIH (n = 206)' = GIH, 
                          'PJL (n = 192)' = PJL,
                          'BEB (n = 172)' = BEB,             
                          'STU (n = 204)' = STU,
                          'ITU (n = 204)' = ITU)


south_asian_prop <- as.data.frame(do.call(cbind, south_asian_list)) 
fwrite(south_asian_prop, file ="./hapFreq/southAsian.csv")
south_asian_prop

polymorphisms,ALL (n = 5008),SAS (n = 978),GIH (n = 206),PJL (n = 192),BEB (n = 172),STU (n = 204),ITU (n = 204)
<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,1798 (35.90),354 (36.20),91 (44.17),61 (31.77),43 (25.00),77 (37.75),82 (40.20)
CYP2D6*3,28 (0.56),2 (0.20),1 (0.49),0 (0.00),1 (0.58),0 (0.00),0 (0.00)
CYP2D6*4,466 (9.31),107 (10.94),26 (12.62),15 (7.81),21 (12.21),22 (10.78),23 (11.27)
CYP2D6*6,24 (0.48),1 (0.10),0 (0.00),1 (0.52),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*9,36 (0.72),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*10,1178 (23.52),160 (16.36),31 (15.05),20 (10.42),43 (25.00),31 (15.20),35 (17.16)
CYP2D6*14,295 (5.89),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00),0 (0.00)
CYP2D6*17,317 (6.33),119 (12.17),30 (14.56),21 (10.94),16 (9.30),24 (11.76),28 (13.73)
CYP2D6*41,1108 (22.12),350 (35.79),68 (33.01),66 (34.38),56 (32.56),84 (41.18),76 (37.25)
CYP2C19*2,71 (1.42),12 (1.23),1 (0.49),3 (1.56),4 (2.33),3 (1.47),1 (0.49)


## DEFINE FUNCTION FOR POPULATION VS REST OF THE WORLD

In [27]:
conf_int_v_ALL <- function (b, column_name) {
    # a = population global; b = sub-population specific; i = number with the queried variance
    to_list (for (item in Map(list, column_name, cross_country$'ALL (n = 5008)')) {
        i = item[[1]]
        j = item[[2]]
        
        a = 5008 
        the_rest <- a - b # sub-populations other than the one being queried
        
        q = j - i # number of those among the_rest with queried variant
            
        p1 <- i/b
        p2 <- q/the_rest
        
        prevalence_ratio <- p1/p2
        
        # prevalence_ratio between sub-population of interest and the others
        
        crit_value <- qnorm(p=.05/2, lower.tail=FALSE) # z critical value at 95% CI, 2-tailed, p<0.05
            
        s_p1 <- (b-i)/(b*i)
        s_p2 <- (the_rest - q)/(q * the_rest)
            
        delta <- crit_value * (sqrt(s_p1 + s_p2))
        
        
        lowerCI <- exp((log(prevalence_ratio) - delta))
        upperCI <- exp((log(prevalence_ratio) + delta))
            
        # round off to 2dp
        prevalence_ratio <- format(round(prevalence_ratio, 2), nsmall = 2) 
        lowerCI <- format(round(lowerCI, 2), nsmall = 2) 
        upperCI <- format(round(upperCI, 2), nsmall = 2) 
        
        concat_the_string <- str_c (prevalence_ratio, ' (', lowerCI, ', ', upperCI, ')')
            }
          )
}

## WRITE TO FILE FUNCTION

In [28]:
write_to_file <- function (the_list, file_name) {
    for (i in the_list) {
        line <- print (i, fill = getOption("width"))
        write(line, file=file_name, append=TRUE)
    }
}

In [29]:
# write polymorphism row names to file
write_to_file (polymorphisms, 'polymorphisms_row_names')

[1] "CYP2D6*2"
[1] "CYP2D6*3"
[1] "CYP2D6*4"
[1] "CYP2D6*6"
[1] "CYP2D6*9"
[1] "CYP2D6*10"
[1] "CYP2D6*14"
[1] "CYP2D6*17"
[1] "CYP2D6*41"
[1] "CYP2C19*2"
[1] "CYP2C19*3"
[1] "CYP2C19*4"
[1] "CYP2C19*17"
[1] "CYP2C9*2"
[1] "CYP2C9*3"
[1] "CYP2C9*5"
[1] "CYP2C9*6"
[1] "CYP2C9*8"
[1] "CYP2C9*11"
[1] "CYP2C9*13"
[1] "CYP2C9*14"
[1] "CYP2C9*16"
[1] "CYP2C9*29"
[1] "CYP2C9*31"
[1] "CYP2C9*33"
[1] "CYP2C9*45"
[1] "CYP3A4*22"
[1] "rs3812718-T"
[1] "HLA-B*15:02(i)"
[1] "HLA-B*15:02(ii_CHB)"
[1] "HLA-B*40:01(ii_CEU)"
[1] "HLA-B*40:01(ii_CHB)"
[1] "HLA-B*40:01(ii_JPT)"
[1] "HLA-B*40:01(iii_CHS)"
[1] "HLA-A*40:01(iii_MAS/INS)"
[1] "HLA-A*31:01(ii_CEU)"
[1] "HLA-A*31:01(ii_CHB)"
[1] "HLA-A*31:01(ii_JPT)"
[1] "HLA-A*31:01(iii_CHS)"
[1] "HLA-A*31:01(iii_INS)"
[1] "HLA-B*15:11(ii_CHB)"


## folder for comparison

In [30]:
dir.create('./compare_population')

“'./compare_population' already exists”


## Continents vs Rest of the World

In [31]:
EUR_vs_world <- conf_int_v_ALL(1006, cross_country$'EUR (n = 1006)')

AFR_vs_world <- conf_int_v_ALL(1322, cross_country$'AFR (n = 1322)')

AMR_vs_world <- conf_int_v_ALL(694, cross_country$'AMR (n = 694)')

EAS_vs_world <- conf_int_v_ALL(1008, cross_country$'EAS (n = 1008)')

SAS_vs_world <- conf_int_v_ALL(978, cross_country$'SAS (n = 978)')

continent_list <- list (polymorphisms = polymorphisms, EUR_vs_world = EUR_vs_world, 
                 AFR_vs_world = AFR_vs_world, AMR_vs_world = AMR_vs_world, 
                 EAS_vs_world = EAS_vs_world, SAS_vs_world = SAS_vs_world)


continent_vs_world_df <- as.data.frame(do.call(cbind, continent_list)) 

In [32]:
continent_vs_world_df

polymorphisms,EUR_vs_world,AFR_vs_world,AMR_vs_world,EAS_vs_world,SAS_vs_world
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"0.94 (0.86, 1.04)","1.91 (1.78, 2.05)","0.90 (0.80, 1.01)","0.34 (0.29, 0.40)","1.01 (0.92, 1.11)"
CYP2D6*3,"8.40 (3.81, 18.51)","0.33 (0.10, 1.11)","1.04 (0.36, 2.98)","0.00 (0.00, NaN)","0.32 (0.08, 1.33)"
CYP2D6*4,"2.67 (2.25, 3.17)","0.58 (0.46, 0.73)","1.49 (1.20, 1.85)","0.02 (0.00, 0.07)","1.23 (1.00, 1.51)"
CYP2D6*6,"19.89 (6.81, 58.06)","0.12 (0.02, 0.90)","0.57 (0.13, 2.40)","0.00 (0.00, NaN)","0.18 (0.02, 1.33)"
CYP2D6*9,"10.34 (5.00, 21.38)","0.08 (0.01, 0.58)","2.07 (0.98, 4.39)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*10,"0.82 (0.72, 0.94)","0.39 (0.34, 0.46)","0.60 (0.49, 0.72)","3.68 (3.36, 4.03)","0.65 (0.56, 0.75)"
CYP2D6*14,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","Inf (NaN, Inf)","0.00 (0.00, NaN)"
CYP2D6*17,"0.03 (0.01, 0.11)","100.03 (49.69, 201.37)","0.13 (0.06, 0.29)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*41,"1.68 (1.33, 2.11)","0.23 (0.15, 0.34)","0.95 (0.69, 1.30)","0.54 (0.39, 0.75)","2.48 (1.99, 3.07)"
CYP2C19*2,"0.60 (0.51, 0.71)","0.71 (0.62, 0.81)","0.43 (0.34, 0.54)","1.58 (1.41, 1.76)","1.90 (1.71, 2.11)"


In [33]:
fwrite(continent_vs_world_df, file ="./compare_population/continent_vs_world.csv")

## European Populations vs Rest of the World

In [34]:
CEU_vs_world <- conf_int_v_ALL(198, european$'CEU (n = 198)')

TSI_vs_world <- conf_int_v_ALL(214, european$'TSI (n = 214)')

FIN_vs_world <- conf_int_v_ALL(198, european$'FIN (n = 198)')

GBR_vs_world <- conf_int_v_ALL(182, european$'GBR (n = 182)')

IBS_vs_world <- conf_int_v_ALL(214, european$'IBS (n = 214)')

eur_world_list <- list (polymorphisms = polymorphisms, CEU_vs_world = CEU_vs_world,
                 TSI_vs_world = TSI_vs_world, FIN_vs_world = FIN_vs_world, 
                 GBR_vs_world = GBR_vs_world, IBS_vs_world = IBS_vs_world)

eur_rest_df <- as.data.frame(do.call(cbind, eur_world_list)) 

In [35]:
eur_rest_df

polymorphisms,CEU_vs_world,TSI_vs_world,FIN_vs_world,GBR_vs_world,IBS_vs_world
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"0.90 (0.73, 1.10)","1.04 (0.87, 1.25)","0.90 (0.73, 1.10)","0.85 (0.68, 1.06)","1.06 (0.89, 1.26)"
CYP2D6*3,"4.05 (1.42, 11.56)","0.00 (0.00, NaN)","8.10 (3.48, 18.82)","7.23 (2.97, 17.62)","1.72 (0.41, 7.21)"
CYP2D6*4,"2.60 (1.98, 3.41)","2.10 (1.57, 2.82)","1.49 (1.04, 2.15)","2.63 (1.98, 3.48)","1.60 (1.14, 2.24)"
CYP2D6*6,"4.86 (1.68, 14.08)","4.48 (1.54, 12.99)","14.58 (6.46, 32.90)","2.41 (0.57, 10.17)","0.97 (0.13, 7.18)"
CYP2D6*9,"3.04 (1.08, 8.50)","5.41 (2.40, 12.20)","2.21 (0.68, 7.14)","6.40 (2.84, 14.42)","3.61 (1.42, 9.20)"
CYP2D6*10,"1.01 (0.78, 1.30)","0.87 (0.66, 1.14)","0.61 (0.44, 0.86)","1.05 (0.81, 1.36)","0.73 (0.54, 0.98)"
CYP2D6*14,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*17,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.15 (0.04, 0.61)"
CYP2D6*41,"1.99 (1.35, 2.94)","2.43 (1.72, 3.43)","0.55 (0.26, 1.14)","1.13 (0.66, 1.94)","1.43 (0.92, 2.22)"
CYP2C19*2,"0.58 (0.41, 0.84)","0.41 (0.27, 0.63)","0.98 (0.75, 1.28)","0.64 (0.44, 0.91)","0.64 (0.46, 0.90)"


In [36]:
fwrite(eur_rest_df, file ="./compare_population/europe_vs_world.csv")

## African Populations vs Rest of the World

In [37]:
YRI_vs_world <- conf_int_v_ALL(216, intra_africa$'YRI (n = 216)')

LWK_vs_world <- conf_int_v_ALL(198, intra_africa$'LWK (n = 198)')

GWD_vs_world <- conf_int_v_ALL(226, intra_africa$'GWD (n = 226)')

MSL_vs_world <- conf_int_v_ALL(170, intra_africa$'MSL (n = 170)')

ESN_vs_world <- conf_int_v_ALL(198, intra_africa$'ESN (n = 198)')

ASW_vs_world <- conf_int_v_ALL(122, intra_africa$'ASW (n = 122)')

ACB_vs_world <- conf_int_v_ALL(192, intra_africa$'ACB (n = 192)')

afr_world_list <- list (polymorphisms = polymorphisms, YRI_vs_world = YRI_vs_world,
                 LWK_vs_world = LWK_vs_world, GWD_vs_world = GWD_vs_world, 
                 MSL_vs_world = MSL_vs_world, ESN_vs_world = ESN_vs_world,
                 ASW_vs_world = ASW_vs_world, ACB_vs_world = ACB_vs_world)

afri_rest_df <- as.data.frame(do.call(cbind, afr_world_list)) 

In [38]:
afri_rest_df

polymorphisms,YRI_vs_world,LWK_vs_world,GWD_vs_world,MSL_vs_world,ESN_vs_world,ASW_vs_world,ACB_vs_world
<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"1.60 (1.41, 1.81)","1.86 (1.67, 2.08)","1.51 (1.33, 1.72)","1.75 (1.54, 1.98)","1.61 (1.42, 1.83)","1.17 (0.95, 1.45)","1.40 (1.21, 1.62)"
CYP2D6*3,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","3.08 (0.74, 12.83)","0.93 (0.13, 6.80)"
CYP2D6*4,"0.59 (0.34, 1.02)","0.26 (0.11, 0.63)","0.42 (0.22, 0.80)","0.43 (0.21, 0.90)","0.81 (0.49, 1.32)","1.33 (0.82, 2.16)","0.89 (0.55, 1.44)"
CYP2D6*6,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","1.74 (0.24, 12.79)","0.00 (0.00, NaN)"
CYP2D6*9,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","1.14 (0.16, 8.28)","0.00 (0.00, NaN)"
CYP2D6*10,"0.44 (0.30, 0.65)","0.15 (0.07, 0.30)","0.48 (0.33, 0.69)","0.67 (0.47, 0.95)","0.38 (0.24, 0.59)","0.66 (0.43, 1.00)","0.57 (0.39, 0.81)"
CYP2D6*14,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*17,"5.08 (3.92, 6.59)","3.59 (2.64, 4.90)","3.03 (2.21, 4.17)","5.39 (4.11, 7.08)","5.20 (3.99, 6.77)","2.60 (1.67, 4.05)","3.93 (2.91, 5.31)"
CYP2D6*41,"0.14 (0.04, 0.56)","0.47 (0.21, 1.04)","0.07 (0.01, 0.47)","0.09 (0.01, 0.64)","0.23 (0.08, 0.72)","0.25 (0.06, 1.01)","0.73 (0.38, 1.40)"
CYP2C19*2,"0.75 (0.55, 1.01)","0.96 (0.73, 1.26)","0.59 (0.42, 0.83)","0.79 (0.57, 1.10)","0.93 (0.71, 1.23)","0.62 (0.40, 0.97)","0.67 (0.48, 0.95)"


In [39]:
fwrite(afri_rest_df, file ="./compare_population/africa_vs_world.csv")

## American Populations vs Rest of the World

In [40]:
MXL_vs_world <- conf_int_v_ALL(128, ad_mixed_american$'MXL (n = 128)')

PUR_vs_world <- conf_int_v_ALL(208, ad_mixed_american$'PUR (n = 208)')

CLM_vs_world <- conf_int_v_ALL(188, ad_mixed_american$'CLM (n = 188)')

PEL_vs_world <- conf_int_v_ALL(170, ad_mixed_american$'PEL (n = 170)')

america_world_list <- list (polymorphisms = polymorphisms, MXL_vs_world = MXL_vs_world,
                 PUR_vs_world = PUR_vs_world, CLM_vs_world = CLM_vs_world, 
                 PEL_vs_world = PEL_vs_world)

america_rest_df <- as.data.frame(do.call(cbind, america_world_list)) 

In [41]:
america_rest_df

polymorphisms,MXL_vs_world,PUR_vs_world,CLM_vs_world,PEL_vs_world
<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"0.71 (0.53, 0.96)","1.09 (0.91, 1.30)","0.82 (0.66, 1.03)","0.90 (0.72, 1.12)"
CYP2D6*3,"0.00 (0.00, NaN)","0.85 (0.12, 6.26)","3.08 (0.94, 10.10)","0.00 (0.00, NaN)"
CYP2D6*4,"1.36 (0.85, 2.16)","1.70 (1.22, 2.37)","1.83 (1.31, 2.55)","0.69 (0.39, 1.23)"
CYP2D6*6,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","2.33 (0.55, 9.84)","0.00 (0.00, NaN)"
CYP2D6*9,"1.09 (0.15, 7.89)","3.72 (1.46, 9.47)","2.33 (0.72, 7.53)","0.00 (0.00, NaN)"
CYP2D6*10,"0.62 (0.41, 0.95)","0.75 (0.56, 1.01)","0.79 (0.58, 1.06)","0.29 (0.17, 0.51)"
CYP2D6*14,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*17,"0.00 (0.00, NaN)","0.16 (0.04, 0.63)","0.18 (0.04, 0.70)","0.19 (0.05, 0.77)"
CYP2D6*41,"0.24 (0.06, 0.96)","1.98 (1.35, 2.90)","1.18 (0.71, 1.98)","0.09 (0.01, 0.64)"
CYP2C19*2,"0.56 (0.35, 0.89)","0.55 (0.39, 0.80)","0.47 (0.31, 0.72)","0.26 (0.14, 0.47)"


In [42]:
fwrite(america_rest_df, file ="./compare_population/america_vs_world.csv")

## East Asian Populations vs Rest of the World

In [43]:
CHB_vs_world <- conf_int_v_ALL(206, east_asian$'CHB (n = 206)')

JPT_vs_world <- conf_int_v_ALL(208, east_asian$'JPT (n = 208)')

CHS_vs_world <- conf_int_v_ALL(210, east_asian$'CHS (n = 210)')

CDX_vs_world <- conf_int_v_ALL(186, east_asian$'CDX (n = 186)')

KHV_vs_world <- conf_int_v_ALL(198, east_asian$'KHV (n = 198)')

east_asia_world_list <- list (polymorphisms = polymorphisms, CHB_vs_world = CHB_vs_world,
                 JPT_vs_world = JPT_vs_world, CHS_vs_world = CHS_vs_world, 
                 CDX_vs_world = CDX_vs_world, KHV_vs_world = KHV_vs_world)

east_asia_rest_df <- as.data.frame(do.call(cbind, east_asia_world_list)) 

In [44]:
east_asia_rest_df

polymorphisms,CHB_vs_world,JPT_vs_world,CHS_vs_world,CDX_vs_world,KHV_vs_world
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"0.45 (0.33, 0.61)","0.38 (0.27, 0.53)","0.36 (0.26, 0.51)","0.35 (0.24, 0.51)","0.36 (0.25, 0.51)"
CYP2D6*3,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*4,"0.05 (0.01, 0.35)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.05 (0.01, 0.37)"
CYP2D6*6,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*9,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*10,"2.69 (2.38, 3.05)","1.57 (1.30, 1.89)","2.76 (2.44, 3.12)","2.80 (2.48, 3.18)","2.96 (2.64, 3.33)"
CYP2D6*14,"2.59 (0.33, 20.35)","2.56 (0.33, 20.14)","0.00 (0.00, NaN)","11.11 (2.90, 42.63)","24.29 (7.09, 83.23)"
CYP2D6*17,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*41,"0.53 (0.25, 1.10)","0.07 (0.01, 0.52)","0.74 (0.40, 1.38)","1.29 (0.78, 2.12)","0.39 (0.16, 0.93)"
CYP2C19*2,"1.55 (1.27, 1.89)","1.49 (1.21, 1.82)","1.64 (1.35, 1.98)","1.20 (0.94, 1.53)","1.29 (1.03, 1.62)"


In [45]:
fwrite(east_asia_rest_df, file ="./compare_population/east_asia_vs_world.csv")

## South Asian Populations vs Rest of the World

In [46]:
GIH_vs_world <- conf_int_v_ALL(206, south_asian$'GIH (n = 206)')

PJL_vs_world <- conf_int_v_ALL(192, south_asian$'PJL (n = 192)')

BEB_vs_world <- conf_int_v_ALL(172, south_asian$'BEB (n = 172)')

STU_vs_world <- conf_int_v_ALL(204, south_asian$'STU (n = 204)')

ITU_vs_world <- conf_int_v_ALL(204, south_asian$'ITU (n = 204)')

south_asia_world_list <- list (polymorphisms = polymorphisms, GIH_vs_world = GIH_vs_world,
                 PJL_vs_world = PJL_vs_world, BEB_vs_world = BEB_vs_world, 
                 STU_vs_world = STU_vs_world, ITU_vs_world = ITU_vs_world)

south_asia_rest_df <- as.data.frame(do.call(cbind, south_asia_world_list)) 

In [47]:
south_asia_rest_df

polymorphisms,GIH_vs_world,PJL_vs_world,BEB_vs_world,STU_vs_world,ITU_vs_world
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"1.24 (1.06, 1.46)","0.88 (0.71, 1.09)","0.69 (0.53, 0.89)","1.05 (0.88, 1.26)","1.13 (0.95, 1.34)"
CYP2D6*3,"0.86 (0.12, 6.32)","0.00 (0.00, NaN)","1.04 (0.14, 7.62)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*4,"1.38 (0.95, 1.99)","0.83 (0.51, 1.37)","1.33 (0.88, 2.00)","1.17 (0.78, 1.75)","1.22 (0.82, 1.81)"
CYP2D6*6,"0.00 (0.00, NaN)","1.09 (0.15, 8.03)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*9,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*10,"0.63 (0.45, 0.87)","0.43 (0.29, 0.66)","1.07 (0.82, 1.39)","0.64 (0.46, 0.88)","0.72 (0.53, 0.98)"
CYP2D6*14,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*17,"2.64 (1.86, 3.75)","1.92 (1.26, 2.92)","1.61 (1.00, 2.61)","2.09 (1.41, 3.09)","2.47 (1.72, 3.55)"
CYP2D6*41,"6.37 (5.06, 8.00)","6.60 (5.24, 8.30)","6.03 (4.72, 7.71)","8.49 (6.91, 10.44)","7.43 (5.98, 9.22)"
CYP2C19*2,"0.02 (0.00, 0.15)","0.07 (0.02, 0.21)","0.10 (0.04, 0.27)","0.06 (0.02, 0.20)","0.02 (0.00, 0.15)"


In [48]:
fwrite(south_asia_rest_df, file ="./compare_population/south_asia_vs_world.csv")

## DEFINE FUNCTION FOR POPULATION VS REST OF EUROPE

In [49]:
conf_int_v_EUR <- function (b, column_name) {
    # a = population global; b = sub-population specific; i = number with the queried variance
    to_list (for (item in Map(list, column_name, cross_country$'EUR (n = 1006)')) {
        i = item[[1]]
        j = item[[2]]
        
        a = 1006
        the_rest <- a - b # sub-populations other than the one being queried
        
        q = j - i # number of those among the_rest with queried variant
            
        p1 <- i/b
        p2 <- q/the_rest
        
        prevalence_ratio <- p1/p2
        
        # prevalence_ratio between sub-population of interest and the others
        
        crit_value <- qnorm(p=.05/2, lower.tail=FALSE) # z critical value at 95% CI, 2-tailed, p<0.05
            
        s_p1 <- (b-i)/(b*i)
        s_p2 <- (the_rest - q)/(q * the_rest)
            
        delta <- crit_value * (sqrt(s_p1 + s_p2))
        
        
        lowerCI <- exp((log(prevalence_ratio) - delta))
        upperCI <- exp((log(prevalence_ratio) + delta))
            
        # round off to 2dp
        prevalence_ratio <- format(round(prevalence_ratio, 2), nsmall = 2) 
        lowerCI <- format(round(lowerCI, 2), nsmall = 2) 
        upperCI <- format(round(upperCI, 2), nsmall = 2) 
        
        concat_the_string <- str_c (prevalence_ratio, ' (', lowerCI, ', ', upperCI, ')')
            }
          )
}

## European Populations vs Rest of the European

In [50]:
CEU_vs_europe <- conf_int_v_EUR(198, european$'CEU (n = 198)')

TSI_vs_europe <- conf_int_v_EUR(214, european$'TSI (n = 214)')

FIN_vs_europe <- conf_int_v_EUR(198, european$'FIN (n = 198)')

GBR_vs_europe <- conf_int_v_EUR(182, european$'GBR (n = 182)')

IBS_vs_europe <- conf_int_v_EUR(214, european$'IBS (n = 214)')




intra_europe_list <- list (polymorphisms = polymorphisms, CEU_vs_europe = CEU_vs_europe,
                 TSI_vs_europe = TSI_vs_europe, FIN_vs_europe = FIN_vs_europe, 
                 GBR_vs_europe = GBR_vs_europe, IBS_vs_europe = IBS_vs_europe)

intra_europe_df <- as.data.frame(do.call(cbind, intra_europe_list)) 

In [51]:
intra_europe_df

polymorphisms,CEU_vs_europe,TSI_vs_europe,FIN_vs_europe,GBR_vs_europe,IBS_vs_europe
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"0.93 (0.74, 1.16)","1.12 (0.92, 1.36)","0.93 (0.74, 1.16)","0.88 (0.69, 1.11)","1.14 (0.93, 1.38)"
CYP2D6*3,"1.09 (0.37, 3.24)","0.00 (0.00, NaN)","2.38 (0.95, 5.97)","2.09 (0.80, 5.42)","0.44 (0.10, 1.87)"
CYP2D6*4,"1.29 (0.96, 1.74)","1.01 (0.73, 1.38)","0.69 (0.47, 1.00)","1.31 (0.97, 1.78)","0.74 (0.52, 1.05)"
CYP2D6*6,"1.02 (0.34, 3.02)","0.93 (0.31, 2.74)","3.34 (1.40, 7.95)","0.50 (0.12, 2.15)","0.19 (0.03, 1.45)"
CYP2D6*9,"0.74 (0.26, 2.13)","1.36 (0.58, 3.20)","0.53 (0.16, 1.75)","1.67 (0.71, 3.91)","0.88 (0.34, 2.31)"
CYP2D6*10,"1.24 (0.93, 1.65)","1.03 (0.77, 1.39)","0.68 (0.48, 0.98)","1.30 (0.97, 1.73)","0.83 (0.60, 1.15)"
CYP2D6*14,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*17,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","Inf (NaN, Inf)"
CYP2D6*41,"1.40 (0.90, 2.17)","1.82 (1.22, 2.72)","0.33 (0.15, 0.70)","0.73 (0.41, 1.28)","0.94 (0.58, 1.52)"
CYP2C19*2,"0.88 (0.60, 1.31)","0.59 (0.38, 0.92)","1.70 (1.24, 2.35)","0.98 (0.66, 1.45)","1.00 (0.69, 1.44)"


In [52]:
fwrite(intra_europe_df, file ="./compare_population/intra_europe.csv")

## DEFINE FUNCTION FOR POPULATION VS REST OF AFRICA

In [53]:
conf_int_v_AFR <- function (b, column_name) {
    # a = population global; b = sub-population specific; i = number with the queried variance
    to_list (for (item in Map(list, column_name, cross_country$'AFR (n = 1322)')) {
        i = item[[1]]
        j = item[[2]]
        
        a = 1322
        the_rest <- a - b # sub-populations other than the one being queried
        
        q = j - i # number of those among the_rest with queried variant
            
        p1 <- i/b
        p2 <- q/the_rest
        
        prevalence_ratio <- p1/p2
        
        # prevalence_ratio between sub-population of interest and the others
        
        crit_value <- qnorm(p=.05/2, lower.tail=FALSE) # z critical value at 95% CI, 2-tailed, p<0.05
            
        s_p1 <- (b-i)/(b*i)
        s_p2 <- (the_rest - q)/(q * the_rest)
            
        delta <- crit_value * (sqrt(s_p1 + s_p2))
        
        
        lowerCI <- exp((log(prevalence_ratio) - delta))
        upperCI <- exp((log(prevalence_ratio) + delta))
            
        # round off to 2dp
        prevalence_ratio <- format(round(prevalence_ratio, 2), nsmall = 2) 
        lowerCI <- format(round(lowerCI, 2), nsmall = 2) 
        upperCI <- format(round(upperCI, 2), nsmall = 2) 
        
        concat_the_string <- str_c (prevalence_ratio, ' (', lowerCI, ', ', upperCI, ')')
            }
          )
}

## African Populations vs Rest of the Africa

In [54]:
YRI_vs_africa <- conf_int_v_AFR(216, intra_africa$'YRI (n = 216)')

LWK_vs_africa <- conf_int_v_AFR(198, intra_africa$'LWK (n = 198)')

GWD_vs_africa <- conf_int_v_AFR(226, intra_africa$'GWD (n = 226)')

MSL_vs_africa <- conf_int_v_AFR(170, intra_africa$'MSL (n = 170)')

ESN_vs_africa <- conf_int_v_AFR(198, intra_africa$'ESN (n = 198)')

ASW_vs_africa <- conf_int_v_AFR(122, intra_africa$'ASW (n = 122)')

ACB_vs_africa <- conf_int_v_AFR(192, intra_africa$'ACB (n = 192)')




intra_afr_list <- list (polymorphisms = polymorphisms, YRI_vs_africa = YRI_vs_africa,
                 LWK_vs_africa = LWK_vs_africa, GWD_vs_africa = GWD_vs_africa, 
                 MSL_vs_africa = MSL_vs_africa, ESN_vs_africa = ESN_vs_africa,
                 ASW_vs_africa = ASW_vs_africa, ACB_vs_africa = ACB_vs_africa)

intra_africa_df <- as.data.frame(do.call(cbind, intra_afr_list)) 

“NaNs produced”
“NaNs produced”
“NaNs produced”


In [55]:
intra_africa_df

polymorphisms,YRI_vs_africa,LWK_vs_africa,GWD_vs_africa,MSL_vs_africa,ESN_vs_africa,ASW_vs_africa,ACB_vs_africa
<list>,<list>,<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"1.02 (0.89, 1.16)","1.21 (1.07, 1.35)","0.95 (0.83, 1.09)","1.12 (0.99, 1.28)","1.03 (0.90, 1.17)","0.74 (0.59, 0.91)","0.88 (0.76, 1.02)"
CYP2D6*3,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","19.67 (1.80, 215.38)","2.94 (0.27, 32.29)"
CYP2D6*4,"0.90 (0.50, 1.64)","0.38 (0.16, 0.92)","0.61 (0.31, 1.21)","0.65 (0.30, 1.39)","1.31 (0.76, 2.25)","2.27 (1.34, 3.85)","1.47 (0.87, 2.49)"
CYP2D6*6,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","Inf (NaN, Inf)","0.00 (0.00, NaN)"
CYP2D6*9,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","Inf (NaN, Inf)","0.00 (0.00, NaN)"
CYP2D6*10,"0.96 (0.63, 1.46)","0.29 (0.14, 0.60)","1.05 (0.70, 1.57)","1.54 (1.05, 2.26)","0.80 (0.50, 1.28)","1.47 (0.94, 2.30)","1.28 (0.86, 1.89)"
CYP2D6*14,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*17,"1.21 (0.94, 1.57)","0.87 (0.64, 1.18)","0.72 (0.52, 0.98)","1.33 (1.02, 1.73)","1.26 (0.97, 1.63)","0.66 (0.42, 1.02)","0.95 (0.71, 1.28)"
CYP2D6*41,"0.47 (0.11, 1.97)","1.89 (0.76, 4.71)","0.21 (0.03, 1.55)","0.29 (0.04, 2.17)","0.81 (0.24, 2.69)","0.89 (0.21, 3.76)","3.53 (1.57, 7.95)"
CYP2C19*2,"0.98 (0.70, 1.35)","1.30 (0.97, 1.76)","0.75 (0.52, 1.07)","1.04 (0.74, 1.48)","1.26 (0.93, 1.71)","0.80 (0.51, 1.27)","0.87 (0.61, 1.25)"


In [56]:
fwrite(intra_africa_df, file ="./compare_population/intra_africa.csv")

## DEFINE FUNCTION FOR POPULATION VS REST OF ADMIXED AMERICAN

In [57]:
conf_int_v_AMR <- function (b, column_name) {
    # a = population global; b = sub-population specific; i = number with the queried variance
    to_list (for (item in Map(list, column_name, cross_country$'AMR (n = 694)')) {
        i = item[[1]]
        j = item[[2]]
        
        a = 694
        the_rest <- a - b # sub-populations other than the one being queried
        
        q = j - i # number of those among the_rest with queried variant
            
        p1 <- i/b
        p2 <- q/the_rest
        
        prevalence_ratio <- p1/p2
        
        # prevalence_ratio between sub-population of interest and the others
        
        crit_value <- qnorm(p=.05/2, lower.tail=FALSE) # z critical value at 95% CI, 2-tailed, p<0.05
            
        s_p1 <- (b-i)/(b*i)
        s_p2 <- (the_rest - q)/(q * the_rest)
            
        delta <- crit_value * (sqrt(s_p1 + s_p2))
        
        
        lowerCI <- exp((log(prevalence_ratio) - delta))
        upperCI <- exp((log(prevalence_ratio) + delta))
            
        # round off to 2dp
        prevalence_ratio <- format(round(prevalence_ratio, 2), nsmall = 2) 
        lowerCI <- format(round(lowerCI, 2), nsmall = 2) 
        upperCI <- format(round(upperCI, 2), nsmall = 2) 
        
        concat_the_string <- str_c (prevalence_ratio, ' (', lowerCI, ', ', upperCI, ')')
            }
          )
}

## Admixed America Populations vs Rest of the Americas

In [58]:
MXL_vs_america <- conf_int_v_AMR(216, ad_mixed_american$'MXL (n = 128)')

PUR_vs_america <- conf_int_v_AMR(198, ad_mixed_american$'PUR (n = 208)')

CLM_vs_america <- conf_int_v_AMR(226, ad_mixed_american$'CLM (n = 188)')

PEL_vs_america <- conf_int_v_AMR(170, ad_mixed_american$'PEL (n = 170)')




intra_amr_list <- list (polymorphisms = polymorphisms, MXL_vs_america = MXL_vs_america,
                 PUR_vs_america = PUR_vs_america, CLM_vs_america = CLM_vs_america, 
                 PEL_vs_america = PEL_vs_america)

intra_america_df <- as.data.frame(do.call(cbind, intra_amr_list)) 

In [59]:
intra_america_df

polymorphisms,MXL_vs_america,PUR_vs_america,CLM_vs_america,PEL_vs_america
<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"0.38 (0.27, 0.52)","1.39 (1.12, 1.72)","0.68 (0.52, 0.88)","0.99 (0.77, 1.26)"
CYP2D6*3,"0.00 (0.00, NaN)","0.84 (0.09, 7.98)","6.21 (0.65, 59.39)","0.00 (0.00, NaN)"
CYP2D6*4,"0.48 (0.29, 0.80)","1.38 (0.93, 2.06)","1.09 (0.73, 1.63)","0.43 (0.23, 0.79)"
CYP2D6*6,"0.00 (0.00, NaN)","0.00 (0.00, NaN)","Inf (NaN, Inf)","0.00 (0.00, NaN)"
CYP2D6*9,"0.28 (0.03, 2.20)","3.13 (0.85, 11.54)","1.04 (0.26, 4.10)","0.00 (0.00, NaN)"
CYP2D6*10,"0.50 (0.31, 0.80)","1.40 (0.97, 2.03)","1.07 (0.73, 1.55)","0.41 (0.23, 0.72)"
CYP2D6*14,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*17,"0.00 (0.00, NaN)","1.25 (0.23, 6.78)","1.04 (0.19, 5.61)","1.54 (0.28, 8.34)"
CYP2D6*41,"0.11 (0.03, 0.45)","3.68 (2.03, 6.67)","1.04 (0.56, 1.93)","0.08 (0.01, 0.54)"
CYP2C19*2,"0.63 (0.37, 1.08)","1.42 (0.90, 2.22)","0.80 (0.49, 1.30)","0.50 (0.26, 0.95)"


In [60]:
fwrite(intra_america_df, file ="./compare_population/intra_america.csv")

## DEFINE FUNCTION FOR POPULATION VS REST OF EAST ASIA

In [61]:
conf_int_v_EAS <- function (b, column_name) {
    # a = population global; b = sub-population specific; i = number with the queried variance
    to_list (for (item in Map(list, column_name, cross_country$'EAS (n = 1008)')) {
        i = item[[1]]
        j = item[[2]]
        
        a = 1008
        the_rest <- a - b # sub-populations other than the one being queried
        
        q = j - i # number of those among the_rest with queried variant
            
        p1 <- i/b
        p2 <- q/the_rest
        
        prevalence_ratio <- p1/p2
        
        # prevalence_ratio between sub-population of interest and the others
        
        crit_value <- qnorm(p=.05/2, lower.tail=FALSE) # z critical value at 95% CI, 2-tailed, p<0.05
            
        s_p1 <- (b-i)/(b*i)
        s_p2 <- (the_rest - q)/(q * the_rest)
            
        delta <- crit_value * (sqrt(s_p1 + s_p2))
        
        
        lowerCI <- exp((log(prevalence_ratio) - delta))
        upperCI <- exp((log(prevalence_ratio) + delta))
            
        # round off to 2dp
        prevalence_ratio <- format(round(prevalence_ratio, 2), nsmall = 2) 
        lowerCI <- format(round(lowerCI, 2), nsmall = 2) 
        upperCI <- format(round(upperCI, 2), nsmall = 2) 
        
        concat_the_string <- str_c (prevalence_ratio, ' (', lowerCI, ', ', upperCI, ')')
            }
          )
}

## East Asian Populations vs Rest of East Asia

In [62]:
CHB_vs_east_asia <- conf_int_v_EAS(206, east_asian$'CHB (n = 206)')

JPT_vs_east_asia <- conf_int_v_EAS(208, east_asian$'JPT (n = 208)')

CHS_vs_east_asia <- conf_int_v_EAS(210, east_asian$'CHS (n = 210)')

CDX_vs_east_asia <- conf_int_v_EAS(186, east_asian$'CDX (n = 186)')

KHV_vs_east_asia <- conf_int_v_EAS(198, east_asian$'KHV (n = 198)')



intra_eas_list <- list (polymorphisms = polymorphisms, CHB_vs_east_asia = CHB_vs_east_asia,
                 JPT_vs_east_asia = JPT_vs_east_asia, CHS_vs_east_asia = CHS_vs_east_asia, 
                 CDX_vs_east_asia = CDX_vs_east_asia, KHV_vs_east_asia = KHV_vs_east_asia)

east_asia_df <- as.data.frame(do.call(cbind, intra_eas_list)) 

In [63]:
east_asia_df

polymorphisms,CHB_vs_east_asia,JPT_vs_east_asia,CHS_vs_east_asia,CDX_vs_east_asia,KHV_vs_east_asia
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"1.24 (0.87, 1.76)","1.00 (0.68, 1.45)","0.94 (0.64, 1.38)","0.91 (0.60, 1.37)","0.92 (0.62, 1.37)"
CYP2D6*3,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*4,"3.89 (0.24, 61.98)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","4.09 (0.26, 65.12)"
CYP2D6*6,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*9,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*10,"1.07 (0.94, 1.21)","0.59 (0.49, 0.71)","1.10 (0.97, 1.24)","1.12 (0.99, 1.28)","1.19 (1.06, 1.35)"
CYP2D6*14,"0.43 (0.06, 3.40)","0.43 (0.05, 3.35)","0.00 (0.00, NaN)","1.89 (0.49, 7.26)","4.09 (1.20, 13.99)"
CYP2D6*17,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*41,"0.88 (0.39, 1.97)","0.10 (0.01, 0.75)","1.36 (0.67, 2.75)","2.88 (1.53, 5.42)","0.62 (0.25, 1.57)"
CYP2C19*2,"1.09 (0.88, 1.36)","1.04 (0.83, 1.30)","1.17 (0.94, 1.44)","0.81 (0.63, 1.06)","0.88 (0.69, 1.13)"


In [64]:
fwrite(east_asia_df, file ="./compare_population/east_asia.csv")

## DEFINE FUNCTION FOR POPULATION VS REST OF SOUTH ASIA

In [65]:
conf_int_v_SAS <- function (b, column_name) {
    # a = population global; b = sub-population specific; i = number with the queried variance
    to_list (for (item in Map(list, column_name, cross_country$'SAS (n = 978)')) {
        i = item[[1]]
        j = item[[2]]
        
        a = 978
        the_rest <- a - b # sub-populations other than the one being queried
        
        q = j - i # number of those among the_rest with queried variant
            
        p1 <- i/b
        p2 <- q/the_rest
        
        prevalence_ratio <- p1/p2
        
        # prevalence_ratio between sub-population of interest and the others
        
        crit_value <- qnorm(p=.05/2, lower.tail=FALSE) # z critical value at 95% CI, 2-tailed, p<0.05
            
        s_p1 <- (b-i)/(b*i)
        s_p2 <- (the_rest - q)/(q * the_rest)
            
        delta <- crit_value * (sqrt(s_p1 + s_p2))
        
        
        lowerCI <- exp((log(prevalence_ratio) - delta))
        upperCI <- exp((log(prevalence_ratio) + delta))
            
        # round off to 2dp
        prevalence_ratio <- format(round(prevalence_ratio, 2), nsmall = 2) 
        lowerCI <- format(round(lowerCI, 2), nsmall = 2) 
        upperCI <- format(round(upperCI, 2), nsmall = 2) 
        
        concat_the_string <- str_c (prevalence_ratio, ' (', lowerCI, ', ', upperCI, ')')
            }
          )
}

## South Asian Populations vs Rest of South Asia

In [66]:
GIH_vs_south_asia <- conf_int_v_SAS(206, south_asian$'GIH (n = 206)')

PJL_vs_south_asia <- conf_int_v_SAS(192, south_asian$'PJL (n = 192)')

BEB_vs_south_asia <- conf_int_v_SAS(172, south_asian$'BEB (n = 172)')

STU_vs_south_asia <- conf_int_v_SAS(204, south_asian$'STU (n = 204)')

ITU_vs_south_asia <- conf_int_v_SAS(204, south_asian$'ITU (n = 204)')



intra_sas_list <- list (polymorphisms = polymorphisms, GIH_vs_south_asia = GIH_vs_south_asia,
                 PJL_vs_south_asia = PJL_vs_south_asia, BEB_vs_south_asia = BEB_vs_south_asia, 
                 STU_vs_south_asia = STU_vs_south_asia, ITU_vs_south_asia = ITU_vs_south_asia)

south_asia_df <- as.data.frame(do.call(cbind, intra_sas_list)) 

“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”


In [67]:
south_asia_df

polymorphisms,GIH_vs_south_asia,PJL_vs_south_asia,BEB_vs_south_asia,STU_vs_south_asia,ITU_vs_south_asia
<list>,<list>,<list>,<list>,<list>,<list>
CYP2D6*2,"1.30 (1.08, 1.56)","0.85 (0.68, 1.07)","0.65 (0.49, 0.85)","1.05 (0.86, 1.29)","1.14 (0.94, 1.39)"
CYP2D6*3,"3.75 (0.24, 59.66)","0.00 (0.00, NaN)","4.69 (0.29, 74.55)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*4,"1.20 (0.79, 1.82)","0.67 (0.40, 1.13)","1.14 (0.73, 1.79)","0.98 (0.63, 1.53)","1.04 (0.67, 1.60)"
CYP2D6*6,"0.00 (0.00, NaN)","Inf (NaN, Inf)","0.00 (0.00, NaN)","0.00 (0.00, NaN)","0.00 (0.00, NaN)"
CYP2D6*9,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*10,"0.90 (0.63, 1.29)","0.58 (0.38, 0.91)","1.72 (1.27, 2.34)","0.91 (0.64, 1.31)","1.06 (0.75, 1.49)"
CYP2D6*14,"NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)","NaN (NaN, NaN)"
CYP2D6*17,"-3.75 (NaN, NaN)","-4.09 (NaN, NaN)","-4.69 (NaN, NaN)","-3.79 (NaN, NaN)","-3.79 (NaN, NaN)"
CYP2D6*41,"5.00 (3.60, 6.94)","5.10 (3.68, 7.06)","4.17 (3.02, 5.74)","9.11 (6.33, 13.09)","6.71 (4.77, 9.43)"
CYP2C19*2,"0.01 (0.00, 0.08)","0.04 (0.01, 0.11)","0.05 (0.02, 0.14)","0.03 (0.01, 0.10)","0.01 (0.00, 0.08)"


In [68]:
fwrite(south_asia_df, file ="./compare_population/south_asia.csv")