# Estimate Multi-ancestry PRS versus age at onset
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Last Updated:** 6-MARCH-2024

## Notebook Overview
- Linear regression models adjusted by covariates (gender, PCs)

In [1]:
## Load packages
module load python
module load R

[+] Loading python 3.10  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading HDF5  1.12.2 
[+] Loading netcdf  4.9.0 
[-] Unloading gcc  11.3.0  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading openmpi/4.1.3/gcc-11.3.0  ... 
[+] Loading pandoc  2.18  on cn1035 
[+] Loading pcre2  10.40 
[+] Loading R 4.3.2 


In [2]:
cd ${WORK_DIR}/quality_control/release6/genotype_qc/
ls *eigenvec

GP2_release6_NOVEMBER_2023_AAC.eigenvec
GP2_release6_NOVEMBER_2023_AFR.eigenvec
GP2_release6_NOVEMBER_2023_AJ.eigenvec
GP2_release6_NOVEMBER_2023_AMR.eigenvec
GP2_release6_NOVEMBER_2023_CAH.eigenvec
GP2_release6_NOVEMBER_2023_CAS.eigenvec
GP2_release6_NOVEMBER_2023_EAS.eigenvec
GP2_release6_NOVEMBER_2023_EUR.eigenvec
GP2_release6_NOVEMBER_2023_FIN.eigenvec
GP2_release6_NOVEMBER_2023_MDE.eigenvec
GP2_release6_NOVEMBER_2023_SAS.eigenvec
GP2_release6_NOVEMBER_AAC_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_AFR_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_AJ_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_AMR_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_CAS_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_EAS_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_EUR_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_FIN_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_MDE_maf_hwe_pca.eigenvec
GP2_release6_NOVEMBER_SAS_maf_hwe_pca.eigenvec


In [None]:
## Change kernel to python
import pandas as pd

# Read the file into a DataFrame
file_path = f"{WORK_DIR}/GP2_master_key_release6.txt"
df = pd.read_csv(file_path, delimiter='\t')  # Assuming columns are tab-separated, adjust if needed

# Add a new column with "0"
df.insert(0, 'New_Column', 0)

# Extract columns GP2sampleID and sex_for_qc
result_df = df[['New_Column', 'GP2sampleID', 'sex_for_qc', 'age_of_onset']]

# Save the result to a CSV file without headers
result_df.to_csv(f'{WORK_DIR}/AAO.txt', index=False, header=True, sep = '\t')

In [1]:
###################################### AAC ###################################### 
cd ${WORK_DIR}/imputed_data/AAC

In [13]:
### AGE AT ONSET
library(data.table)
setwd("./imputed_data/AAC/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AAC_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)
   
library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AAC_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AAC_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AAC_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)


[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.591  -8.393   1.310   8.411  28.467 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  57.7406     3.1376  18.403  < 2e-16 ***
zSCORE        0.4230     0.9902   0.427  0.66993    
sex_for_qc   -0.1402     2.0697  -0.068  0.94610    
PC1          35.9828    57.2969   0.628  0.53104    
PC2          -1.2593    36.2774  -0.035  0.97236    
PC3         404.2793   149.0975   2.712  0.00755 ** 
PC4         -69.4717   114.8560  -0.605  0.54627    
PC5          59.5528    47.7770   1.246  0.21470    
PC6         -97.2830    80.3952  -1.210  0.22832    
PC7         -21.9772    59.3756  -0.370  0.71185    
PC8         -10.5966    56.4851  -0.188  0.85147    
PC9           3.1377    61.1989   0.051  0.95918    
PC10        -67.5448    40.9247  -1.650  0.10112    
---
Signif. c

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.709  -7.891   1.469   8.283  29.096 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  57.4040     3.1210  18.393  < 2e-16 ***
zSCORE        0.9699     0.9812   0.989  0.32461    
sex_for_qc   -0.1100     2.0564  -0.054  0.95740    
PC1          29.2785    57.5918   0.508  0.61200    
PC2           3.4243    36.3023   0.094  0.92499    
PC3         406.0626   148.4803   2.735  0.00706 ** 
PC4         -68.8335   114.3866  -0.602  0.54832    
PC5          62.1379    47.5218   1.308  0.19319    
PC6         -95.4924    80.1526  -1.191  0.23555    
PC7         -15.9093    58.9436  -0.270  0.78764    
PC8          -9.1491    56.0195  -0.163  0.87051    
PC9           5.4034    61.0753   0.088  0.92963    
PC10        -71.7681    41.0818  -1.747  0.08287 .  
---
Signif. c

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
     Min       1Q   Median       3Q      Max 
-30.2900  -8.8532   0.8023   8.2813  29.4277 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  57.1994     3.1262  18.297  < 2e-16 ***
zSCORE        1.2573     1.0343   1.216  0.22621    
sex_for_qc    0.1981     2.0793   0.095  0.92425    
PC1          36.2327    56.7627   0.638  0.52432    
PC2          -4.5590    35.3710  -0.129  0.89763    
PC3         401.9945   148.1017   2.714  0.00749 ** 
PC4         -57.5429   114.7734  -0.501  0.61692    
PC5          61.1109    47.4241   1.289  0.19969    
PC6         -95.6282    79.9806  -1.196  0.23389    
PC7         -22.3377    58.8006  -0.380  0.70461    
PC8         -13.3379    56.0769  -0.238  0.81235    
PC9           1.9585    60.9132   0.032  0.97440    
PC10        -72.5209    40.9625  -1.770  0.07887 .  
---

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-28.974  -8.375   1.542   8.077  29.157 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  57.391044   3.164587  18.135  < 2e-16 ***
zSCORE        0.840429   1.064021   0.790  0.43096    
sex_for_qc   -0.002062   2.075775  -0.001  0.99921    
PC1          29.322967  58.075981   0.505  0.61443    
PC2           0.885762  36.099429   0.025  0.98046    
PC3         401.634306 148.561060   2.703  0.00772 ** 
PC4         -72.712705 114.478356  -0.635  0.52637    
PC5          58.522183  47.678628   1.227  0.22175    
PC6         -93.958802  80.422115  -1.168  0.24469    
PC7         -22.882133  59.108616  -0.387  0.69926    
PC8         -10.405514  56.159103  -0.185  0.85328    
PC9           7.689300  61.408435   0.125  0.90054    
PC10        -63.455390  40.971927  -1.54

In [None]:
###################################### AFR ###################################### 
cd ${WORK_DIR}/imputed_data/AFR

In [14]:
library(data.table)
setwd("./imputed_data/AFR/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AFR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AFR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests) 

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AFR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)  

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AFR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)


[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-45.233  -7.032   0.704   7.734  29.219 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   60.9344     3.9279  15.513   <2e-16 ***
zSCORE        -0.9848     0.9551  -1.031    0.304    
sex_for_qc    -2.6383     2.1130  -1.249    0.214    
PC1          -14.3660    50.6250  -0.284    0.777    
PC2          -86.2966   103.8045  -0.831    0.407    
PC3         -226.3337   261.1430  -0.867    0.387    
PC4           29.3585   201.8040   0.145    0.885    
PC5         -187.4444   191.9295  -0.977    0.330    
PC6          237.6044   169.3622   1.403    0.163    
PC7         -139.7332   139.0419  -1.005    0.317    
PC8           32.0370   122.3948   0.262    0.794    
PC9           70.5150   117.1870   0.602    0.548    
PC10         -34.6793   114.3635  -0.303    0.762    

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-44.826  -7.116   0.667   6.937  30.339 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   60.7387     3.9365  15.429   <2e-16 ***
zSCORE        -0.5909     1.0173  -0.581    0.562    
sex_for_qc    -2.6554     2.1265  -1.249    0.214    
PC1          -18.8389    50.5463  -0.373    0.710    
PC2          -83.3250   103.9972  -0.801    0.424    
PC3         -225.1856   263.1328  -0.856    0.393    
PC4           19.9703   202.2257   0.099    0.921    
PC5         -200.0679   191.8450  -1.043    0.299    
PC6          239.9954   169.7551   1.414    0.159    
PC7         -127.7173   138.6753  -0.921    0.359    
PC8           28.5321   122.6194   0.233    0.816    
PC9           65.1564   117.4672   0.555    0.580    
PC10         -39.4839   114.5218  -0.345    0.731    

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.638  -7.051   0.636   7.003  30.207 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   60.5004     3.9182  15.441   <2e-16 ***
zSCORE         0.1631     1.0580   0.154    0.878    
sex_for_qc    -2.5257     2.1177  -1.193    0.235    
PC1          -18.4519    50.7198  -0.364    0.717    
PC2          -82.9546   104.6316  -0.793    0.429    
PC3         -248.9927   263.8869  -0.944    0.347    
PC4           23.8891   202.5669   0.118    0.906    
PC5         -205.5056   192.4314  -1.068    0.287    
PC6          242.4080   169.8755   1.427    0.156    
PC7         -126.0046   139.3474  -0.904    0.367    
PC8           27.0715   122.7877   0.220    0.826    
PC9           59.2752   117.1860   0.506    0.614    
PC10         -40.0829   114.6370  -0.350    0.727    

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-41.721  -7.156   0.593   7.512  31.376 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   59.9477     3.9044  15.354   <2e-16 ***
zSCORE         1.5106     0.9828   1.537    0.126    
sex_for_qc    -2.3689     2.1040  -1.126    0.262    
PC1          -22.4576    50.2635  -0.447    0.656    
PC2          -88.3898   103.3562  -0.855    0.394    
PC3         -252.8423   259.5813  -0.974    0.332    
PC4           52.6884   201.7998   0.261    0.794    
PC5         -233.2839   191.4844  -1.218    0.225    
PC6          223.4842   169.0374   1.322    0.188    
PC7         -123.5812   137.6060  -0.898    0.371    
PC8           34.8837   121.8807   0.286    0.775    
PC9           44.3813   116.4309   0.381    0.704    
PC10         -36.9586   113.7819  -0.325    0.746    

In [None]:
###################################### AJ ###################################### 
cd ${WORK_DIR}/imputed_data/AJ

In [15]:
library(data.table)
setwd("./imputed_data/AJ/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AJ_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AJ_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AJ_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AJ_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)

[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-45.816  -7.471   1.050   8.060  28.702 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  61.4066     1.1148  55.083   <2e-16 ***
zSCORE       -0.3052     0.3657  -0.835   0.4041    
sex_for_qc   -0.5999     0.7942  -0.755   0.4503    
PC1         -68.4536    40.1677  -1.704   0.0887 .  
PC2         -50.2743    41.3997  -1.214   0.2249    
PC3         -38.3364    22.6861  -1.690   0.0914 .  
PC4          -3.3469    26.8119  -0.125   0.9007    
PC5         -10.9764    29.0980  -0.377   0.7061    
PC6         -38.3612    27.4278  -1.399   0.1623    
PC7           7.3366    25.3020   0.290   0.7719    
PC8          15.9842    25.9176   0.617   0.5376    
PC9          -0.9461    22.8003  -0.041   0.9669    
PC10        -19.2418    25.1614  -0.765   0.4446    
---
Signif. c

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-45.833  -7.277   1.090   7.965  31.857 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  60.7261     1.1106  54.678  < 2e-16 ***
zSCORE       -1.8451     0.3942  -4.681 3.28e-06 ***
sex_for_qc   -0.1535     0.7909  -0.194    0.846    
PC1         -63.8973    39.7221  -1.609    0.108    
PC2         -44.3722    40.9505  -1.084    0.279    
PC3         -32.1180    22.4713  -1.429    0.153    
PC4         -12.1228    26.4964  -0.458    0.647    
PC5          -6.2308    28.7521  -0.217    0.828    
PC6         -34.9715    27.1305  -1.289    0.198    
PC7           6.0784    24.9078   0.244    0.807    
PC8          18.6357    25.5618   0.729    0.466    
PC9          -0.8715    22.5251  -0.039    0.969    
PC10        -19.4943    24.8788  -0.784    0.433    
---
Signif. c

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-46.233  -7.605   0.933   7.986  29.435 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  61.4943     1.1164  55.081   <2e-16 ***
zSCORE       -0.5301     0.3712  -1.428   0.1537    
sex_for_qc   -0.6440     0.7944  -0.811   0.4177    
PC1         -66.4478    40.1451  -1.655   0.0982 .  
PC2         -49.4195    41.3748  -1.194   0.2326    
PC3         -38.5446    22.6705  -1.700   0.0894 .  
PC4          -4.5226    26.7317  -0.169   0.8657    
PC5          -8.9341    29.0525  -0.308   0.7585    
PC6         -37.6596    27.4147  -1.374   0.1699    
PC7           7.0443    25.1989   0.280   0.7799    
PC8          12.1200    25.8572   0.469   0.6394    
PC9           0.5280    22.8194   0.023   0.9815    
PC10        -19.4388    25.1435  -0.773   0.4397    
---
Signif. c

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-47.075  -7.662   1.088   8.087  30.099 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  61.6147     1.1168  55.172   <2e-16 ***
zSCORE       -0.8203     0.3678  -2.230   0.0260 *  
sex_for_qc   -0.6915     0.7936  -0.871   0.3838    
PC1         -69.7094    40.0784  -1.739   0.0823 .  
PC2         -46.5470    41.3437  -1.126   0.2605    
PC3         -36.2481    22.6527  -1.600   0.1099    
PC4          -3.4942    26.6959  -0.131   0.8959    
PC5         -10.7596    29.0029  -0.371   0.7107    
PC6         -35.6566    27.3959  -1.302   0.1934    
PC7           5.8288    25.1321   0.232   0.8166    
PC8          15.6151    25.7815   0.606   0.5449    
PC9           0.2318    22.7445   0.010   0.9919    
PC10        -18.7305    25.1025  -0.746   0.4558    
---
Signif. c

In [None]:
###################################### AMR ###################################### 
cd /data/CARD_training/imputed_data/AMR

In [16]:
library(data.table)
setwd("./imputed_data/AMR/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AMR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AMR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AMR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_AMR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)

[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-40.074  -8.392   0.405  10.420  31.991 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  53.7990     3.2466  16.571  < 2e-16 ***
zSCORE        0.1694     0.9340   0.181  0.85624    
sex_for_qc   -2.3582     1.8625  -1.266  0.20687    
PC1         -59.2592    47.4365  -1.249  0.21298    
PC2         -64.3330    42.1119  -1.528  0.12811    
PC3          -4.5129    23.6645  -0.191  0.84894    
PC4          30.2422    21.9397   1.378  0.16955    
PC5         -94.3367    31.1612  -3.027  0.00278 ** 
PC6           1.9617    44.0903   0.044  0.96455    
PC7          16.2299    30.2819   0.536  0.59256    
PC8         -49.9967    41.9037  -1.193  0.23417    
PC9          20.0360    38.0172   0.527  0.59874    
PC10         -7.6568    35.9440  -0.213  0.83152    
---
Signif. c

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-39.587  -8.500   0.316  10.191  32.476 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  53.6721     3.2707  16.410  < 2e-16 ***
zSCORE       -0.2798     0.9780  -0.286  0.77510    
sex_for_qc   -2.2597     1.8830  -1.200  0.23146    
PC1         -60.3661    47.5522  -1.269  0.20569    
PC2         -65.3790    42.2050  -1.549  0.12287    
PC3          -5.1945    23.7097  -0.219  0.82680    
PC4          30.1540    21.9034   1.377  0.17008    
PC5         -94.2931    31.1582  -3.026  0.00279 ** 
PC6           3.8957    43.8520   0.089  0.92930    
PC7          17.3801    30.3063   0.573  0.56693    
PC8         -51.6148    42.5182  -1.214  0.22614    
PC9          22.1297    37.9622   0.583  0.56056    
PC10         -6.0813    36.0453  -0.169  0.86619    
---
Signif. c

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-39.871  -8.243   0.441  10.336  32.519 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  53.7209     3.2573  16.493  < 2e-16 ***
zSCORE       -0.2242     0.9152  -0.245  0.80670    
sex_for_qc   -2.2988     1.8691  -1.230  0.22012    
PC1         -61.2067    48.0130  -1.275  0.20380    
PC2         -64.2295    42.1134  -1.525  0.12873    
PC3          -4.7457    23.6452  -0.201  0.84113    
PC4          30.6343    22.0499   1.389  0.16621    
PC5         -95.1162    31.3049  -3.038  0.00268 ** 
PC6           4.6391    44.2619   0.105  0.91663    
PC7          17.2748    30.3083   0.570  0.56931    
PC8         -48.3670    41.2536  -1.172  0.24236    
PC9          20.9945    37.7204   0.557  0.57841    
PC10         -6.2470    36.0435  -0.173  0.86257    
---
Signif. c

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-39.605  -8.189   0.377  10.508  32.315 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  53.6365     3.2549  16.479  < 2e-16 ***
zSCORE       -0.5189     0.9210  -0.563  0.57372    
sex_for_qc   -2.2593     1.8652  -1.211  0.22716    
PC1         -60.8945    47.4762  -1.283  0.20104    
PC2         -67.9332    42.5082  -1.598  0.11153    
PC3          -5.2901    23.6538  -0.224  0.82325    
PC4          30.2735    21.8899   1.383  0.16814    
PC5         -92.9693    31.2391  -2.976  0.00326 ** 
PC6           2.6262    43.7124   0.060  0.95215    
PC7          17.3877    30.2072   0.576  0.56550    
PC8         -48.4018    41.2145  -1.174  0.24158    
PC9          21.2309    37.6998   0.563  0.57393    
PC10         -6.7344    35.8301  -0.188  0.85110    
---
Signif. c

In [None]:
###################################### CAS ###################################### 
cd ${WORK_DIR}/imputed_data/CAS

In [17]:
library(data.table)
setwd("./imputed_data/CAS/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_CAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_CAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_CAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_CAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)


[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-35.904  -6.870   1.218   7.849  22.732 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   44.8625     5.8130   7.718 1.01e-10 ***
zSCORE         0.6007     1.6856   0.356    0.723    
sex_for_qc     3.4245     3.2044   1.069    0.289    
PC1           76.4473   149.1140   0.513    0.610    
PC2          -88.1944   247.4485  -0.356    0.723    
PC3          -76.4607   303.9802  -0.252    0.802    
PC4          -95.0775   167.7765  -0.567    0.573    
PC5         -133.3538   233.0950  -0.572    0.569    
PC6         -106.2452   178.5211  -0.595    0.554    
PC7          172.6984   117.2217   1.473    0.146    
PC8          -60.9779   137.4801  -0.444    0.659    
PC9          129.6686   127.9948   1.013    0.315    
PC10         -15.7134   137.5153  -0.114    0.909    

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-36.054  -6.823   1.478   7.910  22.509 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   45.22022    5.74618   7.870 5.47e-11 ***
zSCORE         0.03466    1.53259   0.023    0.982    
sex_for_qc     3.26436    3.17835   1.027    0.308    
PC1           73.82018  149.07829   0.495    0.622    
PC2          -95.86345  246.75110  -0.389    0.699    
PC3          -83.03728  303.74550  -0.273    0.785    
PC4         -103.42590  166.99648  -0.619    0.538    
PC5         -126.94847  235.58203  -0.539    0.592    
PC6         -107.49662  179.00639  -0.601    0.550    
PC7          170.76759  117.22688   1.457    0.150    
PC8          -62.78993  139.56885  -0.450    0.654    
PC9          135.38905  127.16777   1.065    0.291    
PC10         -16.37189  138.44182  -0.11

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-35.532  -6.513   1.471   7.944  22.186 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   44.9835     5.7527   7.820 6.71e-11 ***
zSCORE         0.6183     1.5562   0.397    0.692    
sex_for_qc     3.3194     3.1740   1.046    0.300    
PC1           70.0066   149.1999   0.469    0.641    
PC2          -75.0101   251.9844  -0.298    0.767    
PC3          -70.2316   305.0657  -0.230    0.819    
PC4         -100.5474   166.3401  -0.604    0.548    
PC5         -129.6151   232.3142  -0.558    0.579    
PC6         -100.3468   179.2998  -0.560    0.578    
PC7          171.3218   117.0715   1.463    0.148    
PC8          -53.8690   139.3406  -0.387    0.700    
PC9          124.2293   130.0462   0.955    0.343    
PC10         -19.4455   137.7467  -0.141    0.888    

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-36.681  -6.263   0.782   6.988  22.945 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   45.1923     5.7151   7.908 4.69e-11 ***
zSCORE        -0.8173     1.6217  -0.504    0.616    
sex_for_qc     3.4472     3.1897   1.081    0.284    
PC1           65.6659   149.6557   0.439    0.662    
PC2          -91.5214   246.4144  -0.371    0.712    
PC3          -52.0187   309.3325  -0.168    0.867    
PC4         -107.4671   166.3150  -0.646    0.520    
PC5         -126.2773   231.9703  -0.544    0.588    
PC6         -103.5559   178.4729  -0.580    0.564    
PC7          167.5439   117.1433   1.430    0.158    
PC8          -79.8144   141.0271  -0.566    0.573    
PC9          138.6530   126.9721   1.092    0.279    
PC10          -7.9067   138.3204  -0.057    0.955    

In [None]:
###################################### EAS ###################################### 
cd ${WORK_DIR}/imputed_data/EAS

In [18]:
library(data.table)
setwd("./imputed_data/EAS/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EAS_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)   

[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-42.880  -8.755  -1.664   8.608  35.529 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   52.1880     4.3561  11.980   <2e-16 ***
zSCORE        -0.3813     1.0199  -0.374   0.7090    
sex_for_qc     1.5593     2.0045   0.778   0.4376    
PC1           11.7176   304.8525   0.038   0.9694    
PC2         -258.0843   148.3965  -1.739   0.0837 .  
PC3          -81.0767   144.0136  -0.563   0.5741    
PC4           53.1532   127.0944   0.418   0.6763    
PC5         -510.0781   263.7094  -1.934   0.0546 .  
PC6         -106.0814   245.1351  -0.433   0.6657    
PC7          163.8287   122.1261   1.341   0.1814    
PC8          157.9840   137.2520   1.151   0.2512    
PC9         -202.9204   160.4500  -1.265   0.2076    
PC10         -22.7126   137.8008  -0.165   0.8693    

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-42.724  -8.714  -1.562   9.151  36.057 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   51.8533     4.3003  12.058   <2e-16 ***
zSCORE         0.7593     1.0360   0.733   0.4646    
sex_for_qc     1.5228     1.9979   0.762   0.4469    
PC1           22.6568   304.9747   0.074   0.9409    
PC2         -245.3427   149.2103  -1.644   0.1018    
PC3          -80.1935   143.8599  -0.557   0.5779    
PC4           59.9605   126.5290   0.474   0.6361    
PC5         -527.0518   262.9364  -2.004   0.0465 *  
PC6         -100.4984   244.9823  -0.410   0.6821    
PC7          169.9136   121.7384   1.396   0.1645    
PC8          156.8140   137.0526   1.144   0.2540    
PC9         -202.7778   160.2489  -1.265   0.2073    
PC10         -20.5350   137.5301  -0.149   0.8815    

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.220  -8.694  -1.919   8.115  35.577 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   51.9583     4.3010  12.081   <2e-16 ***
zSCORE        -0.5753     0.9494  -0.606   0.5453    
sex_for_qc     1.6938     1.9952   0.849   0.3970    
PC1            3.3132   304.8288   0.011   0.9913    
PC2         -263.9076   148.6404  -1.775   0.0775 .  
PC3          -73.1582   144.4950  -0.506   0.6133    
PC4           63.9696   127.0157   0.504   0.6151    
PC5         -517.0225   262.7532  -1.968   0.0506 .  
PC6         -114.0368   245.3421  -0.465   0.6426    
PC7          164.5518   121.8078   1.351   0.1784    
PC8          153.6150   137.2044   1.120   0.2644    
PC9         -185.6463   162.4915  -1.142   0.2547    
PC10         -18.6633   137.6296  -0.136   0.8923    

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.128  -8.730  -1.617   8.241  36.018 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   52.1005     4.3093  12.090   <2e-16 ***
zSCORE        -0.6081     0.9970  -0.610   0.5427    
sex_for_qc     1.7043     1.9961   0.854   0.3943    
PC1           16.4434   304.8067   0.054   0.9570    
PC2         -260.8698   148.3839  -1.758   0.0804 .  
PC3          -79.6451   143.9371  -0.553   0.5807    
PC4           58.8803   126.5606   0.465   0.6423    
PC5         -502.7589   263.8907  -1.905   0.0583 .  
PC6         -110.0705   245.0704  -0.449   0.6539    
PC7          163.3078   121.8925   1.340   0.1820    
PC8          161.0348   137.3074   1.173   0.2424    
PC9         -191.8437   161.1276  -1.191   0.2353    
PC10         -20.9964   137.5942  -0.153   0.8789    

In [None]:
###################################### EUR ###################################### 
cd ${WORK_DIR}/imputed_data/EUR

In [19]:
library(data.table)
setwd("./imputed_data/EUR/")
temp_data <- read.table("PRS_score_release_AFRICANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EUR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print("AFRICANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EUROPEAN.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EUR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EUROPEANS")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_LATINO.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EUR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("LATINO")
summary(grsTests)

library(data.table)
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = T) 
temp_covs <- read.table("./quality_control/release6/genotype_qc/GP2_release6_NOVEMBER_EUR_maf_hwe_pca.eigenvec", header = T, sep="\t")
colnames(temp_covs) <- c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "PC7", "PC8", "PC9", "PC10")
temp_covs_2 <- read.table("./AAO.txt", header = T, sep="\t")
colnames(temp_covs_2)[2] ="IID"
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")
data$CASE <- data$PHENO - 1
cases <- subset(data, CASE = 1)
meanPop <- mean(cases$SCORE)
sdPop <- sd(cases$SCORE)
cases$zSCORE <- (cases$SCORE - meanPop)/sdPop
grsTests <- lm(age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)
print(noquote(""))
print("EASTASIANS")
summary(grsTests)

[1] "AFRICANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-53.524  -7.944   1.172   8.743  38.758 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  59.0193     0.4010 147.195  < 2e-16 ***
zSCORE       -0.1906     0.1329  -1.435    0.151    
sex_for_qc   -0.5534     0.2747  -2.014    0.044 *  
PC1         119.1812    20.0552   5.943 2.92e-09 ***
PC2         111.2193    28.4676   3.907 9.43e-05 ***
PC3         -22.0769    24.6749  -0.895    0.371    
PC4          33.0008    26.2830   1.256    0.209    
PC5          31.9938    26.8502   1.192    0.233    
PC6          23.0071    20.7157   1.111    0.267    
PC7          -4.5256    23.6566  -0.191    0.848    
PC8          29.1421    24.3743   1.196    0.232    
PC9         -14.6407    24.1396  -0.607    0.544    
PC10         17.2559    19.2811   0.895    0.371    
---
Signif. c

[1] 
[1] "EUROPEANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-54.027  -7.857   1.224   8.695  38.513 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  59.0910     0.4005 147.545  < 2e-16 ***
zSCORE       -0.7041     0.1293  -5.447 5.27e-08 ***
sex_for_qc   -0.5252     0.2743  -1.915   0.0556 .  
PC1         107.8133    20.0043   5.389 7.27e-08 ***
PC2         117.3892    28.4389   4.128 3.70e-05 ***
PC3         -27.5094    24.6539  -1.116   0.2645    
PC4          28.1978    26.2537   1.074   0.2828    
PC5          28.4804    26.8103   1.062   0.2881    
PC6          20.3063    20.6732   0.982   0.3260    
PC7          -3.2560    23.6160  -0.138   0.8903    
PC8          28.9575    24.3316   1.190   0.2340    
PC9          -8.9726    24.1088  -0.372   0.7098    
PC10         11.9904    19.2674   0.622   0.5338    
---
Signif. c

[1] 
[1] "LATINO"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-53.802  -7.956   1.288   8.730  38.635 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  59.0284     0.4010 147.204  < 2e-16 ***
zSCORE       -0.2176     0.1337  -1.628 0.103512    
sex_for_qc   -0.5519     0.2747  -2.009 0.044574 *  
PC1         116.2454    19.9740   5.820 6.12e-09 ***
PC2         109.4165    28.4939   3.840 0.000124 ***
PC3         -21.3912    24.6750  -0.867 0.386013    
PC4          33.5462    26.2801   1.276 0.201820    
PC5          31.9805    26.8491   1.191 0.233641    
PC6          21.8303    20.7061   1.054 0.291780    
PC7          -4.1444    23.6552  -0.175 0.860928    
PC8          28.8631    24.3726   1.184 0.236351    
PC9         -14.2007    24.1335  -0.588 0.556265    
PC10         17.4052    19.2813   0.903 0.366715    
---
Signif. c

[1] 
[1] "EASTASIANS"



Call:
lm(formula = age_of_onset ~ zSCORE + sex_for_qc + PC1 + PC2 + 
    PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, data = cases)

Residuals:
    Min      1Q  Median      3Q     Max 
-53.677  -7.839   1.250   8.732  37.386 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  59.1076     0.4005 147.593  < 2e-16 ***
zSCORE       -0.7776     0.1345  -5.782 7.67e-09 ***
sex_for_qc   -0.5570     0.2742  -2.032 0.042210 *  
PC1         120.2250    19.9446   6.028 1.73e-09 ***
PC2         110.3070    28.4122   3.882 0.000104 ***
PC3         -25.1965    24.6330  -1.023 0.306397    
PC4          33.5213    26.2298   1.278 0.201291    
PC5          32.9554    26.7981   1.230 0.218821    
PC6          24.0024    20.6684   1.161 0.245551    
PC7          -4.9863    23.6101  -0.211 0.832740    
PC8          28.3831    24.3261   1.167 0.243335    
PC9         -15.3837    24.0879  -0.639 0.523069    
PC10         20.5953    19.2525   1.070 0.284767    
---
Signif. c