# Estimate Multi-ancestry PRS versus PD risk - AAC
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Last Updated:** 16-NOV-2023

## Notebook Overview
- Logistic regression models of PD case/control status on the standardized PRS, adjusted for covariates (age, sex, PC1-PC10)

In [None]:
## Load packages
# Load the python and R environment modules into the current shell session;
# the R cells below run inside the R provided by this module.
module load python
module load R

[+] Loading python 3.10  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading HDF5  1.12.2 
[+] Loading netcdf  4.9.0 
[-] Unloading gcc  11.3.0  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading openmpi/4.1.3/gcc-11.3.0  ... 
[+] Loading pandoc  2.18  on cn1663 
[+] Loading pcre2  10.40 
[+] Loading R 4.3.0 


In [None]:
###################################### AAC ######################################

In [None]:
# Move into the AAC working directory. The expansion is quoted so that a
# WORK_DIR containing spaces or glob characters is passed to cd as one path.
cd "${WORK_DIR}/AAC"

In [None]:
### RISK
# Start an interactive R session; every following line of this cell is R code.
R
library(data.table)

# PRS profile; the columns used below are IID, PHENO and SCORE.
temp_data <- read.table("PRS_score_release_AFRICAN.profile", header = TRUE)

# Principal components (headerless, tab-separated; named FID, IID, PC1..PC10).
# NOTE(review): "{GP2_release5}" is a literal placeholder -- R does not
# interpolate braces inside a string, so substitute the real release directory
# before running.
temp_covs <- read.table(
  "{GP2_release5}/raw_genotypes/AAC/AAC_release5_maf_hwe_pca.eigenvec",
  header = FALSE,
  sep = "\t"
)
colnames(temp_covs) <- c("FID", "IID", paste0("PC", 1:10))

# Clinical covariates; the second column holds the sample ID and is renamed to
# IID so it can serve as the merge key.
temp_covs_2 <- read.table("./covariates.txt", header = TRUE, sep = "\t")
colnames(temp_covs_2)[2] <- "IID"

# Inner joins on IID: only samples present in all three tables are kept.
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")

# Shift PHENO down by one to obtain the 0/1 outcome used by the model.
# NOTE(review): presumably PHENO follows the PLINK coding (1 = control,
# 2 = case, -9 = missing), which makes -10 the shifted missing code -- confirm.
data$CASE <- data$PHENO - 1
dat <- subset(data, CASE != -10)

# Recode sex to numeric: "Male" -> 1, "Female" -> 2. The column is converted to
# character first so the recode does not rely on implicit coercion; any other
# non-numeric label becomes NA in as.numeric() (with a warning).
dat$sex_for_qc <- as.character(dat$sex_for_qc)
dat$sex_for_qc[dat$sex_for_qc %in% "Male"] <- "1"
dat$sex_for_qc[dat$sex_for_qc %in% "Female"] <- "2"
dat$sex_for_qc <- as.numeric(dat$sex_for_qc)

# Standardize the score against the control distribution (CASE == 0), so the
# zSCORE coefficient is per one control-SD increase in the PRS.
meanControls <- mean(dat$SCORE[dat$CASE == 0])
sdControls <- sd(dat$SCORE[dat$CASE == 0])
dat$zSCORE <- (dat$SCORE - meanControls) / sdControls

# Logistic regression of case status on the standardized PRS, adjusted for
# sex, age and the first ten principal components.
grsTests <- glm(
  CASE ~ zSCORE + sex_for_qc + age +
    PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  family = "binomial",
  data = dat
)
summary(grsTests)

In [None]:
# Runs inside an already-open R session.
library(data.table)

# PRS profile; the columns used below are IID, PHENO and SCORE.
temp_data <- read.table("PRS_score_release_EUROPEANS.profile", header = TRUE)

# Principal components (headerless, tab-separated; named FID, IID, PC1..PC10).
# NOTE(review): "{GP2_release5}" is a literal placeholder -- R does not
# interpolate braces inside a string, so substitute the real release directory
# before running.
temp_covs <- read.table(
  "{GP2_release5}/raw_genotypes/AAC/AAC_release5_maf_hwe_pca.eigenvec",
  header = FALSE,
  sep = "\t"
)
colnames(temp_covs) <- c("FID", "IID", paste0("PC", 1:10))

# Clinical covariates; the second column holds the sample ID and is renamed to
# IID so it can serve as the merge key.
temp_covs_2 <- read.table("./covariates.txt", header = TRUE, sep = "\t")
colnames(temp_covs_2)[2] <- "IID"

# Inner joins on IID: only samples present in all three tables are kept.
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")

# Shift PHENO down by one to obtain the 0/1 outcome used by the model.
# NOTE(review): presumably PHENO follows the PLINK coding (1 = control,
# 2 = case, -9 = missing), which makes -10 the shifted missing code -- confirm.
data$CASE <- data$PHENO - 1
dat <- subset(data, CASE != -10)

# Recode sex to numeric: "Male" -> 1, "Female" -> 2. The column is converted to
# character first so the recode does not rely on implicit coercion; any other
# non-numeric label becomes NA in as.numeric() (with a warning).
dat$sex_for_qc <- as.character(dat$sex_for_qc)
dat$sex_for_qc[dat$sex_for_qc %in% "Male"] <- "1"
dat$sex_for_qc[dat$sex_for_qc %in% "Female"] <- "2"
dat$sex_for_qc <- as.numeric(dat$sex_for_qc)

# Standardize the score against the control distribution (CASE == 0), so the
# zSCORE coefficient is per one control-SD increase in the PRS.
meanControls <- mean(dat$SCORE[dat$CASE == 0])
sdControls <- sd(dat$SCORE[dat$CASE == 0])
dat$zSCORE <- (dat$SCORE - meanControls) / sdControls

# Logistic regression of case status on the standardized PRS, adjusted for
# sex, age and the first ten principal components.
grsTests <- glm(
  CASE ~ zSCORE + sex_for_qc + age +
    PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  family = "binomial",
  data = dat
)
summary(grsTests)

In [None]:
# Runs inside an already-open R session.
library(data.table)

# PRS profile; the columns used below are IID, PHENO and SCORE.
temp_data <- read.table("PRS_score_release_LATINO.profile", header = TRUE)

# Principal components (headerless, tab-separated; named FID, IID, PC1..PC10).
# NOTE(review): "{GP2_release5}" is a literal placeholder -- R does not
# interpolate braces inside a string, so substitute the real release directory
# before running.
temp_covs <- read.table(
  "{GP2_release5}/raw_genotypes/AAC/AAC_release5_maf_hwe_pca.eigenvec",
  header = FALSE,
  sep = "\t"
)
colnames(temp_covs) <- c("FID", "IID", paste0("PC", 1:10))

# Clinical covariates; the second column holds the sample ID and is renamed to
# IID so it can serve as the merge key.
temp_covs_2 <- read.table("./covariates.txt", header = TRUE, sep = "\t")
colnames(temp_covs_2)[2] <- "IID"

# Inner joins on IID: only samples present in all three tables are kept.
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")

# Shift PHENO down by one to obtain the 0/1 outcome used by the model.
# NOTE(review): presumably PHENO follows the PLINK coding (1 = control,
# 2 = case, -9 = missing), which makes -10 the shifted missing code -- confirm.
data$CASE <- data$PHENO - 1
dat <- subset(data, CASE != -10)

# Recode sex to numeric: "Male" -> 1, "Female" -> 2. The column is converted to
# character first so the recode does not rely on implicit coercion; any other
# non-numeric label becomes NA in as.numeric() (with a warning).
dat$sex_for_qc <- as.character(dat$sex_for_qc)
dat$sex_for_qc[dat$sex_for_qc %in% "Male"] <- "1"
dat$sex_for_qc[dat$sex_for_qc %in% "Female"] <- "2"
dat$sex_for_qc <- as.numeric(dat$sex_for_qc)

# Standardize the score against the control distribution (CASE == 0), so the
# zSCORE coefficient is per one control-SD increase in the PRS.
meanControls <- mean(dat$SCORE[dat$CASE == 0])
sdControls <- sd(dat$SCORE[dat$CASE == 0])
dat$zSCORE <- (dat$SCORE - meanControls) / sdControls

# Logistic regression of case status on the standardized PRS, adjusted for
# sex, age and the first ten principal components.
grsTests <- glm(
  CASE ~ zSCORE + sex_for_qc + age +
    PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  family = "binomial",
  data = dat
)
summary(grsTests)

In [None]:
# Runs inside an already-open R session.
library(data.table)

# PRS profile; the columns used below are IID, PHENO and SCORE.
temp_data <- read.table("PRS_score_release_EASTASIANS.profile", header = TRUE)

# Principal components (headerless, tab-separated; named FID, IID, PC1..PC10).
# NOTE(review): "{GP2_release5}" is a literal placeholder -- R does not
# interpolate braces inside a string, so substitute the real release directory
# before running.
temp_covs <- read.table(
  "{GP2_release5}/raw_genotypes/AAC/AAC_release5_maf_hwe_pca.eigenvec",
  header = FALSE,
  sep = "\t"
)
colnames(temp_covs) <- c("FID", "IID", paste0("PC", 1:10))

# Clinical covariates; the second column holds the sample ID and is renamed to
# IID so it can serve as the merge key.
temp_covs_2 <- read.table("./covariates.txt", header = TRUE, sep = "\t")
colnames(temp_covs_2)[2] <- "IID"

# Inner joins on IID: only samples present in all three tables are kept.
covs <- merge(temp_covs, temp_covs_2, by = "IID")
data <- merge(temp_data, covs, by = "IID")

# Shift PHENO down by one to obtain the 0/1 outcome used by the model.
# NOTE(review): presumably PHENO follows the PLINK coding (1 = control,
# 2 = case, -9 = missing), which makes -10 the shifted missing code -- confirm.
data$CASE <- data$PHENO - 1
dat <- subset(data, CASE != -10)

# Recode sex to numeric: "Male" -> 1, "Female" -> 2. The column is converted to
# character first so the recode does not rely on implicit coercion; any other
# non-numeric label becomes NA in as.numeric() (with a warning).
dat$sex_for_qc <- as.character(dat$sex_for_qc)
dat$sex_for_qc[dat$sex_for_qc %in% "Male"] <- "1"
dat$sex_for_qc[dat$sex_for_qc %in% "Female"] <- "2"
dat$sex_for_qc <- as.numeric(dat$sex_for_qc)

# Standardize the score against the control distribution (CASE == 0), so the
# zSCORE coefficient is per one control-SD increase in the PRS.
meanControls <- mean(dat$SCORE[dat$CASE == 0])
sdControls <- sd(dat$SCORE[dat$CASE == 0])
dat$zSCORE <- (dat$SCORE - meanControls) / sdControls

# Logistic regression of case status on the standardized PRS, adjusted for
# sex, age and the first ten principal components.
grsTests <- glm(
  CASE ~ zSCORE + sex_for_qc + age +
    PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  family = "binomial",
  data = dat
)
summary(grsTests)