# Data visualizations
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Last Updated:** 6-MARCH-2024

## Notebook Overview
- ROC plots

In [1]:
## Load environment modules
## Makes the python and R modules available to the session; the plotting
## cells below are written in R.
module load python
module load R

[+] Loading python 3.10  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading HDF5  1.12.2 
[+] Loading netcdf  4.9.0 
[-] Unloading gcc  11.3.0  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading openmpi/4.1.3/gcc-11.3.0  ... 
[+] Loading pandoc  2.18  on cn1035 
[+] Loading pcre2  10.40 
[+] Loading R 4.3.2 


In [None]:
###################################### AAC ######################################

## Shell: enter the AAC cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/AAC
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "AAC"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### AFR ######################################

## Shell: enter the AFR cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/AFR
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "AFR"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)

In [None]:
###################################### AJ ######################################

## Shell: enter the AJ cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/AJ
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "AJ"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### AMR ######################################

## Shell: enter the AMR cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/AMR
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "AMR"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### CAS ######################################

## Shell: enter the CAS cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/CAS
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "CAS"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### EAS ######################################

## Shell: enter the EAS cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/EAS
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "EAS"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### EUR ######################################

## Shell: enter the EUR cohort folder and make sure the output folder exists.
## -p keeps the cell re-runnable when ROC/ is already there.
cd ${WORK_DIR}/imputed_data/EUR
mkdir -p ROC

## R: assumes the session's working directory is the folder entered above;
## the relative .profile paths below depend on it.
library(data.table)
library(ggplot2)
library(plotROC)

## Cohort tag used in the output file name.
cohort <- "EUR"

## Read one PRS profile, fit a logistic model of case status on the score and
## return the scored rows tagged with `group_label` for the plot legend.
##   profile_file: path to a whitespace-delimited table with a header row that
##                 provides at least the PHENO and SCORE columns.
##   group_label:  value stored in the Group column of the returned data frame.
score_profile <- function(profile_file, group_label) {
  profile <- read.table(profile_file, header = TRUE)
  ## Shift PHENO down by one so that CASE == 1 marks disease; rows that end up
  ## at -10 (PHENO == -9, presumably the missing-phenotype code) are dropped.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  model <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(model, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")
  scored$Group <- group_label
  scored
}

AFR_dat <- score_profile("PRS_score_release_AFRICANS.profile", "AFR")
EUR_dat <- score_profile("PRS_score_release_EUROPEAN.profile", "EUR")
LATINO_dat <- score_profile("PRS_score_release_LATINO.profile", "AMR")
EAS_dat <- score_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
## One ROC curve per PRS profile, coloured by Group. A single geom_roc layer is
## enough: with n.cuts = 0 a second layer only redraws the same curve.
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

## R does not expand "{WORK_DIR}" inside a string, so the output path is built
## relative to the working directory (the cohort folder) instead.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", paste0(cohort, "_combo_plotRoc.png")),
  width = 8, height = 5, units = "in", dpi = 300
)
