# Data visualizations
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Last Updated:** 16-NOV-2023

## Notebook Overview
- ROC plots

In [None]:
## Load packages
# Shell commands (not R): `module load` is run once for `python` and once for
# `R`, before any of the R code in the cells below is executed.
# NOTE(review): presumably an Environment Modules / Lmod setup on the compute
# host; no versions are pinned here — confirm which ones get loaded.
module load python
module load R

In [None]:
###################################### AAC ######################################

# --- Shell: enter the AAC cohort directory and make sure ROC/ exists ---------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/AAC"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/AAC as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "AAC_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### AFR ######################################

# --- Shell: enter the AFR cohort directory and make sure ROC/ exists ---------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/AFR"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/AFR as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "AFR_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)

In [None]:
###################################### AJ ######################################

# --- Shell: enter the AJ cohort directory and make sure ROC/ exists ----------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/AJ"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/AJ as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "AJ_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### AMR ######################################

# --- Shell: enter the AMR cohort directory and make sure ROC/ exists ---------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/AMR"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/AMR as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "AMR_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### CAS ######################################

# --- Shell: enter the CAS cohort directory and make sure ROC/ exists ---------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/CAS"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/CAS as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "CAS_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### EAS ######################################

# --- Shell: enter the EAS cohort directory and make sure ROC/ exists ---------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/EAS"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/EAS as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "EAS_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)


In [None]:
###################################### EUR ######################################

# --- Shell: enter the EUR cohort directory and make sure ROC/ exists ---------
# `-p` keeps the cell re-runnable when ROC/ is already there.
cd "${WORK_DIR}/EUR"
mkdir -p ROC

# --- R ------------------------------------------------------------------------
# Load required libraries
library(data.table)
library("ggplot2")
library(plotROC)

# Read one PRS score file, fit a logistic model of case status on the score,
# and return the scored samples tagged with an ancestry label.
#
# profile_path: path to a whitespace-delimited file with a header row that
#               contains at least the columns PHENO and SCORE.
# group:        label stored in the `Group` column (used as the ROC colour).
#
# Returns a data.frame with the input columns plus CASE, probDisease,
# predicted, reported and Group.
score_prs_profile <- function(profile_path, group) {
  profile <- read.table(profile_path, header = TRUE)

  # CASE = PHENO - 1. NOTE(review): presumably PLINK coding (2 = case,
  # 1 = control, -9 = missing), which would make CASE 1 / 0 / -10 — confirm.
  profile$CASE <- profile$PHENO - 1
  scored <- subset(profile, CASE != -10)

  ## Probability of disease calculation
  fit <- glm(CASE ~ SCORE, data = scored, family = "binomial")
  scored$probDisease <- predict(fit, scored, type = "response")
  scored$predicted <- ifelse(scored$probDisease > 0.5, "DISEASE", "CONTROL")
  scored$reported <- ifelse(scored$CASE == 1, "DISEASE", "CONTROL")

  scored$Group <- group
  scored
}

# One scored data set per ancestry-specific PRS file
AFR_dat <- score_prs_profile("PRS_score_release_AFRICAN.profile", "AFR")
EUR_dat <- score_prs_profile("PRS_score_release_EUROPEANS.profile", "EUR")
LATINO_dat <- score_prs_profile("PRS_score_release_LATINO.profile", "LATINO")
EAS_dat <- score_prs_profile("PRS_score_release_EASTASIANS.profile", "EAS")

### style plot
to_plot <- rbind(AFR_dat, EUR_dat, LATINO_dat, EAS_dat)

# A single geom_roc() layer: one ROC curve per Group, no cut-point labels.
combo_rocs_plot <- ggplot(
  to_plot,
  aes(d = CASE, m = probDisease, color = Group)
) +
  geom_roc(n.cuts = 0, labels = FALSE) +
  style_roc()

# Written into the ROC/ folder created by the shell step above.
# NOTE(review): assumes R runs with ${WORK_DIR}/EUR as its working directory,
# which the bare .profile file names above rely on as well — confirm.
ggsave(
  plot = combo_rocs_plot,
  filename = file.path("ROC", "EUR_combo_plotRoc.png"),
  width = 8, height = 5, units = "in", dpi = 300
)
