# Data visualizations
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.10, R/4.3.2
- **Status:** COMPLETE
- **Last Updated:** 1-APRIL-2024

## Notebook Overview
- Forest plots

In [1]:
## Load environment modules
# Make the Python and R installations available in the current shell session
# via the cluster's `module` command. No version is pinned, so whichever
# default version the module system resolves is the one that gets loaded.
module load python
module load R

[+] Loading python 3.10  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading HDF5  1.12.2 
[+] Loading netcdf  4.9.0 
[-] Unloading gcc  11.3.0  ... 
[+] Loading gcc  11.3.0  ... 
[+] Loading openmpi/4.1.3/gcc-11.3.0  ... 
[+] Loading pandoc  2.18  on cn4291 
[+] Loading pcre2  10.40 
[+] Loading R 4.3.2 


In [None]:
## FOREST PLOTS 

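## Each input file (e.g. forest_EUR.txt) should follow the format below:
## one row per cohort, tab-separated, where L95/U95 are the lower/upper
## bounds of the 95% confidence interval (BETA -/+ 1.96 * SE).

# COHORT	BETA	SE	L95	U95	P
# AAC	0.4915	0.0898	0.315492	0.667508	4.44E-08
# AFR	0.5417	0.0762	0.392348	0.691052	1.14E-12
# XXX	0.5207	0.0581	0.406824	0.634576	3.13E-19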


In [19]:
###################################### EUR ######################################
# Forest plot for the EUR results.
# Reads per-cohort effect estimates from "forest_EUR.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "EUR.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_EUR.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("EUR - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "EUR.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)


In [14]:
###################################### EAS ######################################
# Forest plot for the EAS results.
# Reads per-cohort effect estimates from "forest_EAS.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "EAS.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_EAS.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("EAS - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "EAS.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)

In [12]:
###################################### AFR ######################################
# Forest plot for the AFR results.
# Reads per-cohort effect estimates from "forest_AFR.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "AFR.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_AFR.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("AFR - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "AFR.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)

In [11]:
###################################### AMR ######################################
# Forest plot for the AMR results.
# Reads per-cohort effect estimates from "forest_AMR.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "AMR.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_AMR.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("AMR - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "AMR.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)

In [9]:
###################################### CAS ######################################
# Forest plot for the CAS results.
# Reads per-cohort effect estimates from "forest_CAS.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "CAS.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_CAS.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("CAS - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "CAS.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)

In [8]:
###################################### AAC ######################################
# Forest plot for the AAC results.
# Reads per-cohort effect estimates from "forest_AAC.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "AAC.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_AAC.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("AAC - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "AAC.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)

In [20]:
###################################### AJ ######################################
# Forest plot for the AJ results.
# Reads per-cohort effect estimates from "forest_AJ.txt" (the columns COHORT,
# BETA, L95 and U95 are used) and writes the figure to "AJ.jpg".

library(data.table)
library(ggplot2)

dat <- fread("forest_AJ.txt")

# Preferred display order. Cohorts present in the file but missing from this
# list are appended, so each keeps its own axis label instead of being turned
# into NA by factor() and stacked on a single position.
level_order <- union(c("EUR", "EAS", "AMR", "AAC"), unique(dat$COHORT))

plot <- ggplot(
  data = dat,
  aes(
    x = factor(COHORT, levels = level_order),
    y = BETA,
    # pmin()/pmax() keep the interval well-formed even when the two bound
    # columns are swapped in the input file.
    ymin = pmin(L95, U95),
    ymax = pmax(L95, U95)
  )
) +
  geom_pointrange(size = 0.7) +
  # Dashed reference line at "no effect".
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  # Flip so that cohorts are listed vertically and betas run horizontally.
  coord_flip() +
  ggtitle("AJ - Individual level data - GP2 release 6") +
  # The complete theme must come first: theme_minimal() resets every theme
  # element, so adding it after theme() throws the customisations away.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "AJ.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)