# Data visualizations
- **Project:** Multi-ancestry PRS
- **Version:** Python/3.9
- **Status:** COMPLETE
- **Last Updated:** 16-NOV-2023

## Notebook Overview
- Forest plots

In [None]:
## Load packages
# `module load` is a shell command (environment-modules style), not R code.
# NOTE(review): presumably this cell is meant to run in a terminal / shell
# cell so that Python and R are available before the plotting cells — confirm.
module load python
module load R

In [None]:
## FOREST PLOTS 

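## Each input file (e.g. forest_EUR.txt) should follow the format below:
## one row per cohort, with the columns COHORT, BETA, SE, L95, U95 and P.

# COHORT	BETA	SE	L95	U95	P
# AAC	0.4915	0.0898	0.667508	0.315492	4.44E-08
# AFR	0.5417	0.0762	0.691052	0.392348	1.14E-12
# XXX	0.5207	0.0581	0.634576	0.406824	3.13E-19

# NOTE: in these example rows L95 is larger than U95 (L95 = BETA + 1.96*SE,
# U95 = BETA - 1.96*SE), i.e. the two confidence-bound columns appear swapped
# relative to their names. Check which convention your own files use.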


In [None]:
###################################### EUR ######################################

# Forest plot of the per-cohort BETA estimates (with L95/U95 bounds) read from
# forest_EUR.txt, saved as EUR.jpg.

library(data.table)
library(ggplot2)

# Columns used below: COHORT, BETA, L95, U95.
dat <- fread("forest_EUR.txt")

# Display order of the cohorts. After coord_flip() the first level is drawn at
# the bottom of the plot. Cohorts not listed here become NA factor levels.
level_order <- c("EUR", "EAS", "CAS", "AJ", "AMR", "AAC", "AFR")

plot <- ggplot(
  data = dat,
  aes(x = factor(COHORT, levels = level_order), y = BETA)
) +
  geom_pointrange(
    # pmin/pmax make the interval independent of which of the two columns
    # actually holds the lower bound; the drawn segment is the same either way.
    aes(ymin = pmin(L95, U95), ymax = pmax(L95, U95)),
    size = 0.7
  ) +
  # Dashed reference line at BETA = 0.
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  coord_flip() +
  ggtitle("Nalls et al., 2019 - EUR summary stats") +
  # theme_minimal() is a complete theme: it must come BEFORE the theme() tweaks,
  # otherwise it replaces them and the bold/size settings are silently dropped.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "EUR.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)


In [None]:
###################################### EAS ######################################

# Forest plot of the per-cohort BETA estimates (with L95/U95 bounds) read from
# forest_EAS.txt, saved as EAS.jpg.

# Loaded here as well so this cell can be run on its own.
library(data.table)
library(ggplot2)

# Columns used below: COHORT, BETA, L95, U95.
dat <- fread("forest_EAS.txt")

# Display order of the cohorts. After coord_flip() the first level is drawn at
# the bottom of the plot. Cohorts not listed here become NA factor levels.
level_order <- c("EUR", "EAS", "CAS", "AJ", "AMR", "AAC", "AFR")

plot <- ggplot(
  data = dat,
  aes(x = factor(COHORT, levels = level_order), y = BETA)
) +
  geom_pointrange(
    # pmin/pmax make the interval independent of which of the two columns
    # actually holds the lower bound; the drawn segment is the same either way.
    aes(ymin = pmin(L95, U95), ymax = pmax(L95, U95)),
    size = 0.7
  ) +
  # Dashed reference line at BETA = 0.
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  coord_flip() +
  ggtitle("Foo et al., 2020 - 23andMe EAS summary stats") +
  # theme_minimal() is a complete theme: it must come BEFORE the theme() tweaks,
  # otherwise it replaces them and the bold/size settings are silently dropped.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "EAS.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)

In [None]:
###################################### AFR ######################################

# Forest plot of the per-cohort BETA estimates (with L95/U95 bounds) read from
# forest_AFR.txt, saved as AAC.jpg.
# NOTE(review): the input file and section header say AFR, while the plot title
# and output file say AAC. Left unchanged — confirm which label is intended.

# Loaded here as well so this cell can be run on its own.
library(data.table)
library(ggplot2)

# Columns used below: COHORT, BETA, L95, U95.
dat <- fread("forest_AFR.txt")

# Display order of the cohorts. After coord_flip() the first level is drawn at
# the bottom of the plot. Cohorts not listed here become NA factor levels.
level_order <- c("EUR", "EAS", "CAS", "AJ", "AMR", "AAC", "AFR")

plot <- ggplot(
  data = dat,
  aes(x = factor(COHORT, levels = level_order), y = BETA)
) +
  geom_pointrange(
    # pmin/pmax make the interval independent of which of the two columns
    # actually holds the lower bound; the drawn segment is the same either way.
    aes(ymin = pmin(L95, U95), ymax = pmax(L95, U95)),
    size = 0.7
  ) +
  # Dashed reference line at BETA = 0.
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  coord_flip() +
  ggtitle("23andMe AAC summary stats") +
  # theme_minimal() is a complete theme: it must come BEFORE the theme() tweaks,
  # otherwise it replaces them and the bold/size settings are silently dropped.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "AAC.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)


In [None]:
################################### AMR (LAT) ###################################

# Forest plot of the per-cohort BETA estimates (with L95/U95 bounds) read from
# forest_AMR.txt, saved as AMR.jpg.

# Loaded here as well so this cell can be run on its own.
library(data.table)
library(ggplot2)

# Columns used below: COHORT, BETA, L95, U95.
dat <- fread("forest_AMR.txt")

# Display order of the cohorts. After coord_flip() the first level is drawn at
# the bottom of the plot. Cohorts not listed here become NA factor levels.
level_order <- c("EUR", "EAS", "CAS", "AJ", "AMR", "AAC", "AFR")

plot <- ggplot(
  data = dat,
  aes(x = factor(COHORT, levels = level_order), y = BETA)
) +
  geom_pointrange(
    # pmin/pmax make the interval independent of which of the two columns
    # actually holds the lower bound; the drawn segment is the same either way.
    aes(ymin = pmin(L95, U95), ymax = pmax(L95, U95)),
    size = 0.7
  ) +
  # Dashed reference line at BETA = 0.
  geom_hline(yintercept = 0, linetype = 2) +
  xlab(" ") +
  ylab("Beta coefficient") +
  coord_flip() +
  ggtitle("Loesch et al., 2021 - 23andMe AMR summary stats") +
  # theme_minimal() is a complete theme: it must come BEFORE the theme() tweaks,
  # otherwise it replaces them and the bold/size settings are silently dropped.
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    axis.text.y = element_text(size = 8, face = "bold"),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    legend.position = "none"
  )

ggsave(
  "AMR.jpg",
  plot = plot,
  width = 8,
  height = 5,
  units = "in", # other options: "cm", "mm"
  dpi = 500
)
