<a href="https://colab.research.google.com/github/KoalaQin/gnomad_lof/blob/master/post_v4_stats_plots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### variants change from VUS to LB using EUR AF and grpmax in non-EUR

In [2]:
%load_ext rpy2.ipython

In [7]:
%%R
library(tibble)

color_amr = k_amr = '#ED1E24'
color_eur = k_eur = '#6AA5CD'
color_afr = k_afr = '#941494'
color_sas = k_sas = '#FF9912'
color_eas = k_eas = '#108C44'
color_oth = k_oth = '#ABB9B9'
color_mde = k_mde = '#33CC33'
color_asj = k_asj = 'coral'

color_nfe = k_nfe = color_eur
color_fin = k_fin = '#002F6C'

color_mde = '#000080'
color_asj = 'gold2'

exac.colors <- c(color_eas, color_sas, color_mde, color_afr, color_amr, color_asj, color_oth, color_eur)
ancestries <- c("East Asian", "South Asian", "Middle Eastern", "African", "Admixed American", "Ashkenazi Jewish", "Remaining", "European")
names(exac.colors) = ancestries

gnomadv2.var <- c(60371, 71089, 0, 104692, 70962, 66611, 75826, 58348)
gnomadv4.var <- c(65222, 80879, 93553, 118500, 90691, 75910, 91847, 64635)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.var, `gnomAD v4`=gnomadv4.var,
                AF=0.001, type='n_var_genanc_AF_over')

gnomadv2.var <- c(312647, 287696, 0, 400340, 316861, 136678, 493195, 190663)
gnomadv4.var <- c(330657, 347035, 557787, 416452, 376149, 142065, 347880, 213102)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.var, `gnomAD v4`=gnomadv4.var,
                AF=0.0001, type='n_var_genanc_AF_over') %>% union_all(counts)

gnomadv2.var <- c(41467, 71690, 0, 74752, 78021, 53376, 80821, 79884)
gnomadv4.var <- c(48231, 73634, 69636, 76857, 76856, 62886, 78120, 78634)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.var, `gnomAD v4`=gnomadv4.var,
                AF=0.001, type='n_var_global_AF_over') %>% union_all(counts)

gnomadv2.var <- c(102035, 178990, 0, 179128, 219695, 90145, 209627, 242026)
gnomadv4.var <- c(110515, 198850, 153224, 215937, 216258, 107368, 282295, 277615)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.var, `gnomAD v4`=gnomadv4.var,
                AF=0.0001, type='n_var_global_AF_over') %>% union_all(counts)

gnomadv2.var <- c(18320, 21488, 0, 48395, 23276, 26765, 24018, 0)
gnomadv4.var <- c(30385, 33203, 47266, 71118, 32780, 32129, 33404, 0)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.var, `gnomAD v4`=gnomadv4.var,
                AF=0.001, type='n_var_EUR_AF_under_genanc_AF_over') %>% union_all(counts)

gnomadv2.var <- c(89588, 86321, 0, 122967, 106685, 40461, 194222, 0)
gnomadv4.var <- c(142722, 159763, 313397, 180134, 181921, 55217, 160593, 0)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.var, `gnomAD v4`=gnomadv4.var,
                AF=0.0001, type='n_var_EUR_AF_under_genanc_AF_over') %>% union_all(counts)

gnomadv2.n <- c(9977, 15308, 0, 12487, 17720, 5185, 3614, (12562+64603))
gnomadv3.n <- c(2604, 2419, 158, 20744, 7647, 1736, 456+1047, 5316+34029)
gnomadv4.n <- c(19850, 43129, 2884, 16740, 22362, 13068, 30198, 556006+26710)
counts = tibble(anc=ancestries, `gnomAD v2`=gnomadv2.n, `gnomAD v4`=gnomadv4.n+gnomadv3.n,
                AF=0, type='sample_size') %>% union_all(counts)

order1 = c("East Asian", "South Asian", "Middle Eastern", "African", "Admixed American", "Ashkenazi Jewish", "Remaining", "European")
order2 = c("European", "East Asian", "South Asian", "Middle Eastern", "African", "Admixed American", "Ashkenazi Jewish", "Remaining")

data1 = counts %>%
  pivot_longer(c(-anc, -AF, -type)) %>%
  mutate(anc=fct_relevel(anc, rev(order1)))

blog_p = data1 %>%
  filter(type %in% c('sample_size', 'n_var_genanc_AF_over') & AF != 1e-4) %>%
  filter(name == 'gnomAD v4') %>%
  mutate(type=if_else(type == 'sample_size', 'Number of individuals', 'Number of non-syn variants AF > 0.1%')) %>%
  ggplot + aes(x = '', y = value, fill = anc) +
  geom_bar(stat='identity') +
  scale_y_continuous(labels=comma, expand=expansion(c(0, 0.1),0)) +
  scale_x_discrete(labels = NULL) +
  xlab(NULL) + ylab(NULL) + # ylab('Number of variants') +
  scale_fill_manual(values=exac.colors, name=NULL) +
  facet_wrap(~type, scales='fixed') +
  theme(legend.position = 'bottom',
        legend.background=element_rect(fill = alpha("white", 0)))

pdf('v4_samples_and_variants.pdf', height=4.5, width=5.5)
print(blog_p)
dev.off()

png('v4_samples_and_variants.png', height=4.5, width=5.5, res=300, units='in')
print(blog_p)
dev.off()

library(gganimate)
out = data1 %>%
  filter(type %in% c('sample_size', 'n_var_genanc_AF_over') & AF != 1e-4) %>%
  mutate(type=if_else(type == 'sample_size', 'Number of individuals', 'Number of non-syn variants AF > 0.1%')) %>%
  ggplot + aes(x = '', y = value, fill = anc) +
  geom_bar(stat='identity') +
  transition_states(name, wrap=F) +
  scale_y_continuous(labels=comma, expand=expansion(c(0, 0.1),0)) +
  scale_x_discrete(labels = NULL) +
  xlab(NULL) + ylab(NULL) + # ylab('Number of variants') +
  scale_fill_manual(values=exac.colors, name=NULL) +
  facet_wrap(~type, scales='fixed') +
  theme(legend.position = 'bottom',
        legend.background=element_rect(fill = alpha("white", 0)))

width = 5.5
height = 4.5
ren = animate(out, duration = 3, fps = 10, width = width, height = height, res = 300, units='in',
              renderer = gifski_renderer(loop = FALSE), rewind=FALSE)
anim_save("v2_v4_samples_variants.gif", ren)
ren_loop = animate(out, duration = 3, fps = 10, width = width, height = height, res = 300, units='in',
                   renderer = gifski_renderer(loop = TRUE), rewind=FALSE)
anim_save("v2_v4_samples_variants_loop.gif", ren)

counts %>%
  pivot_longer(c(-anc, -AF, -type)) %>%
  mutate(anc=fct_relevel(anc, rev(ancestries)),
         anc=fct_relevel(anc, 'Middle Eastern'),
         anc=fct_relevel(anc, 'European', after=Inf)) %>%
  filter(anc != 'Remaining' & type == 'n_var_genanc_AF_over' & AF == 1e-3) %>%
  ggplot + aes(x = name, y = value, fill = anc) +
  geom_bar(stat='identity') +
  scale_y_continuous(labels=comma, expand=c(0,0)) +
  xlab(NULL) + ylab('Number of variants') +
  scale_fill_manual(values=exac.colors, name=NULL) +
  theme(legend.position = 'right',
        legend.background=element_rect(fill = alpha("white", 0))) -> p
png('genanc_over_0.001_v2_v4.png', height=4.5, width=4.5, res=300, units='in')
print(p)
dev.off()

counts %>%
  pivot_longer(c(-anc, -AF, -type)) %>%
  mutate(anc=fct_relevel(anc, rev(ancestries)),
         anc=fct_relevel(anc, 'Middle Eastern')) %>%
  filter(!(anc %in% c('Remaining', 'European')) &
           type == 'n_var_EUR_AF_under_genanc_AF_over' & AF == 1e-3) %>%
  ggplot + aes(x = name, y = value, fill = anc) +
  geom_bar(stat='identity') +
  scale_y_continuous(labels=comma, expand=c(0,0)) +
  xlab(NULL) + ylab('Number of variants') +
  scale_fill_manual(values=exac.colors, name=NULL) +
  theme(legend.position = 'right',
        legend.background=element_rect(fill = alpha("white", 0))) -> p
png('genanc_over_0.001_EUR_under_0.001_v2_v4.png', height=4.5, width=4.5, res=300, units='in')
print(p)
dev.off()

counts %>%
  pivot_longer(c(-anc, -AF, -type)) %>%
  mutate(anc=fct_relevel(anc, rev(ancestries))) %>%
  filter(!(anc %in% c('Remaining', 'European', 'Middle Eastern', 'Ashkenazi Jewish')) &
           type == 'n_var_EUR_AF_under_genanc_AF_over' & AF == 1e-4) %>%
  ggplot + aes(x = name, y = value, fill = anc) +
  geom_bar(stat='identity') +
  scale_y_continuous(labels=comma, expand=c(0,0)) +
  xlab(NULL) + ylab('Number of variants') +
  scale_fill_manual(values=exac.colors, name=NULL) +
  theme(legend.position = 'right',
        legend.background=element_rect(fill = alpha("white", 0))) -> p
png('genanc_over_1e-4_EUR_under_1e-4_v2_v4.png', height=4.5, width=4.5, res=300, units='in')
print(p)
dev.off()

counts %>%
  pivot_longer(c(-anc, -AF, -type)) %>%
  mutate(anc=fct_relevel(anc, rev(ancestries))) %>%
  group_split(AF, type) %>%
  map(~{plots=ggplot(data=.) + aes(x = name, y = value, fill = anc) +
    geom_bar(stat='identity') +
    scale_y_continuous(labels=comma, expand=expansion(c(0,0),0)) +
    xlab(NULL) + ylab('Number of variants') +
    scale_fill_manual(values=exac.colors, name=NULL) +
    theme(legend.position = 'right', # c(0.01, 0.99),
          # legend.justification = c(0, 1),
          legend.background=element_rect(fill = alpha("white", 0)))
  ggsave(sprintf('%s_%s.png', .x$type[1], .x$AF[1]), plots, width=4.5, height=4)
  })

v4_16 = tribble(~anc, ~count,
                "afr",	407293,
                "amr",	363936,
                "eas",	293642,
                "fin",	136023,
                "nfe",	240647,
                "remaining",	367079,
                "sas",	375877
)

v4_16 %>%
  filter(anc != 'remaining') %>%
  ggplot + aes(x = anc, y = count, fill=anc) +
  geom_bar(stat='identity') +
  scale_y_continuous(labels=comma, expand=c(0,0)) +
  scale_x_discrete(labels=pop_names) +
  xlab(NULL) + ylab('Number of variants') +
  scale_fill_manual(values=pop_colors, name=NULL, guide=F) +
  theme(legend.position = c(0.01, 0.99), legend.justification = c(0, 1),
        legend.background=element_rect(fill = alpha("white", 0)),
        axis.text.x = element_text(angle = 30, hjust = 1))





Error in union_all(., counts) : could not find function "union_all"
