In [64]:
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(tidyverse)

In [2]:
# Import the collapsed gene table.
# The file is read without a header line because the column descriptions are
# spread over its first four rows. Rows containing NA (a single space in the
# file is read as NA) are dropped before anything else happens.
genetable_collapsed <- na.omit(
  read.table(
    'hl60_exp1/DAC_genetable_collapsed.txt',
    header = FALSE, # skip = 4,
    sep = '\t', na.strings = ' '
  )
)

# From the third column on, join the four header rows with "_" and store the
# result in row 4; columns 1-2 keep the value row 4 already holds.
genetable_collapsed[4, 3:ncol(genetable_collapsed)] <- apply(
  genetable_collapsed[1:4, 3:ncol(genetable_collapsed)],
  2, paste, collapse = "_"
)
colnames(genetable_collapsed) <- genetable_collapsed[4, ]

# Drop the four header rows. Negative indexing is used instead of
# `5:nrow(...)`, which would count downwards (rows 5 and 4) if the file held
# no data rows. data.frame() re-checks the column names, so non-syntactic
# characters in the pasted headers are replaced (default check.names = TRUE).
genetable_collapsed <- data.frame(genetable_collapsed[-seq_len(4), ])

In [85]:
# Hit cut-off; compared against score / pseudo_sd * -log10(pvalue) when the
# genes are labelled as hits.
threshold <- 4

# Height of the hit boundary at phenotype score(s) `x`, used to draw the
# dashed curve on the volcano plot.
#
# x: numeric vector of scores.
# Returns threshold * pseudo_sd * sign(x) / x, i.e. threshold * pseudo_sd
# divided by |x|. `threshold` and `pseudo_sd` are not arguments: they are
# looked up outside the function each time it is called. At x = 0 the
# expression is 0 / 0, which is NaN.
draw_threshold <- function(x) {
  threshold * pseudo_sd * sign(x) / x
}

In [86]:
# Build the table behind the volcano plot: one row per gene with its
# Mann-Whitney p-value, its rho phenotype score and a categorical label.
rho_df <- genetable_collapsed %>%
  select(
    gene,
    pvalue = rho_ave_Rep1_Rep2_Mann.Whitney.p.value_,
    score = rho_ave_Rep1_Rep2_average.phenotype.of.strongest.3_
  ) %>%
  # Resets the row names to the default 1..n sequence.
  remove_rownames() %>%
  mutate(pvalue = as.double(pvalue), score = as.double(score))

# Spread of the pseudogene scores; every gene score is expressed in units of
# this value. Rows whose gene name contains the literal text "pseudo" count as
# pseudogenes. na.rm = TRUE keeps a single missing score from turning the
# whole statistic (and therefore every hit call below) into NA.
pseudo_sd <- rho_df %>%
  filter(grepl('pseudo', gene, fixed = TRUE)) %>%
  pull(score) %>%
  sd(na.rm = TRUE)

# Label every row. Conditions are tried top to bottom and a condition that
# evaluates to NA (missing score or p-value) does not match, so such rows fall
# through to 'gene_non_hit'.
#   pseudo          - pseudogene rows, never called as hits
#   resistance_hit  - score > 0 and score / pseudo_sd * -log10(p) >= threshold
#   sensitivity_hit - score < 0 and score / pseudo_sd * -log10(p) <= -threshold
#   gene_non_hit    - everything else
rho_df <- rho_df %>%
  mutate(
    label = case_when(
      grepl('pseudo', gene, fixed = TRUE) ~ 'pseudo',
      score > 0 & score / pseudo_sd * -log10(pvalue) >= threshold ~ 'resistance_hit',
      score < 0 & score / pseudo_sd * -log10(pvalue) <= -threshold ~ 'sensitivity_hit',
      TRUE ~ 'gene_non_hit'
    ),
    # Fix the level order explicitly instead of relying on alphabetical order.
    label = factor(
      label,
      levels = c('sensitivity_hit', 'resistance_hit', 'gene_non_hit', 'pseudo')
    )
  )

In [87]:
# Number of rows in each label category.
rho_df %>%
  group_by(label) %>%
  count()

label,n
<fct>,<int>
sensitivity_hit,469
resistance_hit,377
gene_non_hit,19678
pseudo,20524


In [88]:
# Base volcano plot: rho score on x, -log10 of the Mann-Whitney p-value on y.
# Layers are drawn in the order they are added: pseudogenes, non-hit genes,
# resistance hits, sensitivity hits, then the dashed threshold curve.
# Every point layer supplies its own subset of rho_df through `data =`.
p <- ggplot(drop_na(rho_df), aes(x = score, y = -log10(pvalue))) +
  theme_classic() +
  scale_x_continuous(limits = c(-1.0, 1.0)) +
  ylim(0.5, 4.5) +
  labs(
    x = expression('CRISPRi decitabine phenotype (' * rho * ')'),
    y = expression('-log'[10] * '(Mann-Whitney p-value)')
  ) +
  # NOTE(review): no layer in this notebook maps a colour aesthetic, so this
  # scale (and its legend labels) appears to have no visible effect - confirm.
  scale_color_manual(
    values = c('#3182bd', '#de2d26'),
    labels = c('Sensitizing Hits', 'Resistance Hits')
  ) +
  geom_point(
    data = filter(rho_df, label == 'pseudo'),
    colour = 'gray80', size = 1, alpha = 0.01
  ) +
  geom_point(
    data = filter(rho_df, label == 'gene_non_hit'),
    colour = 'gray90', size = 1, alpha = 0.1
  ) +
  geom_point(
    data = filter(rho_df, label == 'resistance_hit'),
    colour = '#fcae91', size = 2, alpha = 0.8
  ) +
  geom_point(
    data = filter(rho_df, label == 'sensitivity_hit'),
    colour = '#bdd7e7', size = 2, alpha = 0.8
  ) +
  stat_function(fun = draw_threshold, linetype = 'dashed', colour = 'black')

In [108]:
# Shared implementation of the two labelling helpers below.
#
# p:        plot to add the layers to.
# dd:       data frame with the rows to highlight; the text label is taken
#           from its `gene` column.
# fill:     fill colour of the outlined points.
# t_x, t_y: nudge applied to the repelled text labels.
# Returns `p` with one outlined point layer and one text layer added. When
# `dd` is a data frame with zero rows (for example because a gene symbol was
# misspelled in the filter that produced it) a warning is raised and `p` is
# returned unchanged, so the missing label does not go unnoticed.
.label_hits <- function(p, dd, fill, t_x, t_y) {
  if (is.data.frame(dd) && nrow(dd) == 0) {
    warning("no rows to label; plot returned unchanged", call. = FALSE)
    return(p)
  }
  p +
    geom_point(
      data = dd,
      size = 3, shape = 21,
      stroke = 0.5,
      colour = "grey30", fill = fill
    ) +
    geom_text_repel(
      data = dd,
      aes(label = gene),
      color = 'black', size = 4.5, nudge_x = t_x, nudge_y = t_y
    )
}

# Highlight the rows of `dd` as blue (#3182bd) outlined points with gene
# labels; by default the labels are nudged down and to the left.
label_sensitivity_hit <- function(p, dd, t_x = -0.2, t_y = -0.1) {
  .label_hits(p, dd, fill = "#3182bd", t_x = t_x, t_y = t_y)
}

# Highlight the rows of `dd` as red (#de2d26) outlined points with gene
# labels; by default the labels are nudged up and to the right.
label_resistance_hit <- function(p, dd, t_x = 0.2, t_y = 0.1) {
  .label_hits(p, dd, fill = "#de2d26", t_x = t_x, t_y = t_y)
}

In [118]:
### Genes to label:
# Each call adds its own point and text layers on top of the base plot, so
# the order below is also the drawing order.
p1 <- p %>%
  # 1. Decapping proteins: DCP2, DCP1A, DCPS
  label_sensitivity_hit(filter(rho_df, gene %in% c('DCP2', 'DCP1A', 'DCPS'))) %>%
  # 2. BCL2 family proteins: BCL2, MCL1
  #    (the symbol was previously misspelled 'DCL2', which matched no row, so
  #    BCL2 was never labelled)
  label_sensitivity_hit(filter(rho_df, gene %in% c('BCL2'))) %>%
  label_sensitivity_hit(filter(rho_df, gene %in% c('MCL1')), t_x = -0.3, t_y = -0.4) %>%
  # 3. Positive controls: DCK, SLC29A1
  label_resistance_hit(filter(rho_df, gene %in% c('DCK', 'SLC29A1')), t_y = -0.2) %>%
  # 4. m6A writers: METTL3, CBLL1, ZC3H13, KIAA1429
  label_resistance_hit(filter(rho_df, gene %in% c('METTL3', 'KIAA1429'))) %>%
  label_resistance_hit(filter(rho_df, gene %in% c('CBLL1')), t_y = 0.2) %>%
  label_resistance_hit(filter(rho_df, gene %in% c('ZC3H13')), t_y = -0.1) %>%
  # 5. target genes: SQLE, INTS5, MYBBP1A
  label_sensitivity_hit(filter(rho_df, gene %in% c('SQLE', 'INTS5', 'MYBBP1A')))

In [119]:
# Text styling for the final figure: black size-15 tick labels, size-16 axis
# titles, and legend settings (no title, enlarged black text, placed right).
p1 <- p1 +
  theme(
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    axis.text.x = element_text(colour = 'black', size = 15),
    axis.text.y = element_text(colour = 'black', size = 15),
    legend.position = 'right',
    legend.text = element_text(colour = 'black', size = rel(1.5)),
    legend.title = element_blank()
  )

In [120]:
# Write the figure as EPS and PDF (6 x 4 inches).
# The device ggsave() picks for ".eps" does not support semi-transparent
# colours ("semi-transparency is not supported on this device"), and most
# point layers of this plot are drawn with alpha < 1. When R was built with
# cairo support, write the EPS through grDevices::cairo_ps instead; otherwise
# fall back to the default "eps" device.
ggsave(
  'plots/CRISPRi-rho_volcano_v3.eps', p1,
  device = if (capabilities("cairo")) cairo_ps else "eps",
  width = 6, height = 4
)
ggsave('plots/CRISPRi-rho_volcano_v3.pdf', p1, width = 6, height = 4)

“Removed 13796 rows containing missing values (geom_point).”
“Removed 12778 rows containing missing values (geom_point).”
“Removed 54 row(s) containing missing values (geom_path).”
“Removed 1 rows containing missing values (geom_point).”
“Removed 1 rows containing missing values (geom_text_repel).”
“semi-transparency is not supported on this device: reported only once per page”
“Removed 13796 rows containing missing values (geom_point).”
“Removed 12778 rows containing missing values (geom_point).”
“Removed 54 row(s) containing missing values (geom_path).”
“Removed 1 rows containing missing values (geom_point).”
“Removed 1 rows containing missing values (geom_text_repel).”


In [84]:
# Print the R version, platform, locale and attached/loaded packages of this
# session so the environment that produced the figure is on record.
sessionInfo()

R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /data_gilbert/home/aarab/anaconda3/envs/deseq2/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] forcats_0.5.1      stringr_1.4.0      dplyr_1.0.7        purrr_0.3.4       
 [5] readr_2.0.2        tidyr_1.1.4        tibble_3.1.6       tidyverse_1.3.1   
 [9] RColorBrewer_1.1-2 ggrepel_0.9.1      ggplot2_3.3.5     

loaded via a namespace (and not attached):
 [1] pbdZMQ_0.3-6     

In [68]:
# no_negs_res <- no_negs[c('DCK', 'SLC29A1', 'METTL3', 'YTHDF2', 'KIAA1429', 'CBLL1', 'ZC3H13'),]
# no_negs_res$cat2 <- ifelse(no_negs_res[[1]] %in% c('DCK', 'SLC29A1'), 'drug', 'm6a')
# no_negs_res <- no_negs[no_negs$V1 %in% c('DCK', 'SLC29A1'),]
# no_negs_sens <- no_negs[c('BCL2', 'MCL1', 'DCP2', 'DCPS', 'EDC3', 'DCP1A', 'CMTR2', 'LSM14A', 'CSDE1', 'PUM1', 'ZC3H18'),]
# no_negs_sens$cat2 <- ifelse(no_negs_sens[[1]] %in% c('BCL2', 'MCL1'), 'bcl2', 'decap')
# no_negs_sens <- no_negs[no_negs$V1 %in% c('BCL2', 'DCP2', 'DCPS', 'EDC3', 'DCP1A'),]
# no_negs_labels <- rbind(no_negs_res, no_negs_sens)
# head(no_negs_labels, n = 15)

# # reorder factors
# no_negs_labels$cat <- factor(no_negs_labels$cat, levels = c('sensitizing_hit', 'resistance_hit'))

# scale_shape_manual(values = c(16, 16, 16, 16), 
#                    labels = c('BCL-2 family', 'mRNA decapping', 'Drug metabolism', 'RNA methylation')) +
# scale_shape_manual(values = c(16, 16), 
#                    labels = c('Sensitizing Hits', 'Resistance Hits')) +
# geom_text_repel(data = no_negs[c('METTL3'),], 
# geom_text_repel(data = no_negs[c('DCK'),], 
# geom_text_repel(data = no_negs[c('SLC29A1'),], 
# geom_text_repel(data = no_negs[c('BCL2'),],
# geom_text_repel(data = no_negs[c('MCL1'),],
# geom_text_repel(data = no_negs[c('DCP2'),],
# geom_text_repel(data = no_negs[c('DCPS'),],
# geom_text_repel(data = no_negs[c('DCP1A'),],
# geom_text_repel(data = no_negs[c('EDC3'),],

# no_negs_res$cat2 <- ifelse(no_negs_res[[1]] %in% c('DCK', 'SLC29A1'), 'drug', 'm6a')
# no_negs_sens <- no_negs[c('TAF6L', 'SUPT20H', 'CCDC101', 'USP22'),]
# no_negs_sens$cat2 <- ('saga')
# no_negs_labels <- rbind(no_negs_res, no_negs_sens)

# head(no_negs_labels, n = 15)

# p <- ggplot(no_negs, aes(x = V29, y = -1 * log(V28, 10), label = V1)) +
# geom_point(data = no_negs, size = 1, color = 'gray90') +
# geom_point(data = resistance_hits, size = 1, color = '#fcae91') + 
# geom_point(data = sensitivity_hits, size = 1, color = '#bdd7e7') +
# theme_classic()+
# xlim(-1.0, 1.0) +
# scale_y_continuous(limits = c(0.5,4.7)) +
# xlab(expression('CRISPRi DNMTi phenotype (' * rho * ')')) +
# ylab(expression('-log'[10] * '(Mann-Whitney p-value)')) +
# theme(axis.text.x = element_text(size = 15, color = 'black'),
#       axis.text.y = element_text(size = 15, color = 'black'),
#       axis.title.x = element_text(size = 20),
#       axis.title.y = element_text(size = 20),
#       legend.title = element_blank(),
#       legend.text = element_text(size = rel(1.5), color = 'black'),
#       legend.position = 'none'
#       ) +

# stat_function(fun = draw_threshold_neg, linetype = 'dashed', color = 'black') +
# stat_function(fun = draw_threshold_pos, linetype = 'dashed', color = 'black')+
# geom_point(data = no_negs_labels, shape = 16, color = '#3182bd', size = 4) +
# geom_text_repel(data = no_negs[c('SUPT20H'),], 
#                 color = 'black', size = 4.5, nudge_x = -0.3, nudge_y = 0.05) +
# geom_text_repel(data = no_negs[c('TAF6L'),], 
#                 color = 'black', size = 4.5, nudge_x = -0.2) +
# geom_text_repel(data = no_negs[c('USP22'),], 
#                 color = 'black', size = 4.5, nudge_x = -0.25, nudge_y = -0.27) +
# geom_text_repel(data = no_negs[c('CCDC101'),], 
#                 color = 'black', size = 4.5, nudge_x = -0.4, nudge_y = -0.1)

# p

# ggsave(file = 'ray_volcano_plot_v2.eps', height = 5.5, width = 5.5)