# Making plots of the CS vs GT Pearson correlation at different permutation test p-value thresholds, and TPR/FPR plots across permutation test p-value thresholds

### What this does
- Loads in results from applying different permutation testing p-value thresholds (0 to 0.99 in steps of 0.01) to the deconvolution of each screen
- Makes Pearson and Spearman correlation plots for pvalue thresholds at 0.001, 0.01, and 0.05
- Makes TPR vs FPR plots

In [1]:
library(tidyverse)
library(cowplot)
library(viridis)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.2.1     [32m✔[39m [34mpurrr  [39m 0.3.3
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 0.8.5
[32m✔[39m [34mtidyr  [39m 1.0.0     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

********************************************************
Note: As of version 1.0.0, cowplot does not change the
  default ggplot2 theme anymore. To recover the previous
  behavior, execute:
  theme_set(theme_cowplot())
********************************************************

Loading required package: viridisLite


Loading in results from applying different permutation testing p-value thresholds (0 to 0.99 in steps of 0.01) to the deconvolution of each screen

In [None]:
# Permutation-test p-value thresholds whose summary statistics are loaded.
# seq() builds the grid as from + k * by in floating point, which need not be
# the double closest to the decimal label; rounding to 2 decimals makes every
# value compare equal to its literal (e.g. 0.05) in later filters.
sig_levels <- round(seq(0.0, 0.99, by = 0.01), 2)

# Read the summary statistics for one threshold and add the derived columns.
#
# threshold: a single value from sig_levels.
# Returns the filtered data frame with pool_size, the two factor columns and
# permute_threshold appended.
load_threshold_stats <- function(threshold) {
  # The file for threshold 0 is labelled "0.0", whereas R formats 0 as "0";
  # every other label is R's default formatting of the number.
  label <- if (threshold == 0) "0.0" else as.character(threshold)
  path <- paste0(
    "regression_summary_stats/summary_stats_from_all_reg_sig_coef_permute_",
    label,
    "_Dec102021.csv"
  )

  stats <- read.csv(path)

  # Drop the CS_run3 rows with compression 2 and 7 replicates.
  stats <- stats %>%
    filter(!((Metadata_run %in% c("CS_run3")) &
      Metadata_compression == 2 &
      Metadata_replicates == 7))

  stats$pool_size <- (stats$Metadata_compression / 2) * stats$Metadata_replicates
  stats$Metadata_compression_fctr <- factor(
    stats$Metadata_compression,
    levels = c(2.0, 4.0, 8.0, 16.0, 32.0)
  )
  stats$Metadata_replicates_fctr <- factor(
    stats$Metadata_replicates,
    levels = c(3, 5, 7)
  )
  stats$permute_threshold <- threshold
  stats
}

# Load every threshold first and bind once, instead of growing the data frame
# with rbind() inside a loop (which re-copies all accumulated rows each time).
reg_hit_stats <- do.call(rbind, lapply(sig_levels, load_threshold_stats))

# pool_size is used as a grouping variable downstream, so store it as a factor.
reg_hit_stats$pool_size <- factor(reg_hit_stats$pool_size)

Making Pearson and Spearman correlation plots for pvalue thresholds at 0.001, 0.01, and 0.05

In [None]:
# Permutation-test p-value thresholds to show in the correlation plots.
stringency_thresholds <- c(0.001, 0.01, 0.05)

# Match thresholds with a tolerance rather than exact equality: the values in
# permute_threshold come from a floating point grid and may differ from the
# decimal literals above in the last bit.
threshold_hits <- outer(
  reg_hit_stats$permute_threshold,
  stringency_thresholds,
  function(observed, wanted) abs(observed - wanted) < 1e-8
)

# Surface requested thresholds that have no rows instead of silently plotting
# fewer stringency levels than asked for.
missing_thresholds <- stringency_thresholds[colSums(threshold_hits) == 0]
if (length(missing_thresholds) > 0) {
  warning(
    "No rows found for permute_threshold value(s): ",
    paste(missing_thresholds, collapse = ", "),
    call. = FALSE
  )
}

data <- reg_hit_stats %>% filter(rowSums(threshold_hits) > 0)

# Draw `plot` into a PDF at `path`. The plot is printed explicitly so this also
# works when the code is run through source(), and the device is closed even if
# drawing fails.
save_correlation_pdf <- function(plot, path, width, height) {
  pdf(path, width = width, height = height, useDingbats = FALSE)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# Layers shared by the Pearson and Spearman plots.
# NOTE(review): permute_threshold is mapped to color as a continuous variable,
# so geom_smooth() fits a single curve across all thresholds; if one curve per
# threshold is intended, map factor(permute_threshold) instead -- confirm.
correlation_layers <- list(
  geom_point(),
  scale_x_continuous(trans = "log2"),
  xlab("Number of perturbations in pool"),
  theme_cowplot(),
  geom_smooth(),
  ylim(-0.1, 1),
  theme(
    plot.title = element_text(size = 10, face = "plain"),
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    # "none" is the documented value for hiding the legend.
    legend.position = "none"
  )
)

pearson_plot <- ggplot(
  data,
  aes(
    x = num_drugs_per_pool,
    y = pearson_gt_mahala_cs_perm_reg_l1_norm,
    color = permute_threshold
  )
) +
  correlation_layers +
  ylab("Pearson correlation") +
  ggtitle("Pearson correlation between the GT Mahalanobis distances &\nthe L1 norms of regression coeficients ")

save_correlation_pdf(
  pearson_plot,
  "regression_summary_stats/pearson_by_stringency.pdf",
  width = 2.75,
  height = 2.5
)

spearman_plot <- ggplot(
  data,
  aes(
    x = num_drugs_per_pool,
    y = spearman_gt_mahala_cs_perm_reg_l1_norm,
    color = permute_threshold
  )
) +
  correlation_layers +
  ylab("Spearman correlation") +
  ggtitle("Spearman correlation between the GT Mahalanobis distances &\nthe L1 norms of regression coeficients ")

save_correlation_pdf(
  spearman_plot,
  "regression_summary_stats/spearman_by_stringency.pdf",
  width = 2.75,
  height = 2.75
)

Making TPR/FPR plots

In [None]:
# Collect the rows for one ROC panel: every row of `run`, followed by the
# CS_run3 rows whose Metadata_scheme equals `scheme`. Adds pool_color, the
# numeric value of the pool_size factor labels, for the continuous color scale.
roc_panel_data <- function(run, scheme) {
  run_rows <- reg_hit_stats %>% filter(Metadata_run == run)
  run3_rows <- reg_hit_stats %>%
    filter(Metadata_run == "CS_run3") %>%
    filter(Metadata_scheme == scheme)

  panel <- rbind(run_rows, run3_rows)
  # pool_size is a factor; go through its labels so the numeric pool sizes are
  # recovered rather than the internal factor codes.
  panel$pool_color <- as.numeric(as.character(panel$pool_size))
  panel
}

# Build the TPR vs FPR plot for one panel: one line per pool size, colored on a
# log2 viridis scale, with a dashed identity line for reference.
roc_plot <- function(panel, title) {
  ggplot(panel, aes(y = tpr, x = fpr, color = pool_color)) +
    geom_line(aes(group = pool_size)) +
    theme_classic() +
    ylim(0, 1) +
    xlim(0, 0.65) +
    ggtitle(title) +
    scale_color_viridis(discrete = FALSE, trans = "log2") +
    labs(color = "Pool size") +
    geom_abline(linetype = "dashed")
}

# Draw `plot` into a 3.5 x 3.5 inch PDF at `path`. The plot is printed
# explicitly so this also works under source(), and the device is closed even
# if drawing fails.
save_roc_pdf <- function(plot, path) {
  pdf(path, width = 3.5, height = 3.5, useDingbats = FALSE)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# The data is prepared before the PDF device is opened, so a failure while
# filtering cannot leave a device open.
data <- roc_panel_data("CS_run1", "random1")
save_roc_pdf(
  roc_plot(data, "Run 1"),
  "regression_summary_stats/tpr_fpr_roc_curves_run1.pdf"
)

data <- roc_panel_data("CS_run2", "random2")
save_roc_pdf(
  roc_plot(data, "Run 2"),
  "regression_summary_stats/tpr_fpr_roc_curves_run2.pdf"
)