# Plotting Pearson correlations between GT & CS for all deconvolution approaches

### What this does:
- Loads, for all CS screens, the Pearson correlation between GT & CS obtained with
    - Linear model deconvolution
    - Mahalanobis distance deconvolution
    - Bootstrapped Mahalanobis distance deconvolution
- Formats data for plotting
- Generates a plot comparing the methods

In [1]:
library(tidyverse)
library(ggrepel)
library(cowplot)
library(reshape2)

In [None]:
# Read the per-screen CS vs GT Pearson correlations from disk
cs_pearson <- read.csv(
  "CS_all_pearsons_gt_mahala_vs_mahala_mahalaBoot_regL1_empiricalcov_with_permute_raw_reg.csv"
)

# Remove screens that had a technical failure and were redone in CSrun3:
# runs CS_run1 / CS_run2 at compression 2 with 7 replicates.
cs_pearson <- cs_pearson %>%
  filter(
    !(
      Metadata_run %in% c("CS_run1", "CS_run2") &
        Metadata_compression == 2 &
        Metadata_replicates == 7
    )
  )

In [None]:
# Reformat the data for plotting

# Derived x-axis variable: (compression / 2) * replicates
cs_pearson$num_drugs_per_pool <- (cs_pearson$Metadata_compression / 2) *
  cs_pearson$Metadata_replicates
cs_pearson$Metadata_replicates_factor <- factor(
  cs_pearson$Metadata_replicates,
  levels = c(3, 5, 7)
)

# Melting the data groups
# NOTE(review): the Pearson columns are picked by position (6, 7, 8), which
# depends on the column order of the input CSV -- re-check if that file changes.
pearson_cols <- c(6, 7, 8)
pearson_values <- cs_pearson[, pearson_cols]
non_pearson_values <- cs_pearson[, -pearson_cols]

# Convert every non-Pearson column to a factor (same result as the previous
# column-by-column loop, without the 1:ncol() index sequence).
non_pearson_values[] <- lapply(non_pearson_values, factor)

new_cs_pearson <- cbind(non_pearson_values, pearson_values)

# Name the id and measure columns explicitly instead of letting melt() infer
# them from column types: the split no longer depends on how each column was
# parsed, and melt() stops emitting its "Using ... as id variables" message.
melted_cs_pearson <- melt(
  new_cs_pearson,
  id.vars = names(non_pearson_values),
  measure.vars = names(pearson_values),
  variable.name = "comparison",
  value.name = "pearson"
)


In [None]:
# Plot the reformatted data

# num_drugs_per_pool is converted back to numeric (via character, so that a
# factor yields its labels rather than its integer codes) because it is drawn
# on a continuous log2 x-axis below.
melted_cs_pearson$num_drugs_per_pool <- as.numeric(
  as.character(melted_cs_pearson$num_drugs_per_pool)
)

# One point per screen and comparison, plus a per-comparison linear fit.
# NOTE(review): ylim(0, 1) removes any observation with a correlation outside
# [0, 1] before the lm fit is computed (ggplot2 warns when this happens). Use
# coord_cartesian(ylim = c(0, 1)) instead if such points should still inform
# the fit -- confirm the intended behaviour.
pearson_plot <- ggplot(
  melted_cs_pearson,
  aes(x = num_drugs_per_pool, y = pearson, color = comparison)
) +
  geom_smooth(aes(group = comparison), method = "lm") +
  geom_point() +
  theme_classic() +
  ylim(0, 1) +
  theme(legend.text = element_blank()) +
  theme(
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    axis.title = element_text(size = 8),
    plot.title = element_text(size = 10)
  ) +
  scale_x_continuous(trans = "log2") +
  ylab("Pearson correlation") +
  xlab("Number of drugs in each pool")

# pdf() fails when the target directory is missing, so create it if needed.
output_pdf <- "pearson_plots/pearson_comparison_empiricalcov_lm.pdf"
dir.create(dirname(output_pdf), showWarnings = FALSE, recursive = TRUE)

pdf(output_pdf, useDingbats = FALSE, width = 4, height = 2.5)
# Explicit print(): a ggplot object is only drawn implicitly by top-level
# auto-printing, which does not happen under source() or inside functions/loops.
print(pearson_plot)
dev.off()