# CS hits dotplot

CK 2022

### What does this do?
Makes the dot plot summarizing which drugs were consistent hits across the compressed screening (CS) screens.

In [None]:
library(tidyverse)
library(ggrepel)
library(cowplot)
library(viridis)
library(scico)
library(RColorBrewer)

Formatting the deconvolution data for dotplot

In [None]:
```{r}
# Colour palette for the GT Leiden drug clusters. Entry i of
# `drug_cluster_colors` is the colour for entry i of
# `all_gt_leiden_phenotypes`; the two vectors are later subset with the same
# logical mask, so they must stay the same length and in the same order.
drug_cluster_colors <- c(
  "#EFAD81", # 2-lo
  "#EF6F18", # 2-hi
  "#844E27", # 3
  "#6153A3", # 4
  "#C5E2BC", # 5
  "#437548", # 6
  "#8F9E35", # 6-8
  "#7AC4B9", # 7-lo
  "#12544D", # 7-hi
  "#CCA754"  # 8
)
all_gt_leiden_phenotypes <- c(
  "2-lo", "2-hi", "3", "4", "5", "6", "6-8", "7-lo", "7-hi", "8"
)

# Load the permutation-regression summary table, shorten one unwieldy drug
# name, keep only rows with a positive permuted-regression L1 norm, and derive
# the pool size from the compression and replicate columns.
long_methyldopa_name <- "Methyldopa Sesquihydrate (L-_-Methyl-Dopa Sesquihyrate)"

top_drugs <- read.csv("regression_summary_stats/regression_metrics_permute_0.01_Dec102021.csv") %>%
  mutate(
    # replace() performs the same indexed assignment as `x[cond] <- value`
    Metadata_perturbation = replace(
      Metadata_perturbation,
      Metadata_perturbation == long_methyldopa_name,
      "Methyldopa Sesquihydrate"
    )
  ) %>%
  filter(permute_reg_l1_norm > 0) %>%
  mutate(num_drugs_per_pool = Metadata_compresion / 2 * Metadata_replicates)

# Normalize the l1 norms
#
# Every permuted-regression L1 norm is divided by the largest L1 norm found in
# the same (run, compression, replicates) group, so the strongest hit of each
# group ends up with normalized_l1_norm == 1.

# Per-group maxima, kept as a small lookup table for inspection.
top_l1_data <- top_drugs %>%
  group_by(Metadata_run, Metadata_compresion, Metadata_replicates) %>%
  summarize(max_l1 = max(permute_reg_l1_norm))

# A grouped mutate() does the division for all rows at once. Unlike a
# `for (i in 1:nrow(...))` loop it is safe on an empty table, does not
# re-filter the lookup table once per row, and leaves no loop variables behind
# in the global environment. Row order is preserved and the new column is
# appended last.
top_drugs <- top_drugs %>%
  group_by(Metadata_run, Metadata_compresion, Metadata_replicates) %>%
  mutate(normalized_l1_norm = permute_reg_l1_norm / max(permute_reg_l1_norm)) %>%
  ungroup() %>%
  as.data.frame() # keep the plain data.frame class used by the code below

# Find the drugs that are hits for both replicates
# (implemented as: the drug appears in more than one row for at least one
# pool size)
counts_by_drug_and_scheme <- top_drugs %>%
  group_by(Metadata_perturbation, num_drugs_per_pool) %>%
  summarize(n = n())

drugs_hit_at_both_reps <- counts_by_drug_and_scheme %>%
  filter(n > 1) %>%
  pull(Metadata_perturbation) %>%
  unique()

# Keep only the rows belonging to those drugs.
top_drugs <- top_drugs %>%
  filter(Metadata_perturbation %in% drugs_hit_at_both_reps)

# Finding the drug counts
# Rank drugs by the number of rows they appear in, breaking ties by the mean
# normalized L1 norm (both descending).
# (Alternative kept for reference: rank by desc(mean_l1) only.)
temp <- top_drugs %>%
  group_by(Metadata_perturbation) %>%
  summarize(
    n = n(),
    mean_l1 = mean(normalized_l1_norm)
  ) %>%
  arrange(desc(n), desc(mean_l1))

drug_count_order <- temp[["Metadata_perturbation"]]

# Levels are reversed so the top-ranked drug is the last level, i.e. it is
# drawn at the top of the y axis.
top_drugs$Metadata_perturbation <- factor(
  top_drugs$Metadata_perturbation,
  levels = rev(drug_count_order)
)

# Restrict the cluster palette to the GT Leiden clusters that actually occur
# among the retained drugs, and add a leading "" level (drawn in grey) for
# rows with a blank cluster label.

# The mask below is only valid against the full, unsubset palette. Re-running
# this section on an already-subset palette would silently mis-align colours,
# so fail loudly instead.
stopifnot(
  "drug_cluster_colors must be the full palette (one colour per entry of all_gt_leiden_phenotypes); re-run the palette definition first" =
    length(drug_cluster_colors) == length(all_gt_leiden_phenotypes)
)

gt_is_present <- all_gt_leiden_phenotypes %in% top_drugs$gt_leiden
use_gt <- c("", all_gt_leiden_phenotypes[gt_is_present])
drug_cluster_colors <- c("grey", drug_cluster_colors[gt_is_present])

# Name each colour by its level. scale_color_manual() matches a named `values`
# vector by name; an unnamed vector is matched by position against the levels
# present in the data, so if no row had the "" label every cluster would be
# shifted onto its neighbour's colour.
names(drug_cluster_colors) <- use_gt

top_drugs$gt_leiden <- factor(top_drugs$gt_leiden, levels = use_gt)

# Turn pool size into a factor for the discrete x axis. The unique sizes are
# sorted before being converted to character, so the levels follow the sort
# order of the original values rather than alphabetical order of the labels.
pool_order <- top_drugs$num_drugs_per_pool %>%
  unique() %>%
  sort() %>%
  as.character()

top_drugs <- top_drugs %>%
  mutate(
    num_drugs_per_pool = factor(
      as.character(num_drugs_per_pool),
      levels = pool_order
    )
  )


```

Making the dotplot (three plots are generated to provide the data points and values; the layout was later reformatted in Adobe Illustrator)

In [None]:
# Page size (inches) shared by the three PDFs.
height_val <- 5.25
width_val <- 4.5

# Output folder for the figure panels.
fig_dir <- "~/Dropbox (MIT)/Compressed Screening/Latest Draft/FIGURES/Fig3"

# Theme shared by all three dot plots.
dotplot_theme <- theme_classic() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 6),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title.x = element_text(size = 12)
  )

# Write one ggplot to `filename` inside `fig_dir` as a PDF.
#   plot     - a ggplot object
#   filename - file name (not a path) of the PDF to create
# Returns `plot` invisibly.
save_dotplot <- function(plot, filename) {
  pdf(file.path(fig_dir, filename),
      height = height_val, width = width_val, useDingbats = FALSE)
  # Close the device even if rendering fails, so no device is left open.
  on.exit(dev.off(), add = TRUE)
  # Explicit print(): ggplot objects are only auto-printed at top level, so
  # without it the PDF would be blank when this script is source()d.
  print(plot)
  invisible(plot)
}

# use this plot as the main thing to show
top_drugs$Metadata_perturbation <- factor(
  top_drugs$Metadata_perturbation,
  levels = rev(drug_count_order)
)
# Mean normalized L1 norm per drug and screening scheme; one dot per row.
bblah <- top_drugs %>%
  group_by(Metadata_perturbation, Metadata_compresion, Metadata_replicates,
           num_drugs_per_pool) %>%
  summarize(mean_l1 = mean(normalized_l1_norm))

l1_plot <- ggplot(bblah, aes(x = num_drugs_per_pool, y = Metadata_perturbation,
                             fill = mean_l1)) +
  geom_point(pch = 21, size = 3) +
  ylab("") + xlab("Pool size") +
  scale_fill_gradient(low = "white", high = "red") +
  dotplot_theme
save_dotplot(l1_plot,
             "drugs_hits_in_both_reps_dotplot_permute_reg_squares_legend.pdf")


# This plot generates the Mahalanobis distance colors
mahal_plot <- ggplot(top_drugs, aes(x = num_drugs_per_pool,
                                    y = Metadata_perturbation,
                                    color = gt_mahalanobis)) +
  geom_point(shape = 15, size = 3) +
  ylab("") + xlab("Pool size") +
  scale_color_viridis(option = "magma") +
  dotplot_theme
save_dotplot(mahal_plot,
             "drugs_hits_in_both_reps_dotplot_log_mahal_squares_legend.pdf")

# This plot generates the GT drug cluster colors
pheno_plot <- ggplot(top_drugs, aes(x = num_drugs_per_pool,
                                    y = Metadata_perturbation,
                                    color = gt_leiden)) +
  geom_point(size = 3) +
  ylab("") + xlab("Pool size") +
  scale_color_manual(values = drug_cluster_colors) +
  dotplot_theme
save_dotplot(pheno_plot,
             "drugs_hits_in_both_reps_dotplot_log_pheno_squares_legend.pdf")
