In [None]:
source("Main.R")
source("Conf.R")
source("Utilities.R")
library("stringr")
library("ggpubr")

In [None]:
# Load the table of models. Later cells read its SubFolder, Model_Name and
# Alpha columns. `projectDir` is not defined in this notebook (presumably it
# is set by one of the sourced scripts).
models <- read.csv(
  paste0(projectDir, "/TextFiles/ComboKOModels.csv")
)

In [None]:
# Load the observed per-term effects and the FDRs from the model fitted with
# interaction terms, and bring both into a (Group, respGene) layout.
#
# Both RDS files live in the same folder. They are now both resolved through
# `projectDir` (the second one used to be read relative to the working
# directory), so the cell no longer depends on where the notebook is run from.
# `%ni%` is defined outside this notebook (presumably a "not in" operator).
rdsDir <- paste0(projectDir,
                 "/Notebooks/CombinatorialPerturbations/RDSFiles")

# Model terms that are dropped from both tables before any further processing.
covariateTerms <- c("(Intercept)", "n_genes", "mt_frac", paste0("leiden", 1:9))

# --- Observed effects ---------------------------------------------------
combosObserved <- readRDS(paste0(rdsDir, "/ComboEffects_lm_residuals.rds"))
combosObserved <- combosObserved[combosObserved$term %ni% covariateTerms, ]
combosObserved[, c("std.error", "statistic", "p.value")] <- NULL
# str_replace_all() is vectorised, so no per-element sapply() is needed.
combosObserved$term <- str_replace_all(as.character(combosObserved$term),
                                       "_", "")
# Positional rename: assumes the three remaining columns are, in order, the
# term, its estimate and the response gene.
colnames(combosObserved) <- c("Group", "ObservedLogFCEstimate", "respGene")


# --- Interaction-model FDRs ---------------------------------------------
combosInterP <- readRDS(
  paste0(rdsDir, "/ComboEffects_lm_residuals_withInteractions.rds")
)
combosInterP <- combosInterP[combosInterP$term %ni% covariateTerms, ]
combosInterP <- data.table(combosInterP)
# FDR correction is done separately for every response gene, over all terms
# left after the covariate filter. `n` is kept explicit so that NA p-values
# still count towards the number of tests, as before.
combosInterP[, FDR := p.adjust(p.value, method = "fdr", n = length(p.value)),
             by = respGene]
combosInterP <- data.frame(combosInterP)
# Strip underscores, then encode the ":" of interaction terms as "W".
combosInterP$term <- str_replace_all(as.character(combosInterP$term), "_", "")
combosInterP$term <- str_replace_all(combosInterP$term, ":", "W")
combosInterP <- combosInterP[, c("term", "FDR", "respGene")]
colnames(combosInterP) <- c("Group", "FDRInteraction", "respGene")



In [None]:
# Attach FDRInteraction to the observed effects. merge() keeps only the
# (Group, respGene) pairs that occur in both tables.
combosObserved <- merge(
  x = combosObserved,
  y = combosInterP,
  by = c("Group", "respGene")
)

In [None]:
# For every model listed in `models`, compare predicted with observed log fold
# changes and collect per-group fit metrics.
#   allR2s    - unique (R2, MAE, Group1, Group2, alpha, SubFolder, Model) rows,
#               metrics computed per Group over all merged response genes
#   allSgnR2s - the same metrics (SignR2, SignMAE), restricted to groups other
#               than K0..K5 and to rows with FDRInteraction < 0.1
# computeR2() and `%ni%` are defined outside this notebook (presumably in
# Utilities.R); their exact behaviour is not visible here.
options(repr.plot.width = 12, repr.plot.height = 12)
allR2s <- data.frame()
allSgnR2s <- data.frame()

# seq_len() makes the loop a no-op when `models` has no rows;
# 1:nrow(models) would iterate over c(1, 0) in that case.
for (i in seq_len(nrow(models))) {

  allResDF <- read.csv(paste0(projectDir,
                              "/Notebooks/CombinatorialPerturbations/outputs/",
                              models[i, "SubFolder"],
                              "/",
                              models[i, "Model_Name"],
                              "/PredictedFC_Coef.csv"))

  # Drop the intercept row and reshape to long format. melt() is called
  # without id.vars; the three-column rename below assumes `X` ends up as the
  # only id column.
  allResDF <- allResDF[allResDF$X != "intercept", ]
  allResDF <- melt(allResDF)
  colnames(allResDF) <- c("Group", "respGene", "PredictedLogFCEstimate")

  # Keep the part after "perturb_" and turn the first "_" into "W" so the
  # names can be matched against combosObserved$Group.
  # vapply() guarantees a character vector even for zero rows, where sapply()
  # would return list().
  allResDF$Group <- vapply(strsplit(as.character(allResDF$Group), "perturb_"),
                           function(parts) parts[2],
                           character(1))
  allResDF$Group <- str_replace(allResDF$Group, "_", "W")
  allResDF$alpha <- models[i, "Alpha"]
  allResDF$SubFolder <- models[i, "SubFolder"]

  # Pair every prediction with the observed estimate for the same
  # (Group, respGene) and compute the per-group metrics.
  allRes <- merge(combosObserved, allResDF, by = c("Group", "respGene"))
  allRes <- data.table(allRes)
  allRes[, R2 := round(computeR2(response = ObservedLogFCEstimate,
                                 prediction = PredictedLogFCEstimate),
                       digits = 2),
         by = Group]
  allRes[, MAE := round(mean(abs(PredictedLogFCEstimate -
                                   ObservedLogFCEstimate)),
                        digits = 2),
         by = Group]
  allRes <- data.frame(allRes)

  #head(allResDF, n=50)

  # Split "AWB" into its two parts; groups without a "W" get Group2 = Group1.
  groupParts <- strsplit(as.character(allRes$Group), "W")
  allRes$Group1 <- vapply(groupParts, function(parts) parts[1], character(1))
  allRes$Group2 <- vapply(groupParts, function(parts) parts[2], character(1))
  allRes$ObsPredDif <- allRes$ObservedLogFCEstimate -
    allRes$PredictedLogFCEstimate
  #allRes$AbsObsPredDif = abs(allRes$ObservedLogFCEstimate - allRes$PredictedLogFCEstimate)
  allRes$GroupGene <- paste0(allRes$Group, "_", allRes$respGene)
  noSecondPart <- is.na(allRes$Group2)
  allRes$Group2[noSecondPart] <- allRes$Group1[noSecondPart]

  myTmp <- unique(allRes[, c("R2", "MAE", "Group1", "Group2",
                             "alpha", "SubFolder")])
  myTmp$Model <- models[i, "Model_Name"]

  allR2s <- rbind(allR2s, myTmp)

  # options(repr.plot.width=10, repr.plot.height=10)
  # print(ggplot(allRes, aes(x=PredictedLogFCEstimate, y=ObservedLogFCEstimate)) +
  #     geom_point(alpha=0.4, shape=20, color="blue") +
  #     facet_grid(Group1 ~ Group2)+
  #     geom_text(aes(label= paste0("R^2 = ",R2), color="red"), x=0, y=-0.3, size=5)+
  #     geom_text(aes(label= paste0("MAE = ",MAE), color="red"), x=0, y=-0.5, size=5)+
  #     geom_abline(linetype="dashed")+
  #     theme_minimal()+
  #     theme(axis.text = element_text(size = 15),
  #           axis.title = element_text(size = 25),
  #           strip.text = element_text(size = 25), legend.position='none')+
  #     xlim(-0.6,0.6)+ylim(-0.6,0.6)+
  #     ylab("Observed fold change wrt control (lg2)")+
  #     xlab("Predicted fold change wrt control (lg2)")+ggtitle(models[i,"Model_Name"]))

  # Restrict to groups other than K0..K5 and to rows whose interaction FDR is
  # below 0.1. The explicit !is.na() keeps rows with a missing FDR from being
  # turned into all-NA rows by the logical subset.
  geneDE <- allRes[allRes$Group %ni% c("K0", "K1", "K2", "K3", "K4", "K5"), ]
  geneDE <- geneDE[!is.na(geneDE$FDRInteraction) &
                     geneDE$FDRInteraction < 0.1, ]
  geneDE <- data.table(geneDE)

  # Note: SignR2 is left unrounded, unlike R2 above.
  geneDE[, SignR2 := computeR2(response = ObservedLogFCEstimate,
                               prediction = PredictedLogFCEstimate),
         by = Group]
  geneDE[, SignMAE := round(mean(abs(PredictedLogFCEstimate -
                                       ObservedLogFCEstimate)),
                            digits = 2),
         by = Group]

  myTmp2 <- unique(geneDE[, c("SignR2", "SignMAE", "Group1", "Group2",
                              "alpha", "SubFolder")])
  myTmp2$Model <- models[i, "Model_Name"]
  myTmp2$SubFolder <- models[i, "SubFolder"]

  allSgnR2s <- rbind(allSgnR2s, myTmp2)

  # options(repr.plot.width=10, repr.plot.height=10)
  # print(ggplot(geneDE, aes(x=PredictedLogFCEstimate, y=ObservedLogFCEstimate)) +
  # geom_point(alpha=0.4, shape=20, color="blue") +
  # facet_grid(Group1 ~ Group2)+
  # geom_text(aes(label= paste0("MAE = ",SignMAE), color="red"), x=0, y=-0.3, size=7)+
  # geom_abline(linetype="dashed")+
  # theme_minimal()+
  # theme(axis.text = element_text(size = 15),
  #       axis.title = element_text(size = 25),
  #       strip.text = element_text(size = 25), legend.position='none')+
  # geom_vline(xintercept = 0, linetype="dashed", color="red")+
  # geom_hline(yintercept = 0, linetype="dashed", color="red")+
  # xlim(-0.4,0.4)+ylim(-0.4,0.4)+
  # ylab("Observed fold change wrt control (lg2)")+
  # xlab("Predicted fold change wrt control (lg2)")+ggtitle(models[i,"Model_Name"]))

}


In [None]:
# Partition the collected metrics by SubFolder: rows from "K_singles" versus
# rows from every other sub-folder.
allR2sCombos <- allR2s[allR2s[["SubFolder"]] != "K_singles", ]
allR2sSingles <- allR2s[allR2s[["SubFolder"]] == "K_singles", ]

In [None]:
# Keep only the "K_singles" rows whose alpha value also occurs among the
# non-singles rows.
allR2sSingles <- allR2sSingles[
  allR2sSingles[["alpha"]] %in% unique(allR2sCombos[["alpha"]]),
]

In [None]:
# Stack the two partitions again (non-singles rows first).
allR2s_sel <- rbind(allR2sCombos, allR2sSingles)

In [None]:
# Label each row as "<Group1>_<Group2>"; the plots below facet on this column.
allR2s_sel$Group <- paste0(
  allR2s_sel$Group1,
  "_",
  allR2s_sel$Group2
)

In [None]:
# Fix the order of the SubFolder levels. Any value outside these four levels
# becomes NA.
allR2s_sel$SubFolder <- factor(
  x = allR2s_sel$SubFolder,
  levels = c("K_singles", "K1K3", "K2K3", "K2K3_K1K3")
)

In [None]:
# Switch to a data.table so the grouped `:=` assignment further down works.
allR2s_sel <- data.table(allR2s_sel)

In [None]:
# Combined key "<SubFolder>_<alpha>", used as the grouping variable for meanR2.
allR2s_sel$SubFolderAlpha <- paste0(
  allR2s_sel$SubFolder,
  "_",
  allR2s_sel$alpha
)

In [None]:
# Mean R2 within every SubFolder/alpha combination, written to a new column;
# afterwards convert back to a plain data.frame.
allR2s_sel[, meanR2 := mean(R2), by = "SubFolderAlpha"]
allR2s_sel <- data.frame(allR2s_sel)

In [None]:
# Drop the rows whose two group parts are identical (K0_K0 ... K5_K5).
allR2s_sel <- allR2s_sel[
  allR2s_sel$Group %ni% c(
    "K0_K0", "K1_K1", "K2_K2",
    "K3_K3", "K4_K4", "K5_K5"
  ),
]

In [None]:
# Boxplots of R2 against the alpha value (axis label "KL loss weight"), one
# facet per Group, boxes filled by SubFolder.
options(repr.plot.width = 20, repr.plot.height = 20)

# The plot maps x to `alphaFactor`, but no visible cell creates that column,
# so on a fresh kernel ggplot fails with "object 'alphaFactor' not found".
# Derive it here from the numeric `alpha` column so the cell is self-contained.
allR2s_sel$alphaFactor <- factor(allR2s_sel$alpha)

#pdf("./../SupplementaryFigures/S_6O.pdf", width = 20, height = 20)
ggplot(data = allR2s_sel, aes(x = alphaFactor, y = R2, fill = SubFolder)) +
  facet_wrap(~Group, scales = "free", ncol = 5) +
  geom_boxplot() +
  theme_bw() +
  theme(axis.text.x = element_text(size = 20, angle = 90),
        axis.text.y = element_text(size = 20),
        axis.title = element_text(size = 20),
        strip.text = element_text(size = 20),
        legend.position = "top") +
  ylab("Explained variance in gene fold change (R^2)") +
  #stat_compare_means(method = "wilcox.test", comparisons = cList)+
  xlab("KL loss weight")

#dev.off()

In [None]:
# Restrict to the three pairwise groups K1_K2, K1_K3 and K2_K3.
allR2s_sel_subset <- allR2s_sel[
  allR2s_sel[["Group"]] %in% c("K1_K2", "K1_K3", "K2_K3"),
]

In [None]:
# Boxplots of R2 per alpha value, laid out as SubFolder (rows) by Group
# (columns), with a Wilcoxon comparison between the two alpha levels in cList.
options(repr.plot.width = 10, repr.plot.height = 15)
# Pair of x-axis levels handed to stat_compare_means().
cList <- list(c("0.2", "6"))

# The plot maps x/group/fill to `alphaFactor`, but no visible cell creates
# that column, so on a fresh kernel ggplot fails with "object 'alphaFactor'
# not found". Derive it from `alpha` here; factor() labels numeric values such
# as 0.2 and 6 as "0.2" and "6", which is what cList refers to (assumes
# `alpha` is numeric and contains both values).
allR2s_sel_subset$alphaFactor <- factor(allR2s_sel_subset$alpha)

#pdf("./../SupplementaryFigures/S_6P.pdf", width = 10, height = 15)
ggplot(data = allR2s_sel_subset,
       aes(x = alphaFactor, y = R2, group = alphaFactor, fill = alphaFactor)) +
  facet_grid(SubFolder ~ Group, scales = "free") +
  geom_boxplot() +
  theme_bw() +
  theme(axis.text.x = element_text(size = 15, angle = 90),
        axis.text.y = element_text(size = 20),
        axis.title = element_text(size = 20),
        strip.text = element_text(size = 20),
        legend.position = "none") +
  ylab("Explained variance in gene fold change (R^2)") +
  stat_compare_means(method = "wilcox.test", comparisons = cList,
                     label.y = c(0.7)) +
  xlab("KL loss weight")
#dev.off()

In [None]:
# Only the rows with alpha equal to 6.
allR2s_sel_alpha6 <- allR2s_sel[allR2s_sel[["alpha"]] == 6, ]

In [None]:
# Remove the rows whose two group parts are identical (K0_K0 ... K5_K5).
allR2s_sel_alpha6 <- allR2s_sel_alpha6[
  allR2s_sel_alpha6$Group %ni% c(
    "K0_K0", "K1_K1", "K2_K2",
    "K3_K3", "K4_K4", "K5_K5"
  ),
]

In [None]:
# Everything except the eight groups listed here; those eight are plotted
# separately in the Figure_5E cell.
allR2s_sel_alpha6_sel_1 <- allR2s_sel_alpha6[
  allR2s_sel_alpha6$Group %ni% c(
    "K0_K1", "K0_K2", "K0_K3", "K1_K2",
    "K1_K3", "K1_K4", "K2_K3", "K3_K4"
  ),
]

In [None]:
# Write S_7F.pdf: boxplots of R2 per SubFolder (axis label "Included groups
# during training"), one facet per Group in a single row.
options(repr.plot.width = 30, repr.plot.height = 8)
# Only referenced by the commented-out stat_compare_means() layer below.
cList <- list(c("K_singles", "K1K3"), c("K_singles", "K2K3"), c("K_singles", "K2K3_K1K3"))

# Build the plot before opening the device, so an error while constructing it
# cannot leave a PDF device open.
s7fPlot <- ggplot(data = allR2s_sel_alpha6_sel_1,
                  aes(x = SubFolder, y = R2, group = SubFolder,
                      fill = SubFolder)) +
  facet_wrap(~Group, scales = "free", nrow = 1) +
  geom_boxplot() +
  theme_bw() +
  theme(axis.text.x = element_text(size = 25, angle = 90),
        axis.text.y = element_text(size = 25),
        axis.title = element_text(size = 25),
        strip.text = element_text(size = 25),
        legend.position = "none") +
  ylab("Explained variance in \n gene fold change(R^2)") +
  #stat_compare_means(method = "wilcox.test", comparisons = cList)+
  xlab("Included groups during training")

# ggplot objects are only drawn when printed. The explicit print() makes sure
# the plot lands in the PDF even when this code is run through source() or
# inside a function, where top-level auto-printing does not happen.
pdf("./../SupplementaryFigures/S_7F.pdf", width = 30, height = 8)
print(s7fPlot)
dev.off()

In [None]:
# The eight groups shown in the Figure_5E plot.
allR2s_sel_alpha6_sel <- allR2s_sel_alpha6[
  allR2s_sel_alpha6[["Group"]] %in% c(
    "K0_K1", "K0_K2", "K0_K3", "K1_K2",
    "K1_K3", "K1_K4", "K2_K3", "K3_K4"
  ),
]

In [None]:
# Write Figure_5E.pdf: boxplots of R2 per SubFolder (axis label "Included
# groups during training"), one facet per Group, four facets per row.
options(repr.plot.width = 15, repr.plot.height = 15)
# Only referenced by the commented-out stat_compare_means() layer below.
cList <- list(c("K_singles", "K1K3"), c("K_singles", "K2K3"), c("K_singles", "K2K3_K1K3"))

# Build the plot before opening the device, so an error while constructing it
# cannot leave a PDF device open.
fig5ePlot <- ggplot(data = allR2s_sel_alpha6_sel,
                    aes(x = SubFolder, y = R2, group = SubFolder,
                        fill = SubFolder)) +
  facet_wrap(~Group, scales = "free", ncol = 4) +
  geom_boxplot() +
  theme_bw() +
  theme(axis.text.x = element_text(size = 25, angle = 90),
        axis.text.y = element_text(size = 25),
        axis.title = element_text(size = 25),
        strip.text = element_text(size = 25),
        legend.position = "none") +
  ylab("Explained variance in gene fold change (R^2)") +
  #stat_compare_means(method = "wilcox.test", comparisons = cList)+
  xlab("Included groups during training")

# ggplot objects are only drawn when printed. The explicit print() makes sure
# the plot lands in the PDF even when this code is run through source() or
# inside a function, where top-level auto-printing does not happen.
pdf("./../Figure_5E.pdf", width = 15, height = 15)
print(fig5ePlot)
dev.off()