In [None]:
source("Main.R")
source("Conf.R")
source("Utilities.R")
library("factoextra")
library("maptree")
library("ggpubr")
library("ica")
library("bayesbio")
#library(ICtest)

# library("AnnotationDbi")
# library("org.Mm.eg.db")
#library("clusterProfiler")


library(repr)
# Analysis constants: cut-offs and the number of ICA factors used below.
geneCutOff <- 4
targetCutOff <- 15
nFactors <- 15

# require() attaches DOSE but only warns (returning FALSE) when it is missing.
require(DOSE)

# Session options, set in a single call: the future.globals.maxSize value
# (891289600 = 850 * 1024^2) and the HTTPConstants.CHUNKED flag.
options(
  future.globals.maxSize = 891289600,
  HTTPConstants.CHUNKED = "false"
)



In [None]:
# Write a list of gene vectors to a comma-separated file, one column per
# list element.
#
# Vectors shorter than the longest one are padded with NA, and NA is written
# as a single space (" ") so that every row has the same number of fields.
#
# Args:
#   pathwayGenes: non-empty list of vectors (e.g. gene names). Its names are
#     used as column names.
#   fileName: path of the file to write.
#   cNames: forwarded to write.table() as `col.names`; FALSE (the default)
#     omits the header row.
#
# Returns: the value of write.table(); the function is called for its side
#   effect of writing `fileName`.
writePathwayFile <- function(pathwayGenes, fileName, cNames = FALSE) {
  # An empty list previously failed later with an obscure `colnames<-` error.
  if (length(pathwayGenes) == 0) {
    stop("pathwayGenes is empty: nothing to write", call. = FALSE)
  }

  maxLen <- max(lengths(pathwayGenes))
  padded <- lapply(pathwayGenes, function(genes) {
    c(genes, rep(NA, maxLen - length(genes)))
  })

  # Bind the equal-length columns side by side, then restore the list names.
  pathwayGenes.df <- do.call("cbind", lapply(padded, as.data.frame))
  colnames(pathwayGenes.df) <- names(pathwayGenes)

  write.table(
    pathwayGenes.df,
    fileName,
    sep = ",",
    row.names = FALSE,
    na = " ",
    quote = FALSE,
    col.names = cNames
  )
}

In [None]:
# Read the significant-coefficient table (first CSV column holds the row
# names) and transpose it, so the CSV's rows become the columns of selCoefs.
selCoefs <- t(
  read.csv("./../TextFiles/SignificantCoefMatrix.csv", row.names = 1)
)

# Each column label is reduced to its second "_"-separated token, which is
# used as the KO gene name.
KOGenes <- sapply(colnames(selCoefs), function(label) {
  strsplit(label, "_")[[1]][2]
})
colnames(selCoefs) <- KOGenes

In [None]:
# Guide module assignments; the first CSV column becomes the initial row
# names, which are then replaced by the GuideName column.
guideModulesN <- data.frame(
  read.csv("./../TextFiles/ME_GuideModules_leiden_6_Modules.csv"),
  row.names = 1
)
row.names(guideModulesN) <- guideModulesN$GuideName
guideModulesN[["GuideName"]] <- NULL

# Prefix the module id with "K" (the result is a character column).
guideModulesN$GuideGroup <- factor(guideModulesN$GuideGroup)
guideModulesN$GuideGroup <- paste0("K", guideModulesN$GuideGroup)

head(guideModulesN)

In [None]:
# Gene module assignments; the first CSV column becomes the initial row
# names, which are then replaced by the GeneName column.
geneModulesN <- data.frame(
  read.csv(
    "./../TextFiles/ME_GeneModules_leiden_11_Modules.csv",
    row.names = 1
  )
)
row.names(geneModulesN) <- geneModulesN$GeneName
geneModulesN[["GeneName"]] <- NULL

# Store the module id as a factor.
geneModulesN$GeneGroup <- factor(geneModulesN$GeneGroup)

head(geneModulesN)

In [None]:
# Run Infomax ICA (ica::icaimax) on the coefficient matrix. The number of
# components is taken from nFactors instead of a literal 15 so that it stays
# in sync with the per-factor loops and matrices built from nFactors below.
k <- icaimax(selCoefs, nc = nFactors, center = TRUE)

In [None]:
# Reshape the ICA result into labelled data frames:
#   myH: one row per factor, one column per row of selCoefs (from k$S).
#   myW: one row per KO gene, one column per factor (from k$M).
myH <- data.frame(t(k$S))
myW <- data.frame(k$M)

rownames(myW) <- KOGenes
colnames(myH) <- rownames(selCoefs)

# seq_len() rather than 1:n, so a zero-sized result cannot yield c(1, 0).
rownames(myH) <- paste0("Factor ", seq_len(nrow(myH)))
colnames(myW) <- paste0("Factor ", seq_len(ncol(myW)))


In [None]:
# Cut a hierarchical clustering tree at increasing heights (startHeight,
# startHeight + step, ...) until at most two clusters remain, and return the
# cutree() membership vector for that height.
.cutTreeToAtMostTwo <- function(tree, startHeight = 1, step = 0.5) {
  cutHeight <- startHeight
  membership <- cutree(tree, h = cutHeight)
  while (length(unique(membership)) > 2) {
    cutHeight <- cutHeight + step
    membership <- cutree(tree, h = cutHeight)
  }
  membership
}

# Draw, save and summarise the clustered heatmap of one ICA factor.
#
# The coefficients of the selected affected genes (heatmap rows) and KO genes
# (heatmap columns) are clipped to [-0.2, 0.2], clustered with Ward's method
# on Euclidean distances, and saved as "Factor <elem>.pdf".
#
# Args:
#   coefsAll: coefficient matrix indexed as [affected gene, KO gene].
#   sKOGenes: names of the KO genes (columns of coefsAll) to plot.
#   sEffectGenes: names of the affected genes (rows of coefsAll) to plot.
#   elem: factor index, used in the plot title and the PDF file name.
#
# Reads the globals geneModulesN and guideModulesN for the row/column
# annotations and calls save_pheatmap_pdf(), which is defined outside this
# block.
#
# Returns: a list with
#   geneClusters_one / geneClusters_two: affected genes in clusters 1 and 2,
#   guideClusters_one / guideClusters_two: KO genes in clusters 1 and 2,
#   geneOrder: affected genes in heatmap row order,
#   koOrder: KO genes in heatmap column order.
plotFactorHeatmap <- function(coefsAll, sKOGenes, sEffectGenes, elem) {
  # Hierarchical clustering needs at least two objects on each axis.
  if (length(sKOGenes) < 2 || length(sEffectGenes) < 2) {
    stop(
      "Factor ", elem, ": need at least two KO genes and two affected ",
      "genes to cluster the heatmap",
      call. = FALSE
    )
  }

  # tempDF: rows = KO genes, columns = affected genes. drop = FALSE keeps the
  # matrix shape and dimnames.
  tempDF <- t(coefsAll[sEffectGenes, sKOGenes, drop = FALSE])
  #tmpPVals <- pValsAllAdj[sTargets, sGenes]
  #tempDF[tmpPVals > 0.1] <- 0
  # Clip extreme coefficients so that they do not dominate the colour scale.
  tempDF[tempDF > 0.2] <- 0.2
  tempDF[tempDF < -0.2] <- -0.2

  annoCols <- list(
    GeneGroup = c(
      G0 = '#A6CEE3',
      G1 = '#1F78B4',
      G2 = '#B2DF8A',
      G3 = '#33A02C',
      G4 = '#FB9A99',
      G5 = '#FDBF6F',
      G6 = '#FF7F00',
      G7 = '#CAB2D6',
      G8 = '#6A3D9A',
      G9 = '#FFFF99',
      G10 = "#B5651D"
    ),
    GuideGroup = c(
      K0 = "#1f77b4",
      K1 = "#ff7f0e",
      K2 = "#279e68",
      K3 = "#d62728",
      K4 = "#aa40fc",
      K5 = "#8c564b"
    )
  )

  # drop = FALSE keeps these as data frames even if only one column exists.
  annotDFcol <- geneModulesN[colnames(tempDF), , drop = FALSE]
  annotDFrow <- guideModulesN[rownames(tempDF), , drop = FALSE]

  annotDFcol$GeneColor <- NULL
  annotDFrow$GuideColor <- NULL

  options(repr.plot.width = 10, repr.plot.height = 16)
  # The heatmap shows t(tempDF): rows = affected genes, columns = KO genes.
  # The palette is passed by name instead of relying on positional matching.
  km2 <- pheatmap(
    t(tempDF),
    color = colorRampPalette(c("blue", "white", "orange"))(100),
    main = paste0("Factor ", elem),
    na_col = "grey",
    annotation_row = annotDFcol,
    annotation_col = annotDFrow,
    annotation_colors = annoCols,
    clustering_method = "ward.D2",
    clustering_distance_rows = "euclidean",
    clustering_distance_cols = "euclidean"
  )

  save_pheatmap_pdf(km2, paste0("Factor ", elem, ".pdf"), width = 10, height = 16)

  # tree_row clusters the heatmap rows (affected genes = colnames(tempDF)) and
  # tree_col the heatmap columns (KO genes = rownames(tempDF)). The two label
  # vectors were previously swapped, which returned wrong names and NAs.
  geneOrder <- colnames(tempDF)[km2$tree_row$order]
  koOrder <- rownames(tempDF)[km2$tree_col$order]

  geneClusters <- .cutTreeToAtMostTwo(km2$tree_row)
  geneClusters_one <- names(geneClusters[geneClusters == 1])
  geneClusters_two <- names(geneClusters[geneClusters == 2])

  guideClusters <- .cutTreeToAtMostTwo(km2$tree_col)
  guideClusters_one <- names(guideClusters[guideClusters == 1])
  guideClusters_two <- names(guideClusters[guideClusters == 2])

  list(
    "geneClusters_one" = geneClusters_one,
    "geneClusters_two" = geneClusters_two,
    "guideClusters_one" = guideClusters_one,
    "guideClusters_two" = guideClusters_two,
    "geneOrder" = geneOrder,
    "koOrder" = koOrder
  )
}


In [None]:
# Return the elements of x above the upper outlier fence
# Q3 + iqrMult * (Q3 - Q1).
#
# Args:
#   x: numeric vector; quantile() raises an error if it contains NA.
#   iqrMult: multiple of the inter-quartile range added to Q3. The default of
#     1 keeps the fence this function has always used.
#
# Returns: the subset of x (names preserved) strictly greater than the fence.
getOutliers_up <- function(x, iqrMult = 1) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), names = FALSE)
  upperFence <- quartiles[2] + iqrMult * (quartiles[2] - quartiles[1])
  x[x > upperFence]
}

# Return the elements of x below the lower outlier fence
# Q1 - iqrMult * (Q3 - Q1).
#
# Args:
#   x: numeric vector; quantile() raises an error if it contains NA.
#   iqrMult: multiple of the inter-quartile range subtracted from Q1. The
#     default of 1 keeps the fence this function has always used.
#
# Returns: the subset of x (names preserved) strictly smaller than the fence.
getOutliers_down <- function(x, iqrMult = 1) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), names = FALSE)
  lowerFence <- quartiles[1] - iqrMult * (quartiles[2] - quartiles[1])
  x[x < lowerFence]
}

In [None]:
# Accumulators, each extended by one entry per ICA factor in the loop below.
pathwayKOs <- list()
pathwayKOs_up <- list()
pathwayKOs_down <- list()

pathwayEffectedGenes <- list()
pathwayEffectedUpGenes <- list()
pathwayEffectedDownGenes <- list()
factorGeneGroups <- list()
geneKOOrder <- list()

for (elem in 1:nFactors) {
  print(paste0("############## FACTOR : ", elem, " ##################"))

  # KO genes whose value in this factor's column of myW is an outlier.
  factor0X <- myW[, elem]
  names(factor0X) <- rownames(myW)
  sKOGenes_up <- names(getOutliers_up(factor0X))
  sKOGenes_down <- names(getOutliers_down(factor0X))

  # Affected genes whose value in this factor's row of myH is an outlier.
  factor0Y <- unlist(myH[elem, ])
  names(factor0Y) <- colnames(myH)
  sEffectGenes_up <- names(getOutliers_up(factor0Y))
  sEffectGenes_down <- names(getOutliers_down(factor0Y))

  # NOTE: `k` is re-used here, so the ICA result stored in `k` earlier in the
  # notebook is overwritten from the first iteration onwards.
  k <- plotFactorHeatmap(
    selCoefs,
    c(sKOGenes_up, sKOGenes_down),
    c(sEffectGenes_up, sEffectGenes_down),
    elem
  )

  # Label the six returned elements with the factor they belong to.
  names(k) <- paste0(
    "Factor ", elem,
    c(
      " gene group 1", " gene group 2",
      " guide group 1", " guide group 2",
      " gene order", " guide order"
    )
  )
  factorGeneGroups <- lappend(factorGeneGroups, k[1:4])
  geneKOOrder <- lappend(geneKOOrder, k[5:6])

  pathwayKOs_up <- lappend(pathwayKOs_up, sKOGenes_up)
  pathwayKOs_down <- lappend(pathwayKOs_down, sKOGenes_down)
  pathwayKOs <- lappend(pathwayKOs, c(sKOGenes_up, sKOGenes_down))

  pathwayEffectedUpGenes <- lappend(pathwayEffectedUpGenes, sEffectGenes_up)
  pathwayEffectedDownGenes <- lappend(pathwayEffectedDownGenes, sEffectGenes_down)
  pathwayEffectedGenes <- lappend(
    pathwayEffectedGenes,
    c(sEffectGenes_up, sEffectGenes_down)
  )
}


In [None]:
# Label the per-factor up/down gene lists. The labels are derived from
# nFactors (the bound of the loop that filled these lists) instead of a
# literal 1:15, so they stay correct if the number of factors changes.
names(pathwayEffectedUpGenes) <- paste0("Factor_", seq_len(nFactors))
names(pathwayEffectedDownGenes) <- paste0("Factor_", seq_len(nFactors))

In [None]:
# Flatten one nesting level so every gene/guide group is a top-level list
# element, then discard the groups that contain no members.
factorGeneGroups <- unlist(factorGeneGroups, recursive = FALSE)
lLen <- lengths(factorGeneGroups)
factorGeneGroups <- factorGeneGroups[lLen != 0]

In [None]:
# Read the per-factor membership table, keeping strings as character
# columns, and split it by column name into guide and gene columns.
myICAFactors <- data.frame(
  read.csv(
    "./../TextFiles/ICA_15_factors_effectedGenes_paper.csv",
    stringsAsFactors = FALSE
  ),
  stringsAsFactors = FALSE
)
myICAFactors_guides <- myICAFactors[, grepl("guide", colnames(myICAFactors))]
myICAFactors_genes <- myICAFactors[, grepl("gene", colnames(myICAFactors))]


In [None]:
# Build a binary guide-by-factor membership matrix: rows are KO guides,
# columns are factors, and a cell is 1 when the guide is listed for that
# factor in myICAFactors_guides.

# All guides named in the table; " " is the placeholder that pads short
# columns, so it (and any NA) is not a guide.
icaGuides <- unique(unlist(myICAFactors_guides))
icaGuides <- icaGuides[!is.na(icaGuides) & icaGuides != " "]

KOFactorMatrix <- data.frame(matrix(0, nrow = length(icaGuides), ncol = nFactors))
colnames(KOFactorMatrix) <- paste0("Factor ", seq_len(ncol(KOFactorMatrix)))
rownames(KOFactorMatrix) <- sort(icaGuides)

# The factor number is the second "."-separated token of each column name.
factorNames <- paste0(
  "Factor ",
  vapply(
    colnames(myICAFactors_guides),
    function(x) strsplit(x, "\\.")[[1]][2],
    character(1),
    USE.NAMES = FALSE
  )
)

for (i in seq_len(ncol(myICAFactors_guides))) {
  facKOGenes <- myICAFactors_guides[, i]
  facKOGenes <- facKOGenes[!is.na(facKOGenes) & facKOGenes != " "]
  KOFactorMatrix[facKOGenes, factorNames[i]] <- 1
}

# Sort rows so members of Factor 1 come first, ties broken by Factor 2, and
# so on. The keys are built from every column instead of being written out
# 15 times, so the sort follows nFactors.
koRowOrder <- do.call(order, unname(lapply(KOFactorMatrix, function(col) -col)))
KOFactorMatrix <- KOFactorMatrix[koRowOrder, , drop = FALSE]

In [None]:
options(repr.plot.width = 6, repr.plot.height = 26)

# Membership heatmap of KO guides per factor. Rows were already sorted when
# KOFactorMatrix was built and clustering is switched off, so no linkage
# method is passed; the former `method = "ward.D2"` is not a pheatmap()
# argument and had no effect.
kk <- pheatmap(
  KOFactorMatrix,
  border_color = "black",
  color = c("white", "darkgreen"),
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  main = "KO genes per factor",
  fontsize_col = 20,
  treeheight_col = 0,
  treeheight_row = 0
)

In [None]:
# Pairwise jaccardSets() value between the guide sets of every pair of
# factors, stored in a factor-by-factor data frame.
jaccGenDist_1 <- data.frame(matrix(0, nrow = nFactors, ncol = nFactors))
dimnames(jaccGenDist_1) <- list(
  colnames(KOFactorMatrix),
  colnames(KOFactorMatrix)
)

for (i in 1:nFactors) {
  # Guides flagged with 1 for factor i.
  guidesOfI <- rownames(KOFactorMatrix)[KOFactorMatrix[, paste0("Factor ", i)] == 1]
  for (j in 1:nFactors) {
    guidesOfJ <- rownames(KOFactorMatrix)[KOFactorMatrix[, paste0("Factor ", j)] == 1]
    jaccGenDist_1[i, j] <- jaccardSets(guidesOfI, guidesOfJ)
  }
}

# Blank the diagonal: the comparison of a factor with itself is not shown.
diag(jaccGenDist_1) <- NA


In [None]:
# Print the factor-by-factor jaccardSets() matrix computed from the guide sets.
jaccGenDist_1

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)
#jaccGenDist[lower.tri(jaccGenDist)] <- NA

# Clustered heatmap of the guide-based factor similarities. The linkage must
# be passed as `clustering_method`; the former `method = "ward.D2"` is not a
# pheatmap() argument, so it was silently ignored and the default linkage was
# used instead of Ward's.
xx <- pheatmap(
  jaccGenDist_1,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  display_numbers = round(jaccGenDist_1, 2),
  fontsize = 15
)

#save_pheatmap_pdf(xx, "Figure_S4F.pdf", width=10, height=10)

In [None]:
# corrplot::corrplot(as.matrix(jaccGenDist), order = 'AOE', 
#                     method = 'number', is.corr = FALSE, tl.cex = 1.5,
#                    diag = FALSE, type = 'upper')

In [None]:
# Build a binary factor-by-gene membership matrix: rows are factors, columns
# are genes, and a cell is 1 when the gene is listed for that factor in
# myICAFactors_genes.

# All genes named in the table. " " is the placeholder that pads short
# columns; it is removed here (as is done for the guides) so it no longer
# appears as a spurious blank gene column.
icaGenes <- unique(unlist(myICAFactors_genes))
icaGenes <- icaGenes[!is.na(icaGenes) & icaGenes != " "]

GenesFactorMatrix <- data.frame(matrix(0, ncol = length(icaGenes), nrow = nFactors))
rownames(GenesFactorMatrix) <- paste0("Factor ", seq_len(nrow(GenesFactorMatrix)))
colnames(GenesFactorMatrix) <- sort(icaGenes)

# The factor number is the second "."-separated token of each column name.
factorNames <- paste0(
  "Factor ",
  vapply(
    colnames(myICAFactors_genes),
    function(x) strsplit(x, "\\.")[[1]][2],
    character(1),
    USE.NAMES = FALSE
  )
)

for (i in seq_len(ncol(myICAFactors_genes))) {
  facGenes <- myICAFactors_genes[, i]
  facGenes <- facGenes[!is.na(facGenes) & facGenes != " "]
  GenesFactorMatrix[factorNames[i], facGenes] <- 1
}

# Sort the genes so members of Factor 1 come first, ties broken by Factor 2,
# and so on. The sort is done on the transposed (gene-by-factor) matrix with
# keys built from every factor column, then transposed back.
GenesFactorMatrix <- t(GenesFactorMatrix)
geneSortKeys <- lapply(
  seq_len(ncol(GenesFactorMatrix)),
  function(j) -GenesFactorMatrix[, j]
)
GenesFactorMatrix <- GenesFactorMatrix[do.call(order, geneSortKeys), , drop = FALSE]
GenesFactorMatrix <- t(GenesFactorMatrix)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 4)

# Membership heatmap of outlier genes per factor. Columns were already sorted
# when GenesFactorMatrix was built and clustering is switched off, so no
# linkage method is passed; the former `method = "ward.D2"` is not a
# pheatmap() argument and had no effect.
xx <- pheatmap(
  GenesFactorMatrix,
  border_color = "black",
  color = c("white", "red"),
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  main = "Outlier genes per factor",
  fontsize_col = 10,
  treeheight_col = 0,
  treeheight_row = 0
)

#save_pheatmap_pdf(xx, "Figure_S4B.pdf", width=15, height=4)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)

# Pairwise jaccardSets() value between the gene sets of every pair of
# factors, stored in a factor-by-factor data frame.
jaccGenDist <- data.frame(matrix(0, nrow = nFactors, ncol = nFactors))
dimnames(jaccGenDist) <- list(
  rownames(GenesFactorMatrix),
  rownames(GenesFactorMatrix)
)

for (i in 1:nFactors) {
  # Genes flagged with 1 for factor i.
  genesOfI <- colnames(GenesFactorMatrix)[GenesFactorMatrix[paste0("Factor ", i), ] == 1]
  for (j in 1:nFactors) {
    genesOfJ <- colnames(GenesFactorMatrix)[GenesFactorMatrix[paste0("Factor ", j), ] == 1]
    jaccGenDist[i, j] <- jaccardSets(genesOfI, genesOfJ)
  }
}

# Blank the diagonal: the comparison of a factor with itself is not shown.
diag(jaccGenDist) <- NA

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)

# Clustered heatmap (Ward linkage on Euclidean distances) of the gene-based
# factor similarities, annotated with the values rounded to two decimals.
xx <- pheatmap(
  jaccGenDist,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  display_numbers = round(jaccGenDist, 2),
  fontsize = 15
)

save_pheatmap_pdf(xx, "S_5G.pdf", width = 10, height = 10)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10)

# Re-use the column order of the heatmap currently stored in `xx` so that the
# guide-based matrix is drawn, unclustered, in that same factor order.
rcOrder <- rownames(jaccGenDist)[xx$tree_col$order]
koJaccOrdered <- jaccGenDist_1[rcOrder, rcOrder]

xx <- pheatmap(
  koJaccOrdered,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  clustering_distance_rows = "euclidean",
  clustering_distance_cols = "euclidean",
  display_numbers = round(koJaccOrdered, 2),
  fontsize = 15
)

save_pheatmap_pdf(xx, "S_5H.pdf", width = 10, height = 10)