In [None]:
source("Main.R")
source("Conf.R")
source("Utilities.R")
library("maptree")
library(igraph)
library(Hmisc)
library(ggpubr)
library(BiRewire)

In [None]:
# Colour look-up passed to pheatmap() as `annotation_colors`: one named vector
# of hex colours per annotation track (gene groups 0-7, guide groups K0-K5).
annoCols <- list(
  GeneGroup = setNames(
    c("#1B9E77", "#D95F02", "#7570B3", "#E7298A",
      "#66A61E", "#E6AB02", "#A6761D", "#666666"),
    paste0("GeneGroup_", 0:7)
  ),
  GuideGroup = setNames(
    c("#1f77b4", "#ff7f0e", "#279e68", "#d62728", "#aa40fc", "#8c564b"),
    paste0("K", 0:5)
  )
)


In [None]:
# Load the E3-complex regulator metadata and collect the distinct, non-empty
# guide symbols listed in its `Symbol_guides` column.
# `projectDir` is not defined in this file (presumably set by Conf.R -- TODO
# confirm).
# stringsAsFactors is given to read.csv(), which is where column types are
# decided; read.csv() already returns a data frame, so no as.data.frame()
# wrapper is needed.
realE3s <- read.csv(
  file.path(projectDir, "ManuscriptFigures",
            "220610_regulators_metadata_E3_Complex.csv"),
  stringsAsFactors = FALSE
)
e3s <- unique(realE3s$Symbol_guides)
# Drop blanks; the explicit NA test keeps missing symbols from surviving the
# filter as NA entries.
e3s <- e3s[!is.na(e3s) & nzchar(e3s)]
length(e3s)

In [None]:
# Module assignments for guides and for genes; the first CSV column is used as
# row names in both tables.
koModules <- read.csv(
  "./../TextFiles/ME_GuideModules_leiden_6_Modules.csv",
  row.names = 1
)
geneModules <- read.csv(
  "./../TextFiles/ME_GeneModules_leiden_11_Modules.csv",
  row.names = 1
)

# Every symbol that occurs as a guide name or as a module gene name.
allConsideredGenes <- unique(c(koModules$GuideName, geneModules$GeneName))
length(allConsideredGenes)

In [None]:
# STRING alias table (tab separated), reduced to the distinct
# (protein id, alias) pairs whose alias is one of the considered genes.
proInf <- read.csv(
  "./../PositiveControls/10090.protein.aliases.v11.5.txt",
  sep = "\t"
)
proInf <- unique(
  proInf[is.element(proInf$alias, allConsideredGenes),
         c("X.string_protein_id", "alias")]
)
names(proInf) <- c("ProID", "GeneName")
dim(proInf)

In [None]:
# Read the space-separated STRING link table, keep only links whose two
# proteins both map to a considered gene, and cache the reduced table on disk.
allInteractions <- read.csv(
  "./../PositiveControls/10090.protein.links.detailed.txt",
  sep = " "
)
allInteractions <- allInteractions[
  allInteractions$protein1 %in% proInf$ProID &
    allInteractions$protein2 %in% proInf$ProID,
]
write.csv(
  allInteractions,
  "./../TextFiles/10090.protein.links.detailed_ourGenes.txt",
  row.names = FALSE
)


# Re-load the cached STRING links and turn them into a de-duplicated table of
# gene-symbol pairs that have experimental evidence.
allInteractions <- read.csv("./../TextFiles/10090.protein.links.detailed_ourGenes.txt")

scoreCols <- c("experimental", "neighborhood", "fusion", "cooccurence",
               "coexpression", "database", "textmining", "combined_score")

# Attach the gene symbol of each interaction partner through its protein id.
# proInf_1 is a renamed copy; proInf itself is not modified.
proInf_1 <- proInf

allInteractions$ProID <- allInteractions$protein1
colnames(proInf_1) <- c("ProID", "GeneName_1")
allInteractions <- merge(allInteractions, proInf_1, by = "ProID")

allInteractions$ProID <- allInteractions$protein2
colnames(proInf_1) <- c("ProID", "GeneName_2")
allInteractions <- merge(allInteractions, proInf_1, by = "ProID")

# Keep only links with a non-zero experimental score.
allInteractions <- allInteractions[allInteractions$experimental != 0, ]

# Put the two partners of every link in sorted order so that A-B and B-A
# collapse into one row. The sorted names are kept directly instead of being
# recovered by splitting the pasted "A_B" key, which would mis-split any
# symbol that itself contains an underscore.
partnerA <- as.character(allInteractions$GeneName_1)
partnerB <- as.character(allInteractions$GeneName_2)
allInteractions$GeneName_1 <- pmin(partnerA, partnerB)
allInteractions$GeneName_2 <- pmax(partnerA, partnerB)
allInteractions$GenePair <- paste(allInteractions$GeneName_1,
                                  allInteractions$GeneName_2,
                                  sep = "_")

allInteractions <- unique(
  allInteractions[, c("GeneName_1", "GeneName_2", scoreCols, "GenePair")]
)


In [None]:
#write.csv(allInteractions, "./../TextFiles/STRING_Interactions.csv", row.names=FALSE)

In [None]:
# Interactions in which both partners are guide names listed in koModules.
allInteractions_KOs <- allInteractions[
  allInteractions$GeneName_1 %in% koModules$GuideName &
    allInteractions$GeneName_2 %in% koModules$GuideName,
]

In [None]:
# Interactions in which both partners belong to the e3s symbol set.
allInteractions_E3s <- allInteractions[
  allInteractions$GeneName_1 %in% e3s &
    allInteractions$GeneName_2 %in% e3s,
]

In [None]:
# Pairwise Pearson correlation between the rows of the coefficient table.
coefsAll <- read.csv("./../MixedEffectLMOutputs/ME_SignificantBetaCoefs.csv", row.names = 1)

# Row labels are split on "_" and only the second piece is kept as the name.
rownames(coefsAll) <- vapply(
  strsplit(rownames(coefsAll), "_"),
  function(pieces) pieces[2],
  character(1)
)

# The table is transposed so that its rows become the columns handed to
# rcorr().
myCorr <- rcorr(t(coefsAll), type = "pearson")

# check.names = FALSE keeps the column names identical to the row names.
# With the default, data.frame() rewrites non-syntactic names (for example
# ones containing "-" or starting with a digit) in the columns only, and later
# look-ups of the form myCovar[gene1, gene2] would then fail for those genes.
myCovar <- data.frame(myCorr$r, check.names = FALSE)
myCovar_Pvals <- data.frame(myCorr$P, check.names = FALSE)

# Correlations whose p-value exceeds 0.05 are set to zero.
myCovar[myCovar_Pvals > 0.05] <- 0

In [None]:
#' Annotate an interaction table with correlations and guide modules and
#' build the corresponding undirected graph.
#'
#' @param IntGraph Data frame with one interaction per row and the columns
#'   `GeneName_1` and `GeneName_2`.
#' @param myCovar Square table (data frame or matrix) of correlation values
#'   whose row and column names are gene names.
#' @param koModules Data frame with a `GuideName` column followed by exactly
#'   two further columns; these are renamed to `GuideGroup_gene*` and
#'   `GuideColor_gene*` for each partner.
#' @return A list with `myGr`, the undirected graph built from the annotated
#'   pairs with the selected `koModules` rows as vertex table, and `IntGraph`,
#'   the input table extended by `CorValue` and the module columns of both
#'   partners. Pairs with a partner missing from `koModules` are dropped by
#'   the merges.
getInteractionGraph <- function(IntGraph, myCovar, koModules){

    # Look every pair up by exact name in one vectorised step. Character row
    # indexing of a data frame matches row names partially, so a gene missing
    # from myCovar could silently pick up another gene's value; match() cannot.
    # Pairs with a partner that is absent from myCovar get NA, and an empty
    # IntGraph simply yields an empty CorValue column.
    corMat = as.matrix(myCovar)
    rowIdx = match(as.character(IntGraph$GeneName_1), rownames(corMat))
    colIdx = match(as.character(IntGraph$GeneName_2), colnames(corMat))
    IntGraph$CorValue = corMat[cbind(rowIdx, colIdx)]

    # Module rows of every gene that takes part in at least one interaction.
    involvedGenes = unique(c(IntGraph$GeneName_1, IntGraph$GeneName_2))
    koModulesSelected = koModules[koModules$GuideName %in% involvedGenes,]

    # Module annotation of the first partner.
    km1 = koModulesSelected
    colnames(km1) = c("GuideName", "GuideGroup_gene1", "GuideColor_gene1")
    IntGraph$GuideName = IntGraph$GeneName_1
    IntGraph = merge(IntGraph, km1, by="GuideName")

    # Module annotation of the second partner.
    km2 = koModulesSelected
    colnames(km2) = c("GuideName", "GuideGroup_gene2", "GuideColor_gene2")
    IntGraph$GuideName = IntGraph$GeneName_2
    IntGraph = merge(IntGraph, km2, by="GuideName")

    g2 <- graph.data.frame(IntGraph[,c("GeneName_1","GeneName_2")],
                           vertices=koModulesSelected,
                           directed=FALSE)

    return(list(myGr = g2, IntGraph=IntGraph))

}

In [None]:
# GroupByVertex01 = function(Groups, spacing = 5) {
#          Position = (order(Groups) + spacing*Groups)
#          Angle    = Position * 2 * pi / max(Position)
#          matrix(c(cos(Angle), sin(Angle)), ncol=2)
# }

# GroupByVertex02 = function(Groups) {
#          numGroups = length(unique(Groups))
#          GAngle    = (1:numGroups) * 2 * pi / numGroups
#          Centers   = matrix(c(cos(GAngle), sin(GAngle)), ncol=2)
#          x = y = c()
#          for(i in 1:numGroups) {
#                  curGroup = which(Groups == unique(Groups)[i])
#                  VAngle = (1:length(curGroup)) * 2 * pi / length(curGroup)
#                  x = c(x, Centers[i,1] + cos(VAngle) / numGroups )
#                  y = c(y, Centers[i,2] + sin(VAngle) / numGroups)
#          }
#          matrix(c(x, y), ncol=2)
# }


In [None]:
#allInteractions_E3s = allInteractions_E3s[allInteractions_E3s$experimental > 199,]

In [None]:
# res1 = getInteractionGraph(allInteractions_KOs, myCovar, koModules)
# res1 = res1$IntGraph
#res1$GuideName = NULL
#write.csv(res1, "All_329_KOs.csv", row.names=FALSE)

In [None]:
# Annotated interaction table for the E3 subset, without the helper merge key
# and without self-pairs, written to disk.
res2 <- getInteractionGraph(allInteractions_E3s, myCovar, koModules)$IntGraph
res2$GuideName <- NULL
distinctPartners <- which(res2$GeneName_1 != res2$GeneName_2)
res2 <- res2[distinctPartners, ]
write.csv(res2, "Only_E3s.csv", row.names = FALSE)

In [None]:
# Reload the cached table and show its first rows.
res2 <- read.csv(file = "Only_E3s.csv")
head(res2, n = 6L)

In [None]:
# Direction of the correlation of each pair: 1 (positive), -1 (negative) or NA
# (value of exactly zero). sign() passes missing CorValue entries through as
# NA, whereas logical row assignment on a data frame stops with an error as
# soon as the condition contains an NA.
corDirection <- sign(as.numeric(res2$CorValue))
corDirection[which(corDirection == 0)] <- NA
res2$CorValueBinary <- corDirection

# Label of the two guide groups of a pair, smaller label first, built from the
# two group columns only instead of coercing every row of the table to text.
groupOfGene1 <- as.character(res2$GuideGroup_gene1)
groupOfGene2 <- as.character(res2$GuideGroup_gene2)
res2$GroupPair <- paste(pmin(groupOfGene1, groupOfGene2),
                        pmax(groupOfGene1, groupOfGene2),
                        sep = "_")

res2$GroupPair = factor(res2$GroupPair, levels = c('0_0', '0_1', '0_2', '0_3', '0_4', '0_5',
                                                  '1_1', '1_2', '1_3', '1_4', '1_5',
                                                  '2_2', '2_3', '2_4', '2_5',
                                                   '3_3', '3_4', '3_5',
                                                  '4_4', '4_5', '5_5'))

#res2 = res2[res2$experimental > 199,]
head(res2)

In [None]:
# Symmetric gene-by-gene table of correlation directions for all interacting
# pairs in res2: 0 = no interaction listed, 1 / -1 = interaction with a
# positive / negative correlation, NA = interaction whose CorValueBinary is NA.
uE3s <- sort(unique(c(res2$GeneName_1, res2$GeneName_2)))
nUniqGenes <- length(uE3s)

# Filled in a plain matrix (cheap scalar assignment) and converted afterwards.
# seq_len() makes the loop a no-op for an empty res2.
directionMat <- matrix(0, nrow = nUniqGenes, ncol = nUniqGenes,
                       dimnames = list(uE3s, uE3s))
partner1 <- as.character(res2$GeneName_1)
partner2 <- as.character(res2$GeneName_2)
for (i in seq_len(nrow(res2))) {
    directionMat[partner1[i], partner2[i]] <- res2[i, "CorValueBinary"]
    directionMat[partner2[i], partner1[i]] <- res2[i, "CorValueBinary"]
}

# check.names = FALSE keeps the gene names unchanged as column names.
myDF <- data.frame(directionMat, check.names = FALSE)


# Module rows of the genes present in the table, with "K"-prefixed group
# labels as a factor whose levels follow first appearance.
koModulesE3s <- koModules[koModules$GuideName %in% uE3s,]
koModulesE3s$GuideGroup <- paste0("K", koModulesE3s$GuideGroup)
koModulesE3s$GuideGroup <- factor(koModulesE3s$GuideGroup, levels = unique(koModulesE3s$GuideGroup))



In [None]:
# Bring rows and columns of myDF into the order of koModulesE3s.
guideOrder <- koModulesE3s$GuideName
myDF <- myDF[guideOrder, guideOrder]
dim(myDF)

In [None]:
# Heatmap of the full direction table in koModulesE3s order, annotated on both
# axes with the guide group of each gene.
myDF <- myDF[koModulesE3s$GuideName, koModulesE3s$GuideName]

# Annotation table: koModulesE3s rows in heatmap order, minus the name and
# colour columns.
annotDF <- koModulesE3s[rownames(myDF), ]
annotDF <- annotDF[, setdiff(colnames(annotDF), c("GuideName", "GuideColor")),
                   drop = FALSE]

options(repr.plot.width = 8, repr.plot.height = 8)

# The palette is handed over as the second unnamed argument, exactly as in the
# argument matching of the previous call form.
kk <- pheatmap(
  myDF,
  colorRampPalette(c("blue", "white", "red"))(100),
  annotation_row = annotDF,
  annotation_col = annotDF,
  annotation_colors = annoCols,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  treeheight_row = 0,
  treeheight_col = 0,
  fontsize = 4,
  na_col = "darkgrey"
)

#save_pheatmap_pdf(kk, "S_4F.pdf", width=14, height=12)


In [None]:
# Number of zero entries in every column of myDF.
mm <- vapply(myDF, function(column) sum(column == 0, na.rm = TRUE), integer(1))
#hist(kk, breaks=100)

# Keep the genes with fewer than 141 zero entries and redraw the heatmap for
# them, this time with clustering left at the pheatmap default.
selGenes <- names(mm)[mm < 141]
myDF_selected <- myDF[selGenes, selGenes]

annotDF <- koModulesE3s[rownames(myDF_selected), ]
annotDF <- annotDF[, setdiff(colnames(annotDF), c("GuideName", "GuideColor")),
                   drop = FALSE]

options(repr.plot.width = 14, repr.plot.height = 12)

# The palette is handed over as the second unnamed argument, exactly as in the
# argument matching of the previous call form.
kk <- pheatmap(
  myDF_selected,
  colorRampPalette(c("blue", "white", "red"))(100),
  annotation_row = annotDF,
  annotation_col = annotDF,
  annotation_colors = annoCols,
  clustering_method = "ward.D2",
  clustering_distance_rows = "euclidean",
  treeheight_row = 0,
  treeheight_col = 0,
  na_col = "darkgrey"
)

#save_pheatmap_pdf(kk, "Figure_3C.pdf", width=14, height=12)

In [None]:
# 0/1 adjacency table of the interactions in myDF (NA entries count as an
# interaction), with every row and column label extended by "_<guide group>".
adjOrj <- copy(myDF)
dimnames(adjOrj) <- list(
  paste0(rownames(adjOrj), "_", koModulesE3s[rownames(adjOrj), "GuideGroup"]),
  paste0(colnames(adjOrj), "_", koModulesE3s[colnames(adjOrj), "GuideGroup"])
)
adjOrj[is.na(adjOrj) | adjOrj != 0] <- 1

# Long format: one row per (row label, column label) cell.
adjOrjTmp <- copy(adjOrj)
adjOrjTmp$Gene <- rownames(adjOrjTmp)
adjOrjTmp <- melt(adjOrjTmp, id.vars = "Gene")

# The second "_"-separated piece of each label is taken as its group.
adjOrjTmp$Group1 <- vapply(strsplit(as.character(adjOrjTmp$Gene), "_"),
                           function(pieces) pieces[2], character(1))
adjOrjTmp$Group2 <- vapply(strsplit(as.character(adjOrjTmp$variable), "_"),
                           function(pieces) pieces[2], character(1))

# Group pair label with the two groups in sorted order.
adjOrjTmp$GroupPair <- paste(pmin(adjOrjTmp$Group1, adjOrjTmp$Group2),
                             pmax(adjOrjTmp$Group1, adjOrjTmp$Group2),
                             sep = "_")

# Count the cells that hold an interaction, per group pair.
adjOrjTmp <- adjOrjTmp[adjOrjTmp$value == 1, ]
observedInterFreq <- data.frame(table(adjOrjTmp$GroupPair))
observedInterFreq

In [None]:
# Split every group-pair label on "_" into its first and second group.
pairPieces <- strsplit(as.character(observedInterFreq$Var1), "_")
observedInterFreq$G1 <- vapply(pairPieces, function(pieces) pieces[1], character(1))
observedInterFreq$G2 <- vapply(pairPieces, function(pieces) pieces[2], character(1))
head(observedInterFreq)

In [None]:
# allSimulatedFreq = data.frame()


# for(k in 1:200){
#     adjOrjTmp = copy(adjOrj)
#     adjOrjTmp = birewire.rewire.undirected(adjOrjTmp) 


#     adjOrjTmp$Gene = rownames(adjOrjTmp)
#     adjOrjTmp = melt(adjOrjTmp, id.vars = "Gene")
#     adjOrjTmp$Group1 = sapply(adjOrjTmp$Gene, function(x){strsplit(as.character(x),"_")[[1]][2]})
#     adjOrjTmp$Group2 = sapply(adjOrjTmp$variable, function(x){strsplit(as.character(x),"_")[[1]][2]})
#     adjOrjTmp$GroupPair =  apply(adjOrjTmp,1, function(x){kk = c(x["Group1"], x["Group2"])
#                               paste0(kk[order(kk)], collapse = "_")})

#     adjOrjTmp <- adjOrjTmp[adjOrjTmp$value == 1,]
#     simulatedInterFreq = data.frame(table(adjOrjTmp$GroupPair))
#     allSimulatedFreq = rbind(allSimulatedFreq, simulatedInterFreq)
# }
# #saveRDS(allSimulatedFreq, "allSimulatedFreq_200Thresh.rds")

In [None]:
# For every observed group pair, the fraction of simulated frequencies that
# exceed the observed frequency, stored in a 6 x 6 table indexed by group.
# Cells of pairs that are never visited stay NA.
allSimulatedFreq <- readRDS("allSimulatedFreq.rds")
#allSimulatedFreq = readRDS("allSimulatedFreq_200Thresh.rds")
allGroups <- c("K0", "K1", "K2", "K3", "K4", "K5")
allSgnMatrix <- data.frame(matrix(NA, 6, 6))
colnames(allSgnMatrix) <- allGroups
rownames(allSgnMatrix) <- allGroups

simulatedLabels <- as.character(allSimulatedFreq$Var1)

# seq_len() makes the loop a no-op when there are no observed pairs.
for (i in seq_len(nrow(observedInterFreq))) {
    mObs <- observedInterFreq[i, "Freq"]

    print(i)
    simFreq <- allSimulatedFreq[simulatedLabels == as.character(observedInterFreq[i, "Var1"]), "Freq"]

    # Without any simulated value for this pair there is nothing to compare
    # against; the cell is left NA instead of becoming 0/0.
    if (length(simFreq) == 0) {
        next
    }

    allSgnMatrix[observedInterFreq[i, "G1"], observedInterFreq[i, "G2"]] <-
        length(simFreq[simFreq > mObs]) / length(simFreq)
}

allSgnMatrix

In [None]:
# Per-gene (per column of myDF) interaction counts.
# The number of interactions is the column length minus the number of zero
# entries; the column length is taken from the data instead of a fixed 164 so
# the count stays right when the set of genes in myDF changes.
numberOfInteractions <- vapply(
  myDF, function(x) as.numeric(length(x) - sum(x == 0, na.rm = TRUE)), numeric(1)
)
# Interactions with a positive / negative correlation direction.
numberOfPositiveInteractions <- vapply(
  myDF, function(x) sum(x > 0, na.rm = TRUE), integer(1)
)
numberOfNegativeInteractions <- vapply(
  myDF, function(x) sum(x < 0, na.rm = TRUE), integer(1)
)
# Interactions stored as NA in myDF.
numberOfUncorrelationgInteractions <- vapply(
  myDF, function(x) sum(is.na(x)), integer(1)
)

observedInteractionCor <- data.frame(
  numberOfInteractions = numberOfInteractions,
  ObservedPositiveInteractions = numberOfPositiveInteractions,
  ObservedNegativeInteractions = numberOfNegativeInteractions,
  numberOfUncorrelationgInteractions = numberOfUncorrelationgInteractions
)
head(observedInteractionCor)

In [None]:
# Long-format table of the observed positive and negative counts, labelled by
# correlation direction and tagged as "Observed".
obsInt <- melt(
  observedInteractionCor[, c("ObservedPositiveInteractions",
                             "ObservedNegativeInteractions")]
)
obsInt$Direction <- ifelse(
  obsInt$variable == "ObservedNegativeInteractions",
  "Negative_Correlations",
  "Positive_Correlations"
)
obsInt$type <- "Observed"
head(obsInt)

In [None]:
# Direction (1, 0 or -1) of every correlation between the genes of myDF.
myCovarTmp <- sign(myCovar[rownames(myDF), colnames(myDF)])

In [None]:
# Reference distribution: rewire the interaction network 100 times with
# birewire.rewire.undirected() and count, per gene, how many of the rewired
# interactions coincide with a positive / negative / zero correlation.
#
# Fix: each iteration now stores the counts computed from the rewired network.
# Previously the data frame was filled with numberOfPositiveInteractions and
# numberOfNegativeInteractions, i.e. the observed counts, so every "sampled"
# row was a copy of the observed data. The per-iteration counts also no longer
# overwrite the observed numberOfInteractions /
# numberOfUncorrelationgInteractions vectors.
nRewirings <- 100

# 0/1 adjacency of the observed interactions (NA entries count as an
# interaction); identical for every iteration, so it is built once.
adjCor <- copy(myDF)
adjCor[adjCor != 0 | is.na(adjCor)] <- 1

sampledList <- vector("list", nRewirings)

for (i in seq_len(nRewirings)) {
    adjOrjTmp <- birewire.rewire.undirected(adjCor)
    # Non-edges become NA so that they drop out of the product below.
    adjOrjTmp[adjOrjTmp == 0] <- NA
    sampledInter <- myCovarTmp * adjOrjTmp

    sampledList[[i]] <- data.frame(
        # Non-NA cells per column, i.e. rewired interactions of the gene.
        numberOfInteractions = vapply(
            sampledInter, function(x) as.numeric(length(x) - sum(is.na(x))), numeric(1)
        ),
        sampledPositiveInteractions = vapply(
            sampledInter, function(x) sum(x > 0, na.rm = TRUE), integer(1)
        ),
        sampledNegativeInteractions = vapply(
            sampledInter, function(x) sum(x < 0, na.rm = TRUE), integer(1)
        ),
        numberOfUncorrelationgInteractions = vapply(
            sampledInter, function(x) sum(x == 0, na.rm = TRUE), integer(1)
        )
    )
}

# Bind once at the end instead of growing the table inside the loop.
allSampledInter <- do.call(rbind, sampledList)

In [None]:
# Long-format table of the sampled positive and negative counts, labelled by
# correlation direction and tagged as "Sampled".
sampInt <- melt(
  allSampledInter[, c("sampledPositiveInteractions",
                      "sampledNegativeInteractions")]
)
sampInt$Direction <- ifelse(
  sampInt$variable == "sampledNegativeInteractions",
  "Negative_Correlations",
  "Positive_Correlations"
)
sampInt$type <- "Sampled"
head(sampInt)

In [None]:
# Sampled rows first, observed rows after them.
allInter <- do.call(rbind, list(sampInt, obsInt))

In [None]:
# Box plots of the per-gene counts, observed vs. sampled, one panel per
# correlation direction, with a Wilcoxon test between the two types.
options(repr.plot.width = 6, repr.plot.height = 4)

cList <- list(c("Observed", "Sampled"))

ggplot(data = allInter, mapping = aes(x = type, y = value, color = type)) +
  geom_boxplot() +
  stat_compare_means(method = "wilcox.test", comparisons = cList) +
  facet_wrap(~Direction) +
  theme_minimal() +
  theme(legend.position = "None") +
  labs(y = "Number of significant correlations ") +
  xlab("")

In [None]:
# Long-format correlation table for the genes of myDF: one row per
# (genes, variable) combination, self-combinations removed.
myCovarE3s <- myCovar[rownames(myDF), colnames(myDF)]
myCovarE3s$genes <- rownames(myCovarE3s)
myCovarE3s <- melt(myCovarE3s)
myCovarE3s <- myCovarE3s[!(myCovarE3s$genes == myCovarE3s$variable), ]
head(myCovarE3s)


In [None]:
# For every gene, draw as many correlation values at random (without
# replacement) from its column of the correlation table as it has
# interactions, and count the positive, negative and zero values drawn.
# Genes are processed in row order after a fixed seed, one sample() call each.
set.seed(6)
sampledCounts <- vapply(
  rownames(observedInteractionCor),
  function(gene) {
    nDraw <- observedInteractionCor[gene, "numberOfInteractions"]
    pool <- myCovarE3s[myCovarE3s$variable == gene, ]
    drawn <- pool[sample(nrow(pool), nDraw), "value"]
    c(sum(drawn > 0, na.rm = TRUE),
      sum(drawn < 0, na.rm = TRUE),
      sum(drawn == 0, na.rm = TRUE))
  },
  integer(3)
)
observedInteractionCor$SampledPositiveInteractions <- unname(sampledCounts[1, ])
observedInteractionCor$SampledNegativeInteractions <- unname(sampledCounts[2, ])
observedInteractionCor$SampledNotSignInter <- unname(sampledCounts[3, ])

# Totals of non-zero correlations, observed and sampled.
observedInteractionCor$ObservedCorrelatingInteractions <- with(
  observedInteractionCor,
  ObservedPositiveInteractions + ObservedNegativeInteractions
)
observedInteractionCor$SampledCorrelatingInteractions <- with(
  observedInteractionCor,
  SampledPositiveInteractions + SampledNegativeInteractions
)

observedInteractionCor$gene <- rownames(observedInteractionCor)

In [None]:
# First rows of the per-gene summary table.
head(observedInteractionCor, n = 6L)

In [None]:
# Paired comparison, per gene, of the observed and the sampled number of
# non-zero correlations (paired Wilcoxon test).
xx <- setNames(
  observedInteractionCor[, c("gene", "ObservedCorrelatingInteractions",
                             "SampledCorrelatingInteractions")],
  c("gene", "Observed Interactions", "Sampled Interactions")
)
res <- melt(xx, id.vars = "gene")

options(repr.plot.width = 6, repr.plot.height = 4)
ggpaired(
  res,
  x = "variable", y = "value", id = "gene",
  color = "variable", palette = "jco",
  line.color = "gray", line.size = 0.1
) +
  stat_compare_means(method = "wilcox.test", paired = TRUE) +
  theme(legend.position = "None") +
  labs(y = "Number of significant correlations ") +
  xlab("")

In [None]:
# Paired comparison, per gene, of the observed and the sampled number of
# positive correlations (paired Wilcoxon test).
xx <- setNames(
  observedInteractionCor[, c("gene", "ObservedPositiveInteractions",
                             "SampledPositiveInteractions")],
  c("gene", "Observed Interactions", "Sampled Interactions")
)
res <- melt(xx, id.vars = "gene")

options(repr.plot.width = 6, repr.plot.height = 4)
ggpaired(
  res,
  x = "variable", y = "value", id = "gene",
  color = "variable", palette = "jco",
  line.color = "gray", line.size = 0.1
) +
  stat_compare_means(method = "wilcox.test", paired = TRUE) +
  theme(legend.position = "None") +
  labs(y = "Number of significant positive correlations") +
  xlab("")

In [None]:
# Paired comparison, per gene, of the observed and the sampled number of
# negative correlations (paired Wilcoxon test).
xx <- setNames(
  observedInteractionCor[, c("gene", "ObservedNegativeInteractions",
                             "SampledNegativeInteractions")],
  c("gene", "Observed Interactions", "Sampled Interactions")
)
res <- melt(xx, id.vars = "gene")

options(repr.plot.width = 6, repr.plot.height = 4)
ggpaired(
  res,
  x = "variable", y = "value", id = "gene",
  color = "variable", palette = "jco",
  line.color = "gray", line.size = 0.1
) +
  stat_compare_means(method = "wilcox.test", paired = TRUE) +
  theme(legend.position = "None") +
  labs(y = "Number of significant negative correlations") +
  xlab("")