In [None]:
# Load the project's shared scripts (in this order), then move the working
# directory to `rDir` so the relative paths used further down resolve.
# NOTE(review): `rDir` is not defined in this file - presumably it is set by
# one of the sourced scripts; confirm.
invisible(lapply(c("Main.R", "Conf.R", "Utilities.R"), source))

setwd(rDir)

In [None]:
# Read the GuideSelect weight table; the first CSV column supplies the row
# names and strings are kept as character vectors.
allWeights <- read.csv(
  "./GuideCellLM/GuideSelect_weights.csv",
  row.names = 1,
  stringsAsFactors = FALSE
)

# Matching p-value table, read with the same options.
pValsAll <- read.csv(
  "./GuideCellLM/GuideSelect_pvals.csv",
  row.names = 1,
  stringsAsFactors = FALSE
)

In [None]:
# Build a long-format table of the pairwise Pearson correlations between the
# columns of `allWeights`. Every unordered pair of distinct columns ends up in
# the table twice (once per orientation), both rows sharing the same key.
M <- as.data.frame(cor(allWeights, method = "pearson"))

# unlist() walks the data frame column by column, so the column label repeats
# in runs (`each`) while the row label cycles (`times`).
myDist <- unlist(M)
myDistFlat <- data.frame(guidea = names(myDist), sDist = myDist)
myDistFlat$guidea <- rep(rownames(M), each = nrow(M))
myDistFlat$guideb <- rep(rownames(M), times = nrow(M))

# Drop the diagonal, i.e. a column correlated with itself.
myDistFlat <- myDistFlat[myDistFlat$guidea != myDistFlat$guideb, ]

# Order-independent pair key: both labels sorted, then joined with "_".
myDistFlat$guides_ab <- apply(
  myDistFlat[, c("guidea", "guideb")],
  1,
  function(pair) paste(sort(pair), collapse = "_")
)

# The individual labels are no longer needed once the key exists.
myDistFlat$guidea <- NULL
myDistFlat$guideb <- NULL

# Gene names are read from fixed positions of the "_"-split key.
# NOTE(review): positions 1 and 3 assume every label has the form
# "<gene>_<suffix>" with exactly one underscore - confirm against the data.
myDistFlat$GeneA <- sapply(
  myDistFlat$guides_ab,
  function(key) strsplit(key, "_")[[1]][1]
)
myDistFlat$GeneB <- sapply(
  myDistFlat$guides_ab,
  function(key) strsplit(key, "_")[[1]][3]
)

# Flag the pairs whose two extracted gene names are equal.
myDistFlat$sameGene <- FALSE
myDistFlat[myDistFlat$GeneA == myDistFlat$GeneB, "sameGene"] <- TRUE


In [None]:
# (Re)load the GuideSelect weight table; first CSV column becomes row names.
allWeights <- read.csv(
  "./GuideCellLM/GuideSelect_weights.csv",
  row.names = 1,
  stringsAsFactors = FALSE
)

In [None]:
# Read the control-guide coefficient table; the first CSV column supplies the
# row names and strings are kept as character vectors.
# (The file path must be followed by a comma: the original had a "." there,
# which made the whole call a parse error.)
allControlCoefs <- read.csv(
  "./ControlGuideEffects/Control_coefs.csv",
  row.names = 1,
  stringsAsFactors = FALSE
)

# Matching control-guide p-value table, read with the same options.
allPValues <- read.csv(
  "./ControlGuideEffects/Control_pValues.csv",
  row.names = 1,
  stringsAsFactors = FALSE
)

In [None]:
# Transpose both control tables (rows become columns). Note that t() turns a
# data frame into a matrix.
allControlCoefs <- t(allControlCoefs)
allPValues <- t(allPValues)

In [None]:
# Pearson correlation of every column of `allWeights` against every column of
# the control coefficients. Only the first nrow(allWeights) rows of the control
# matrix are used so that both inputs have the same number of rows.
# NOTE(review): this pairs rows by position, not by row name - confirm that the
# two tables list their rows in the same order.
M_control <- cor(
  x = allWeights,
  y = allControlCoefs[1:nrow(allWeights), ],
  method = "pearson"
)

In [None]:
# Convert the correlation matrix to a data frame, keep its row names in an
# explicit `TargetGuide` column, and reshape it to long format with melt().
M_control <- as.data.frame(M_control)
M_control[["TargetGuide"]] <- rownames(M_control)
myDist <- melt(M_control)


In [None]:
# Annotate every melted control row with the same columns the pairwise table
# carries (GeneA, GeneB, sameGene, guides_ab).

# GeneA: the melted column label with its last "_"-separated token removed.
myDist$GeneA <- sapply(myDist$variable, function(label) {
  tokens <- strsplit(as.character(label), "_")[[1]]
  paste0(tokens[-length(tokens)], collapse = "_")
})

# The second member of each pair is a fixed placeholder.
myDist$GeneB <- "Target"

# Category label: default first, then override for ONE_NONGENE_SITE rows.
myDist$sameGene <- "NO_TARGET_CONTROL"
myDist[myDist$GeneA == "ONE_NONGENE_SITE", "sameGene"] <- "ONE_NONGENE_SITE_CONTROL"

# Pair key in the same "<A>_<B>" shape as the pairwise table.
myDist$guides_ab <- paste(myDist$GeneA, myDist$GeneB, sep = "_")

In [None]:
# Keep only the five columns needed downstream, then rename the melted
# `value` column (now in first position) to `sDist`.
myDist <- myDist[, c("value", "guides_ab", "GeneA", "GeneB", "sameGene")]
names(myDist)[1] <- "sDist"

In [None]:
# Load the stored flattened correlation table from `rdsDir`; this replaces any
# `myDistFlat` already in the session.
# NOTE(review): `rdsDir` is not defined in this file and must end with a path
# separator for paste0() to form a valid path - confirm.
myDistFlat <- readRDS(file = paste0(rdsDir, "GuideSelect_betaCovPearsonFlat.rds"))

In [None]:
# Replace the TRUE/FALSE flag with descriptive category labels. The first
# assignment turns the column into character, so the remaining rows read
# "FALSE" by the time the second line runs.
myDistFlat[myDistFlat$sameGene == "TRUE", "sameGene"] <- "SAME_GENE"
myDistFlat[myDistFlat$sameGene == "FALSE", "sameGene"] <- "DIFFERENT_GENE"

In [None]:
# Stack the pairwise table and the control table into one data frame.
# rbind() on data frames matches columns by name, so both inputs must carry
# the same set of column names.
myDistFlatALL <- rbind(myDistFlat, myDist)

In [None]:
# Mean correlation per `sameGene` category, displayed as the cell output.
mu <- ddply(
  myDistFlatALL,
  "sameGene",
  summarise,
  grp.mean = mean(sDist)
)
mu

In [None]:
# Size of the inline notebook rendering.
options(repr.plot.width = 10, repr.plot.height = 4)

# Write the empirical CDF of the correlations, one step curve per `sameGene`
# category, to SupplementaryFigures/S_1F.pdf.
pdf("./../SupplementaryFigures/S_1F.pdf", width = 10, height = 4)

# The plot is print()ed explicitly: a ggplot object is only drawn when it is
# printed, and relying on top-level auto-printing leaves the PDF empty when
# this code is run via source() or from inside a function.
# xlim() restricts the x axis to [-0.25, 0.25]; values outside are dropped.
print(
  ggplot(myDistFlatALL, aes(sDist, colour = sameGene)) +
    stat_ecdf(geom = "step") +
    theme_minimal() +
    labs(
      x = "Pearson correlation between guide effect sizes",
      y = "CDF"
    ) +
    theme(
      axis.text = element_text(size = 15),
      axis.title = element_text(size = 16)
    ) +
    xlim(-0.25, 0.25)
)

# Close the PDF device so the file is flushed to disk.
dev.off()

In [None]:
# Per-category mean correlation; drawn below as dashed vertical lines.
mu <- ddply(
  myDistFlatALL,
  "sameGene",
  summarise,
  grp.mean = mean(sDist)
)

# Size of the inline notebook rendering.
options(repr.plot.width = 10, repr.plot.height = 5)

# Overlaid density-scaled histograms of the correlations, one per `sameGene`
# category, with the x axis limited to [-0.1, 0.5].
ggplot(myDistFlatALL, aes(x = sDist, color = sameGene, fill = sameGene)) +
  geom_histogram(
    aes(y = ..density..),
    position = "identity",
    alpha = 0.3,
    binwidth = 0.005
  ) +
  # Disabled alternative layer:
  #   geom_density(alpha = 0.6)
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sameGene),
    linetype = "dashed"
  ) +
  # Disabled reference line and manual palettes:
  #   geom_vline(xintercept = 0.015, color = "black", linetype = "dashed")
  #   scale_color_manual(values = c("#E69F00", "#999999", "blue"))
  #   scale_fill_manual(values = c("#E69F00", "#999999", "blue"))
  labs(
    x = "Pearson correlation between guide effect sizes",
    y = "Density"
  ) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 16)
  ) +
  scale_x_continuous(
    breaks = seq(-0.1, 0.5, 0.1),
    labels = seq(-0.1, 0.5, 0.1),
    limits = c(-0.1, 0.5)
  )

In [None]:
# Correlation values for same-gene pairs (x) and different-gene pairs (y).
x <- myDistFlatALL$sDist[myDistFlatALL$sameGene == "SAME_GENE"]
y <- myDistFlatALL$sDist[myDistFlatALL$sameGene == "DIFFERENT_GENE"]

In [None]:
# One-sided two-sample Kolmogorov-Smirnov test comparing the correlations of
# same-gene pairs (x) with those of the control rows (y).
#
# The control rows are labelled "NO_TARGET_CONTROL" or
# "ONE_NONGENE_SITE_CONTROL" where they are built in this file; nothing here
# assigns the label "CONTROL_GUIDE", so filtering on it left `y` empty and
# ks.test() stopped with an error.
# NOTE(review): both control categories are pooled here - narrow the vector if
# only one of them was intended.
x <- myDistFlatALL[myDistFlatALL$sameGene == "SAME_GENE", "sDist"]
y <- myDistFlatALL[
  myDistFlatALL$sameGene %in% c("NO_TARGET_CONTROL", "ONE_NONGENE_SITE_CONTROL"),
  "sDist"
]

# `alternative` is spelled out instead of relying on partial matching of "l".
ks.test(x, y, alternative = "less")