In [1]:
library(Rtsne)
library(ggplot2)
library(dplyr)
library(FNN)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Read the expression tables and the sample annotations stored under ~/fig3.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the files are parsed.

# NOTE(review): unlike the other tables, this file is read without
# `row.names = 1`, so its first column is transposed together with the values.
# Confirm that this is intended.
predata <- read.table(
  "~/fig3/output/merge/matrix_processing/filter.null.Norm_TMM.mirna_and_domains.txt",
  header = TRUE, sep = "\t"
)
predata <- as.data.frame(t(predata))

# Second table, read with its first column as row names, then transposed.
postdata <- read.table(
  "~/fig3/batch_correction/TMM_ruv_batchremoval.txt",
  header = TRUE, row.names = 1, sep = "\t"
)
postdata <- as.data.frame(t(postdata))

# Batch annotation: the second column of the file provides the row names and
# character columns are read as factors.
batch_info <- read.table(
  "~/fig3/data/merge/batch_info.txt",
  header = TRUE, row.names = 2, stringsAsFactors = TRUE
)

# Sample class annotation, first column used as row names.
sample_classes <- read.table(
  "~/fig3/data/merge/sample_classes.txt",
  header = TRUE, row.names = 1, sep = "\t"
)

In [3]:
# Replace `predata` / `postdata` with the two "domains_combined" tables found
# in the working directory (first column used as row names).
# `TRUE` is written out because `T` can be reassigned.
predata <- read.table(
  "filter.scimpute_count.Norm_RLE.Batch_null.domains_combined.txt",
  header = TRUE, row.names = 1, sep = "\t"
)
postdata <- read.table(
  "filter.scimpute_count.Norm_RLE.Batch_RUV.domains_combined.txt",
  header = TRUE, row.names = 1, sep = "\t"
)

In [4]:
# Read the batch annotation (comma separated, first column = row names) and
# order its rows by row name.
# `TRUE` is written out because `T` can be reassigned.
batch_info <- read.table(
  "~/Bioinfos/data/scirep_batch.txt",
  sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE
)
# The isolation batch is an identifier, not a quantity: store it as a factor.
batch_info$RNA.Isolation.batch <- factor(batch_info$RNA.Isolation.batch)

# Sort through a helper column; the row names are re-attached afterwards
# because they are not relied upon to survive arrange().
batch_temp <- batch_info
batch_temp$names <- rownames(batch_info)
batch_temp <- arrange(batch_temp, names)
rownames(batch_temp) <- batch_temp$names

# Drop the helper column again. `drop = FALSE` keeps a data frame (and its row
# names) even when only a single annotation column remains.
batch_info <- batch_temp[, -ncol(batch_temp), drop = FALSE]

In [5]:
# Class label of every sample, given as consecutive runs:
# 100 x Colorectal Cancer, 50 + 6 x Normal, 36 x Prostate Cancer (192 in total).
# NOTE(review): the run lengths are hard-coded; presumably they follow the row
# order of the sorted expression tables -- verify against the sample sheet.
sample_class <- factor(rep(
  c("Colorectal Cancer", "Normal", "Normal", "Prostate Cancer"),
  times = c(100, 50, 6, 36)
))

In [6]:
#batch_info[which(batch_info$RNA.Isolation.batch == 7), ]

In [7]:
# Transpose a table and order the rows of the result by row name.
#
# @param tab A matrix or data frame with row and column names.
# @return A data frame holding t(tab) with rows sorted by row name.
transpose_and_sort_samples <- function(tab) {
  flipped <- as.data.frame(t(tab))
  # Sort through a helper column. Its name is chosen so that it cannot
  # overwrite a real column, and it is removed by name rather than by position.
  flipped$.sample_name <- rownames(flipped)
  flipped <- arrange(flipped, .sample_name)
  rownames(flipped) <- flipped$.sample_name
  # `drop = FALSE` keeps a data frame (with row names) even for one column.
  flipped[, colnames(flipped) != ".sample_name", drop = FALSE]
}

# Both tables go through the same routine so their rows end up in the same
# order.
predata <- transpose_and_sort_samples(predata)
postdata <- transpose_and_sort_samples(postdata)

In [8]:
# One-dimensional t-SNE embedding of each table (`p`: predata, `q`: postdata).
# t-SNE starts from a random initialisation, so the seed is fixed to make the
# embeddings, and every figure derived from them, reproducible between runs.
# The seed value itself is arbitrary.
set.seed(1234)
p <- Rtsne(predata, dims = 1)
q <- Rtsne(postdata, dims = 1)

In [19]:
# Stack the 1-D t-SNE coordinates of both embeddings into one long table with
# one row per sample and state, annotated with batch and sample class.
batch <- batch_info$RNA.Isolation.batch
n_pre <- nrow(p$Y)
n_post <- nrow(q$Y)

# Every annotation vector must describe exactly the embedded samples;
# otherwise labels would be recycled or misaligned without any warning.
stopifnot(
  length(batch) == n_pre,
  length(batch) == n_post,
  length(sample_class) == n_pre
)

# Build data frames directly: cbind() on a factor and a matrix discards the
# factor class and stores the internal integer codes instead of the labels.
pre_p <- data.frame(Batch = batch, t_SNE = p$Y[, 1])
post_q <- data.frame(Batch = batch, t_SNE = q$Y[, 1])
sum_data <- rbind(pre_p, post_q)

foo <- c(rep("1.Without Batch Correction", times = n_pre), rep("2.With Batch Correction", times = n_post))
# The class labels are repeated explicitly for the two stacked halves instead
# of relying on implicit recycling inside `$<-`.
sum_data$Type <- rep(sample_class, times = 2)
sum_data$State <- foo

In [20]:
# Give the four columns their final names, then store the three grouping
# columns as factors (t_SNE is left untouched).
colnames(sum_data) <- c("Batch", "t_SNE", "Type", "State")
sum_data[c("Batch", "Type", "State")] <- lapply(
  sum_data[c("Batch", "Type", "State")],
  as.factor
)

In [24]:
# Show the first six rows of the combined table as a quick sanity check.
head(sum_data, n = 6L)

Batch,t_SNE,Type,State
2,7.3051443,Colorectal Cancer,1.Without Batch Removal
3,3.7251037,Colorectal Cancer,1.Without Batch Removal
3,5.5081432,Colorectal Cancer,1.Without Batch Removal
3,0.4477047,Colorectal Cancer,1.Without Batch Removal
4,-5.0538195,Colorectal Cancer,1.Without Batch Removal
4,-8.0436693,Colorectal Cancer,1.Without Batch Removal


In [28]:
# Density of the 1-D t-SNE coordinate, one line per batch and one panel per
# state; each panel gets its own x range.
s <- ggplot(data = sum_data, aes(x = t_SNE, colour = Batch)) +
  geom_density() +
  facet_wrap(~State, scales = "free_x")

# The former `scale_fill_brewer(palette = "Set2")` was removed: only `colour`
# is mapped, so a fill scale never affected this plot.
# NOTE(review): `scale_colour_brewer(palette = "Set2")` would be the matching
# scale, but Set2 provides at most 8 colours -- confirm the number of batches
# before adding it.
s <- s +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Print explicitly so the figure is also written when this code is run via
# source() or inside a function, where top-level auto-printing does not happen.
pdf("test_for_batch.pdf", width = 10, height = 6)
print(s)
dev.off()

In [29]:
# Density of the 1-D t-SNE coordinate, one line per sample class and one panel
# per state; each panel gets its own x range.
s <- ggplot(data = sum_data, aes(x = t_SNE, colour = Type)) +
  geom_density() +
  facet_wrap(~State, scales = "free_x")

# The lines are drawn through the `colour` aesthetic, so the Set2 palette has
# to be attached to the colour scale; the former fill scale had no effect.
s <- s +
  scale_colour_brewer(palette = "Set2") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )

# Print explicitly so the figure is also written when this code is run via
# source() or inside a function, where top-level auto-printing does not happen.
pdf("test_for_type.pdf", width = 10, height = 6)
print(s)
dev.off()

In [14]:
# Write the per-batch density of a 1-D embedding to a PNG file.
#
# @param vis Data frame with a factor column `Batch` and a numeric column `Y`.
# @param png_file Path of the PNG file to create.
# @return The ggplot object, invisibly.
save_batch_density <- function(vis, png_file) {
  # No brewer scale here: the original `scale_fill_brewer()` had no effect
  # because only `colour` is mapped, and Set2 is limited to 8 colours.
  dens <- ggplot(data = vis, aes(x = Y, colour = Batch)) +
    geom_density() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black")
    )
  png(png_file)
  # Close the device even if drawing fails.
  on.exit(dev.off(), add = TRUE)
  print(dens)
  invisible(dens)
}

# `postdata` is the "after" data set and `predata` the "before" one, matching
# the class-level plots; the labels and file names used to be swapped here.
# data.frame() keeps the batch labels, whereas cbind() would reduce the factor
# to its integer codes, and `Batch` is a factor in both tables so that one
# density is drawn per batch.
p <- Rtsne(postdata, dims = 1)
tSNE_Vis_after <- data.frame(
  Batch = factor(batch_info$RNA.Isolation.batch),
  Y = p$Y[, 1]
)
s <- save_batch_density(tSNE_Vis_after, "after.png")

p <- Rtsne(predata, dims = 1)
tSNE_Vis_before <- data.frame(
  Batch = factor(batch_info$RNA.Isolation.batch),
  Y = p$Y[, 1]
)
s <- save_batch_density(tSNE_Vis_before, "before.png")

In [15]:
## Test for the cancer/normal separation -- after and before

# Write the per-class density of a 1-D embedding to a PNG file.
#
# @param vis Data frame with a numeric code column `Type` (positions in
#   `class_labels`) and a numeric column `Y`.
# @param class_labels Character vector of class names, indexed by `Type`.
# @param png_file Path of the PNG file to create.
# @return The ggplot object, invisibly.
save_type_density <- function(vis, class_labels, png_file) {
  # A numeric `Type` is a continuous aesthetic: it neither splits the data
  # into groups nor fits a brewer scale. Map the codes back to their labels
  # for plotting only.
  vis$TypeLabel <- factor(
    vis$Type,
    levels = seq_along(class_labels),
    labels = class_labels
  )
  dens <- ggplot(data = vis, aes(x = Y, fill = TypeLabel)) +
    geom_density() +
    scale_fill_brewer(palette = "Set2") +
    labs(fill = "Type") +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black")
    )
  png(png_file)
  # Close the device even if drawing fails.
  on.exit(dev.off(), add = TRUE)
  print(dens)
  invisible(dens)
}

# `Type` keeps the numeric factor codes of `sample_class` (1, 2, 3, ...), the
# same values the previous cbind()-based tables contained.
p <- Rtsne(postdata, dims = 1)
tSNE_Vis_after <- data.frame(Type = as.numeric(sample_class), Y = p$Y[, 1])
s <- save_type_density(tSNE_Vis_after, levels(sample_class), "after_test.png")

p <- Rtsne(predata, dims = 1)
tSNE_Vis_before <- data.frame(Type = as.numeric(sample_class), Y = p$Y[, 1])
s <- save_type_density(tSNE_Vis_before, levels(sample_class), "before_test.png")

In [123]:
# Split the "after" (t*) and "before" (b*) embeddings by sample class.
#
# `Type` holds the factor codes of `sample_class`, which start at 1, so the
# former `Type == 0` never matched and `t0` / `b0` were always empty.
# The classes are therefore selected by label.
# NOTE(review): the mapping 0 = Normal, 1 = Colorectal Cancer,
# 2 = Prostate Cancer is an assumption about the intended coding -- confirm.
type_label <- levels(sample_class)[as.integer(tSNE_Vis_after$Type)]
# One label per row is required for both tables, since the same mask is used
# to index "before" and "after".
stopifnot(
  !anyNA(type_label),
  length(type_label) == nrow(tSNE_Vis_before)
)

is_normal <- type_label == "Normal"
is_crc <- type_label == "Colorectal Cancer"
is_prostate <- type_label == "Prostate Cancer"

t1 <- tSNE_Vis_after$Y[is_crc]
t0 <- tSNE_Vis_after$Y[is_normal]
t2 <- tSNE_Vis_after$Y[is_prostate]

b1 <- tSNE_Vis_before$Y[is_crc]
b0 <- tSNE_Vis_before$Y[is_normal]
b2 <- tSNE_Vis_before$Y[is_prostate]

In [124]:
# KL divergence between the class-1 and class-0 coordinates with k = 1,
# first for the t* ("after") vectors, then for the b* ("before") vectors.
KL.divergence(X = t1, Y = t0, k = 1)
KL.divergence(X = b1, Y = b0, k = 1)