In [95]:
library(Rtsne)
library(ggplot2)
library(dplyr)

In [113]:
library(FNN)

In [96]:
# Load the two expression matrices that are compared below. Judging by the
# file names, `predata` carries no batch correction ("Batch_null") and
# `postdata` is the RUV batch-corrected version ("Batch_RUV").
# The first column of each file supplies the row names.
# NOTE(review): read.table() keeps check.names = TRUE here, so header entries
# that are not syntactic R names get rewritten -- confirm the sample IDs still
# match the row names of the batch annotation table.
predata <- read.table(
  "filter.scimpute_count.Norm_RLE.Batch_null.domains_combined.txt",
  header = TRUE, row.names = 1, sep = "\t"
)
postdata <- read.table(
  "filter.scimpute_count.Norm_RLE.Batch_RUV.domains_combined.txt",
  header = TRUE, row.names = 1, sep = "\t"
)

In [97]:
# Load the per-sample batch annotations (row names come from the first column)
# and order the rows by sample name.
batch_info <- read.table(
  "~/Bioinfos/data/scirep_batch.txt",
  sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE
)
# Treat the RNA isolation batch as a categorical label, not a number.
batch_info$RNA.Isolation.batch <- factor(batch_info$RNA.Isolation.batch)
# The row names are copied into a helper column so they can be used as the
# sort key for arrange() and restored afterwards.
batch_temp <- batch_info
batch_temp$names <- rownames(batch_info)
batch_temp <- arrange(batch_temp, names)
rownames(batch_temp) <- batch_temp$names
# Drop the helper column again. drop = FALSE keeps a data frame (and its row
# names) even when the file holds a single annotation column.
batch_info <- batch_temp[, -ncol(batch_temp), drop = FALSE]

In [213]:
# Class label for each of the 192 samples, given as consecutive runs:
# 100 colorectal cancer, 56 normal (50 + 6) and 36 prostate cancer.
# NOTE(review): the run lengths are hard-coded and presumably follow the
# name-sorted sample order used for the data matrices -- confirm.
sample_class <- factor(rep(
  c("Colorectal Cancer", "Normal", "Normal", "Prostate Cancer"),
  times = c(100, 50, 6, 36)
))

In [105]:
#batch_info[which(batch_info$RNA.Isolation.batch == 7), ]

In [100]:
# Transpose a matrix-like table so that its former columns become rows, then
# order those rows by row name.
#
# The sort key lives in a separate two-column table, so no helper column is
# added to (or dropped from) the data itself: a feature that happens to be
# called "names" is left alone, and drop = FALSE keeps a data frame even if
# there is only one feature.
samples_as_sorted_rows <- function(counts) {
  by_sample <- as.data.frame(t(counts))
  key <- data.frame(
    name = rownames(by_sample),
    idx = seq_len(nrow(by_sample)),
    stringsAsFactors = FALSE
  )
  # arrange() is used for the ordering so the sort rules are the same ones
  # applied to the batch annotation table.
  by_sample[arrange(key, name)$idx, , drop = FALSE]
}

# NOTE(review): this step is not idempotent -- re-running the cell transposes
# the tables back again.
predata <- samples_as_sorted_rows(predata)
postdata <- samples_as_sorted_rows(postdata)

In [None]:
# One-dimensional t-SNE embedding of each table (`p` from predata, `q` from
# postdata). t-SNE is stochastic, so the RNG is seeded right before every call:
# each embedding then depends only on its input and can be reproduced no
# matter in which order the cells are run.
set.seed(42)
p <- Rtsne(predata, dims = 1)
set.seed(42)
q <- Rtsne(postdata, dims = 1)

In [226]:
# Stack the two 1-D embeddings into one long table with the columns
# Batch, t_SNE, Type and State (in that order).
# All inputs must describe the same samples; fail loudly instead of letting a
# length mismatch be recycled silently.
stopifnot(
  nrow(p$Y) == nrow(batch_info),
  nrow(q$Y) == nrow(batch_info),
  length(sample_class) == nrow(batch_info)
)
# data.frame() keeps the batch factor with its labels; cbind() on a factor
# would store only the integer level codes.
pre_p <- data.frame(
  Batch = batch_info$RNA.Isolation.batch,
  t_SNE = p$Y[, 1]
)
post_q <- data.frame(
  Batch = batch_info$RNA.Isolation.batch,
  t_SNE = q$Y[, 1]
)
sum_data <- rbind(pre_p, post_q)
foo <- rep(
  c("Before Batch Removal", "After Batch Removal"),
  times = c(nrow(pre_p), nrow(post_q))
)
# The class labels apply to both halves, so repeat them explicitly.
sum_data$Type <- rep(sample_class, times = 2)
sum_data$State <- foo

In [234]:
# Give the columns their final names and make the grouping columns factors.
names(sum_data) <- c("Batch", "t_SNE", "Type", "State")
sum_data$Batch <- as.factor(sum_data$Batch)
sum_data$Type <- as.factor(sum_data$Type)
# Alphabetical levels would put "After ..." ahead of "Before ...", so facets
# built from State would read right-to-left. Fix the order explicitly; any
# other label that may be present is appended so it is not turned into NA.
state_labels <- as.character(sum_data$State)
state_order <- c("Before Batch Removal", "After Batch Removal")
sum_data$State <- factor(
  state_labels,
  levels = c(state_order, setdiff(sort(unique(state_labels)), state_order))
)

In [235]:
# Display the assembled table (columns Batch, t_SNE, Type, State) for a quick
# visual check.
sum_data

Batch,t_SNE,Type,State
2,5.7892082,Colorectal Cancer,Before Batch Removal
3,2.9437147,Colorectal Cancer,Before Batch Removal
3,-11.6434857,Colorectal Cancer,Before Batch Removal
3,0.6551440,Colorectal Cancer,Before Batch Removal
4,-3.9988097,Colorectal Cancer,Before Batch Removal
4,-6.1821744,Colorectal Cancer,Before Batch Removal
4,-7.0273897,Colorectal Cancer,Before Batch Removal
5,-2.6229096,Colorectal Cancer,Before Batch Removal
5,-1.7272982,Colorectal Cancer,Before Batch Removal
5,-0.2846681,Colorectal Cancer,Before Batch Removal


In [238]:
# Density of the 1-D t-SNE coordinate, one curve per batch, with the
# before/after batch-removal states in separate facets; written to a
# 10 x 6 inch PDF.
# The curves are distinguished by the colour aesthetic, so the Brewer palette
# has to be set on the colour scale -- a fill scale has nothing to act on here.
# Set2 offers at most 8 colours, so it is applied only when that is enough;
# otherwise NULL is added and ggplot2's default colours stay in place.
batch_palette <- if (nlevels(sum_data$Batch) <= 8L) {
  scale_colour_brewer(palette = "Set2")
}
s <- ggplot(data = sum_data, aes(x = t_SNE, colour = Batch)) +
  geom_density() +
  facet_wrap(~State)
s <- s + batch_palette +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
pdf("test_for_batch.pdf", width = 10, height = 6)
# Explicit print(): auto-printing does not happen when the file is source()d,
# which would leave an empty PDF.
print(s)
dev.off()

In [239]:
# Density of the 1-D t-SNE coordinate, one curve per sample class, with the
# before/after batch-removal states in separate facets; written to a
# 10 x 6 inch PDF.
# The curves are distinguished by the colour aesthetic, so the Brewer palette
# has to be set on the colour scale -- a fill scale has nothing to act on here.
s <- ggplot(data = sum_data, aes(x = t_SNE, colour = Type)) +
  geom_density() +
  facet_wrap(~State)
s <- s + scale_colour_brewer(palette = "Set2") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
pdf("test_for_type.pdf", width = 10, height = 6)
# Explicit print(): auto-printing does not happen when the file is source()d,
# which would leave an empty PDF.
print(s)
dev.off()

In [166]:
# Per-batch density of a 1-D t-SNE embedding, one PNG per table:
#   before.png <- predata  (the "Batch_null" input, i.e. no batch removal)
#   after.png  <- postdata (the "Batch_RUV" input, i.e. after batch removal)
# This pairing matches the "Before"/"After" labels used for `p`/`q` in the
# combined table; the two inputs had been swapped in this cell.
batch_labels <- batch_info$RNA.Isolation.batch
# The curves use the colour aesthetic, so the palette goes on the colour scale.
# Set2 offers at most 8 colours; with more batches NULL is added instead and
# ggplot2's default colours are used.
batch_palette <- if (nlevels(batch_labels) <= 8L) {
  scale_colour_brewer(palette = "Set2")
}
density_theme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  axis.line = element_line(colour = "black")
)

# Seed before each embedding so every figure can be regenerated exactly.
set.seed(42)
p <- Rtsne(predata, dims = 1)
stopifnot(nrow(p$Y) == length(batch_labels))
# data.frame() keeps Batch as a labelled factor, which is what makes
# geom_density() draw one curve per batch.
tSNE_Vis_before <- data.frame(Batch = batch_labels, Y = p$Y[, 1])
s <- ggplot(data = tSNE_Vis_before, aes(x = Y, colour = Batch)) +
  geom_density()
s <- s + batch_palette + density_theme
png("before.png")
# Explicit print(): auto-printing does not happen under source().
print(s)
dev.off()

set.seed(42)
p <- Rtsne(postdata, dims = 1)
stopifnot(nrow(p$Y) == length(batch_labels))
tSNE_Vis_after <- data.frame(Batch = batch_labels, Y = p$Y[, 1])
s <- ggplot(data = tSNE_Vis_after, aes(x = Y, colour = Batch)) +
  geom_density()
s <- s + batch_palette + density_theme
png("after.png")
print(s)
dev.off()

In [125]:
## Test for the cancer/normal -- after
# Filled density of the 1-D t-SNE coordinate of `postdata`, one area per
# sample class, written to after_test.png.
# Seed first so the embedding (and everything derived from it) is reproducible.
set.seed(42)
p <- Rtsne(postdata, dims = 1)
stopifnot(nrow(p$Y) == length(sample_class))
# `Type` keeps the numeric level code of each class, as cbind() produced it,
# for code that selects rows by number. `Class` carries the labelled factor:
# a discrete fill scale needs a discrete variable, and a numeric column would
# be treated as continuous.
tSNE_Vis_after <- data.frame(
  Type = as.numeric(sample_class),
  Y = p$Y[, 1],
  Class = sample_class
)
s <- ggplot(data = tSNE_Vis_after, aes(x = Y, fill = Class)) +
  geom_density() +
  labs(fill = "Type")
s <- s + scale_fill_brewer(palette = "Set2") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
png("after_test.png")
# Explicit print(): auto-printing does not happen under source().
print(s)
dev.off()

# Filled density of the 1-D t-SNE coordinate of `predata`, one area per
# sample class, written to before_test.png.
# Seed first so the embedding (and everything derived from it) is reproducible.
set.seed(42)
p <- Rtsne(predata, dims = 1)
stopifnot(nrow(p$Y) == length(sample_class))
# `Type` keeps the numeric level code of each class, as cbind() produced it,
# for code that selects rows by number. `Class` carries the labelled factor:
# a discrete fill scale needs a discrete variable, and a numeric column would
# be treated as continuous.
tSNE_Vis_before <- data.frame(
  Type = as.numeric(sample_class),
  Y = p$Y[, 1],
  Class = sample_class
)
s <- ggplot(data = tSNE_Vis_before, aes(x = Y, fill = Class)) +
  geom_density() +
  labs(fill = "Type")
s <- s + scale_fill_brewer(palette = "Set2") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
png("before_test.png")
# Explicit print(): auto-printing does not happen under source().
print(s)
dev.off()

In [123]:
# Split the t-SNE coordinates by sample class: t* from the "after" table,
# b* from the "before" table.
# Rows are selected through `sample_class` itself. The numeric `Type` column
# holds factor level codes, which start at 1, so a test for code 0 matches
# nothing and would hand empty vectors to the divergence estimate.
# NOTE(review): the mapping 0 = Normal, 1 = Colorectal Cancer,
# 2 = Prostate Cancer is inferred from the three class labels -- confirm it is
# the coding these variable names were meant to follow.
stopifnot(
  length(sample_class) == nrow(tSNE_Vis_after),
  length(sample_class) == nrow(tSNE_Vis_before)
)
is_colorectal <- sample_class == "Colorectal Cancer"
is_normal <- sample_class == "Normal"
is_prostate <- sample_class == "Prostate Cancer"

t1 <- tSNE_Vis_after$Y[is_colorectal]
t0 <- tSNE_Vis_after$Y[is_normal]
t2 <- tSNE_Vis_after$Y[is_prostate]

b1 <- tSNE_Vis_before$Y[is_colorectal]
b0 <- tSNE_Vis_before$Y[is_normal]
b2 <- tSNE_Vis_before$Y[is_prostate]

In [124]:
# KL divergence (FNN estimator, k = 1) between the t-SNE coordinates of the
# two sample groups: first on the "after" embedding (t1 vs t0), then on the
# "before" embedding (b1 vs b0).
KL.divergence(X = t1, Y = t0, k = 1)
KL.divergence(X = b1, Y = b0, k = 1)