In [None]:
# Start from a clean workspace: remove every object currently defined in the
# environment this script is run in.
rm(list = ls())

In [None]:
# Directory that receives every figure written by this script.
figure_folder <- "Figures"

# Create the directory only when it is missing, so that re-running the script
# does not raise an "already exists" warning from dir.create().
if (!dir.exists(figure_folder)) {
  dir.create(figure_folder, recursive = TRUE)
}

In [None]:
library(ggplot2)
library(dplyr)
library(Rtsne)

In [None]:
# Cell types at the two ends of the comparison. Both values are matched
# against the "CellType" column of the cell-label table further down.
departure_cell <- "ProB"
destination_cell <- "Mono"

In [None]:
# Expression table after imputation. The first CSV column becomes the row
# names and the header row becomes the column names.
# NOTE(review): presumably genes in rows and cells in columns, given how the
# table is indexed later -- confirm against the input file.
imputed_data <- read.table(
  "Data_prob_mono/scRecover+scImpute.csv",
  header = TRUE,
  sep = ",",
  row.names = 1
)

In [None]:
# Expression table before imputation, read with the same layout as the
# imputed table: first CSV column as row names, header row as column names.
raw_data <- read.table(
  "Data_prob_mono/raw_data.csv",
  header = TRUE,
  sep = ",",
  row.names = 1
)

In [None]:
# Names of the transcription factors of interest, flattened into a plain
# character vector.
# stringsAsFactors = FALSE together with as.character() guarantees character
# data on every R version: before R 4.0 read.table() returned factors, on
# which the alias replacement below would produce NA and row indexing would
# silently use integer level codes instead of the names.
rel_TFs <- as.character(
  unlist(
    read.table("../TFS", header = FALSE, stringsAsFactors = FALSE),
    use.names = FALSE
  )
)

In [None]:
# Replace the aliases used in the TF list by the gene symbols under which the
# rows of the expression tables are looked up. Names without an entry in the
# table are left unchanged. Replacements are applied one alias at a time, in
# the order listed.
rel_TFs <- local({
  alias_to_symbol <- c(
    CEBPa  = "CEBPA",
    CEBPb  = "CEBPB",
    E2A    = "TCF3",
    EBF    = "EBF1",
    Eto2   = "CBFA2T3",
    Fli1   = "FLI1",
    Foxo1  = "FOXO1",
    Gata1  = "GATA1",
    Gata2  = "GATA2",
    Gfi1b  = "GFI1B",
    Ldb1   = "LDB1",
    Lmo2   = "LMO2",
    Lyl1   = "LYL1",
    Meis1  = "MEIS1",
    Mtgr1  = "CBFA2T2",
    Oct2   = "POU2F2",
    p300   = "EP300",
    P65    = "RELA",
    Pparg  = "PPARG",
    PU1    = "SPI1",
    Runx1  = "RUNX1",
    SCL    = "TAL1",
    Stat3  = "STAT3",
    Stat4  = "STAT4",
    Stat5a = "STAT5A",
    Stat5b = "STAT5B",
    Stat6  = "STAT6"
  )

  renamed <- rel_TFs
  for (alias in names(alias_to_symbol)) {
    renamed[renamed == alias] <- alias_to_symbol[[alias]]
  }
  renamed
})

In [None]:
# Cell annotation table; the code below relies on its "CellID" and "CellType"
# columns.
# stringsAsFactors = FALSE keeps both columns as character on every R version.
# Before R 4.0 they were read as factors, in which case the cell IDs used for
# column selection and the cell types combined with c() further down would be
# handled as integer level codes.
labels <- read.table(
  "cell_types_prob_mono.txt",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Index the table by cell ID so rows can be looked up by name.
rownames(labels) <- labels[["CellID"]]

In [None]:
# IDs of the cells annotated with the departure and the destination cell type.
# which() drops rows whose CellType is NA.
departure_cells <-
  labels[["CellID"]][which(labels[["CellType"]] == departure_cell)]
destination_cells <-
  labels[["CellID"]][which(labels[["CellType"]] == destination_cell)]

In [None]:
# Keep only the rows of the imputed table that belong to the selected TFs.
# A TF name that matches no row name yields a row filled with NA.
TF_data <- imputed_data[rel_TFs, ]

In [None]:
# Split the TF table by cell population: one column subset per cell type.
# Selecting an ID that is not a column name raises an error.
departure_data <- TF_data[, departure_cells]
destination_data <- TF_data[, destination_cells]

In [None]:
# From here on `labels` is no longer the annotation table but a plain vector
# of cell types: departure cells first, destination cells second, i.e. the
# same order in which the columns are bound together below.
labels <- c(
  labels[departure_cells, "CellType"],
  labels[destination_cells, "CellType"]
)

In [None]:
# Imputed TF table for both populations, departure columns first.
all_tf_data <- cbind(departure_data, destination_data)

# Binarised copy: 1 where the value is strictly positive, 0 otherwise.
# Adding 0 turns the logical comparison result into a numeric matrix.
bin_tf_data <- (all_tf_data > 0) + 0

In [None]:
# Same selection as for the imputed table, applied to the raw table:
# TF rows first, then the columns of each population.
TF_raw_data <- raw_data[rel_TFs, ]
departure_raw_data <- TF_raw_data[, departure_cells]
destination_raw_data <- TF_raw_data[, destination_cells]

# Raw TF table for both populations, in the same column order as all_tf_data.
all_tf_raw_data <- cbind(departure_raw_data, destination_raw_data)

In [None]:
# Summary counts of zero / strictly positive entries before and after
# imputation. NA entries are counted in neither category.
zero_raw <- sum(all_tf_raw_data == 0, na.rm = TRUE)
non_zero_raw <- sum(all_tf_raw_data > 0, na.rm = TRUE)
zero_all <- sum(all_tf_data == 0, na.rm = TRUE)
non_zero_all <- sum(all_tf_data > 0, na.rm = TRUE)

# Entries that were zero in the raw table and are positive after imputation.
# Both tables hold the same rows and columns, so positions line up.
changed <- sum(all_tf_raw_data == 0 & all_tf_data > 0, na.rm = TRUE)

In [None]:
# Build a long table with one row per strictly positive entry of a TF table.
#   tbl    : data frame / matrix of counts
#   status : label stored in the "Imputation" column for every row
# Returns a data frame with a numeric "Counts" and a character "Imputation"
# column; it has zero rows when `tbl` holds no positive entry.
nonzero_counts_frame <- function(tbl, status) {
  values <- as.matrix(tbl)
  # NA entries (e.g. rows of TFs missing from the table) are not counts and
  # are dropped instead of being carried along as NA rows.
  values <- values[!is.na(values) & values > 0]
  data.frame(
    Counts = as.numeric(values),
    Imputation = rep(status, length(values)),
    stringsAsFactors = FALSE
  )
}

# The counts stay numeric throughout. Binding them with the label through
# cbind() would turn everything into a character matrix: that round-trips the
# numbers through text and, on R < 4.0, through factors, where as.numeric()
# returns level codes rather than the counts.
counts_mat_raw <- nonzero_counts_frame(all_tf_raw_data, "Before Imputation")
counts_mat_imputed <- nonzero_counts_frame(all_tf_data, "After Imputation")

counts_mat <- rbind(counts_mat_raw, counts_mat_imputed)

# Factor with an explicit level order so that facets show "Before" first.
counts_mat$Imputation_ordered <- factor(
  counts_mat$Imputation,
  levels = c("Before Imputation", "After Imputation")
)

In [None]:
# Histogram of the non-zero TF counts, one facet per imputation status
# (facet order given by Imputation_ordered), bars coloured by status.
nonzero_counts_plot <- ggplot(counts_mat, aes(x = Counts, fill = Imputation)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  theme_bw() +
  theme(text = element_text(size = 12)) +
  theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45)) +
  scale_x_continuous(n.breaks = 3) +
  facet_grid(~Imputation_ordered, switch = "x") +
  xlab("Non-Zero Counts") +
  ylab("Frequency") +
  # Must be chained with `+`: as a separate statement the legend title is
  # never attached to the plot.
  labs(fill = "Imputation Status")


In [None]:
# Write the histogram as PDF and PNG, both 7 x 4 inches at 300 dpi.
ggsave(
  filename = paste0(figure_folder, "/no_zero_histogram.pdf"),
  plot = nonzero_counts_plot,
  width = 7,
  height = 4,
  units = "in",
  dpi = 300
)
ggsave(
  filename = paste0(figure_folder, "/no_zero_histogram.png"),
  plot = nonzero_counts_plot,
  width = 7,
  height = 4,
  units = "in",
  dpi = 300
)

In [None]:
# Fixed seed so that the embedding is reproducible between runs.
set.seed(1)

# t-SNE on the binarised TF matrix. The matrix is transposed so that each
# cell is one observation (row). theta = 0 and check_duplicates = FALSE are
# passed through to Rtsne unchanged.
tsne_result <- Rtsne(
  t(bin_tf_data),
  check_duplicates = FALSE,
  theta = 0
)

In [None]:
# Embedding coordinates (columns of tsne_result$Y) side by side with the cell
# type of each cell; `labels` is in the same cell order as the t-SNE input.
tsne_out <- data.frame(
  data = tsne_result$Y,
  labels = labels
)

In [None]:
# Plotting table: first two columns of tsne_out as x / y, third as labels.
df <- data.frame(
  x = tsne_out[[1]],
  y = tsne_out[[2]],
  labels = tsne_out[[3]]
)

# Scatter plot of the embedding, one point per cell, coloured by cell type.
tsne_plot <- ggplot(df, aes(x = x, y = y)) +
  geom_point(aes(colour = factor(labels))) +
  xlab("tSNE Dimension 1") +
  ylab("tSNE Dimension 2") +
  labs(color = "Cell Type") +
  theme_bw() +
  theme(text = element_text(size = 12))

In [None]:
# Write the t-SNE plot as PDF and PNG, both 7 x 4 inches at 300 dpi.
ggsave(
  filename = paste0(figure_folder, "/tsne_plot.pdf"),
  plot = tsne_plot,
  width = 7,
  height = 4,
  units = "in",
  dpi = 300
)
ggsave(
  filename = paste0(figure_folder, "/tsne_plot.png"),
  plot = tsne_plot,
  width = 7,
  height = 4,
  units = "in",
  dpi = 300
)