In [None]:
source('../../../../source//basic.r')

source('../../../validataion.r')

In [None]:
library(Rtsne)

In [None]:
# Annotation table; its 'label' column is used further down when plotting.
# The first column of the file supplies the row names.
metadata <- read.table(
    file = '../../raw_data/input/metadata.tsv',
    header = TRUE,
    row.names = 1,
    quote = "",
    stringsAsFactors = FALSE
)

In [None]:
# Truncated SVD of `mat`, returning the scaled right singular vectors.
#
# Arguments:
#   mat              matrix handed to irlba(); every column of `mat` yields
#                    one column in the result.
#   num_pcs          number of singular triplets requested from irlba().
#   remove_first_PC  if TRUE, the row belonging to the first component is
#                    left out of the result.
#   scale, center    forwarded unchanged to irlba().
#
# Returns:
#   The matrix diag(d) %*% t(v). It has num_pcs rows (num_pcs - 1 when
#   remove_first_PC is TRUE) and is always a matrix, including the cases
#   where one or zero rows remain.
#
# Side effect: resets the global RNG with set.seed(2019) on every call.
run_pca <- function(mat, num_pcs = 50, remove_first_PC = FALSE, scale = FALSE, center = FALSE) {
    # Fixed seed so repeated calls on the same input give the same output
    # (irlba() draws a random starting vector unless one is supplied).
    set.seed(2019)
    SVD <- irlba(mat, num_pcs, num_pcs, scale = scale, center = center, maxit = 10000)

    # nrow is given explicitly: diag(x) with a length-one x would otherwise
    # build an x-by-x identity matrix instead of a 1x1 matrix holding x.
    SVD_vd <- diag(SVD$d, nrow = num_pcs) %*% t(SVD$v)

    if (remove_first_PC) {
        # seq_len()[-1] is empty for num_pcs == 1 (2:num_pcs would be c(2, 1)),
        # and drop = FALSE keeps a single remaining row as a 1-row matrix.
        SVD_vd <- SVD_vd[seq_len(num_pcs)[-1], , drop = FALSE]
    }
    SVD_vd
}

In [None]:
# UMAP embedding of the columns of `fm_mat`.
#
# The matrix is transposed before being passed to umap(), and
# random_state is pinned to 2019. Only the `layout` element of the umap()
# result is returned.
run_umap <- function(fm_mat) {
    embedding <- umap(t(fm_mat), random_state = 2019)
    embedding$layout
}

In [None]:
# Collect the input feature matrices. `pattern` is a regular expression,
# not a shell glob, so it is anchored to the literal ".rds" extension.
# The directory string is kept verbatim (trailing slash included) so the
# full paths returned here are unchanged.
files <- list.files('../chromVAR-motif/rds/', full.names = TRUE,
                    pattern = '\\.rds$')

In [None]:
# Reduce one saved feature matrix with PCA and embed it with UMAP and t-SNE.
#
# Arguments:
#   file        path to an .rds file; its content is read with readRDS() and
#               its column names are carried over to the PCA output.
#   pc          number of components requested from run_pca().
#   perplexity  perplexity passed to Rtsne(); the default of 20 is the value
#               that used to be hard-coded here.
#
# Returns a list with three elements:
#   df_out  the run_pca() output as a data frame, rows named PC1..PC<pc>
#   tsne    the $Y element of the Rtsne() result
#   umap    the run_umap() result
fun_cal <- function(file, pc = 10, perplexity = 20) {

    mat <- readRDS(file)

    # Rows whose sum is NA are excluded before the decomposition.
    # drop = FALSE keeps the input two-dimensional even if one row survives.
    keep <- !is.na(rowSums(mat))
    mat.pca <- run_pca(mat[keep, , drop = FALSE], num_pcs = pc)
    colnames(mat.pca) <- colnames(mat)
    rownames(mat.pca) <- paste0('PC', seq_len(pc))

    umap.data <- run_umap(mat.pca)

    # pca = FALSE: the input is already the PCA output, so Rtsne() is told
    # not to run its own PCA step first.
    tsne.data <- Rtsne(t(mat.pca), pca = FALSE, perplexity = perplexity)$Y

    list(df_out = mat.pca %>% as.data.frame,
         tsne = tsne.data,
         umap = umap.data)
}

In [None]:
# Run fun_cal on every input file, using up to 10 forked workers.
res.list <- mclapply(files, fun_cal, mc.cores = 10)

# mclapply() does not stop when a worker fails: the failure comes back as a
# "try-error" element. A NULL element is also treated as a failure here,
# because fun_cal always returns a list. Stop now and name the affected
# files rather than failing later in an unrelated place.
res.failed <- vapply(res.list,
                     function(r) is.null(r) || inherits(r, "try-error"),
                     logical(1))
if (any(res.failed)) {
    stop("fun_cal failed for: ", paste(files[res.failed], collapse = ", "),
         call. = FALSE)
}

In [None]:
# Name each result after the part of its file name between "FM_" and
# "_data1.rds". Matching on basename() avoids depending on how the directory
# part of the path happens to be written, and the dot is escaped so it only
# matches a literal ".". File names that do not fit the pattern yield NA.
names(res.list) <- str_extract(basename(files), '(?<=^FM_).*(?=_data1\\.rds$)')

# plot

In [None]:
# Empty placeholder (c() with no arguments is NULL).
plot.list <- NULL

In [None]:
# NOTE(review): psize() is defined outside this file; the arguments mirror
# the width/height passed to pdf() below, so it presumably sets the inline
# plot size -- confirm against its definition.
psize(12, 2.2)

In [None]:
# Start with no plotting results (c() with no arguments is NULL).
res.plot.list <- NULL

In [None]:
# Draw one fun_densityClust() figure per entry of res.list into a single PDF
# and keep every return value, keyed by the entry name.

# pdf() cannot create missing directories; make sure the target exists.
if (!dir.exists('plots')) dir.create('plots')

# One slot per input so results are kept per name instead of each iteration
# overwriting the previous one.
res.plot.list <- vector('list', length(res.list))
names(res.plot.list) <- names(res.list)

pdf('plots/plot.pdf', width = 12, height = 2.2)
tryCatch({
    for (x in names(res.list)) {
        # Assigning through list() keeps the slot even if the call returns NULL.
        res.plot.list[x] <- list(
            fun_densityClust(res.list[[x]], labels = metadata[, 'label'],
                             title = x, rho_ = 15, delta_ = 15, plot = TRUE)
        )
    }
}, finally = {
    # Close the device even when a plot call fails, so the PDF is not left
    # open and later plots are not redirected into it.
    invisible(dev.off())
})

# save RDS

In [None]:
# Create the output directory only when it is missing, so re-running this
# cell does not raise an "already exists" warning.
if (!dir.exists('rds')) dir.create('rds')

In [None]:
# Write the PCA table (df_out) of every result to rds/FM_<name>_data1.rds.
# A plain loop is used because this is done purely for its side effect;
# lapply() would also build and print a list of NULL return values.
for (x in names(res.list)) {
    saveRDS(res.list[[x]]$df_out, file = paste0('.//rds/FM_', x, '_data1.rds'))
}