In [None]:
library(cisTopic)

library(umap)

library(Rtsne)

library('tidyverse')

library(ggplot2)

In [None]:
# Set the plot size used by the notebook's `repr` display options.
#
# w: value stored in the `repr.plot.width` option (default 6).
# h: value stored in the `repr.plot.height` option (default 6).
# Returns whatever the underlying `options()` call returns.
psize <- function(w = 6, h = 6) {
  plot_dims <- list(repr.plot.width = w, repr.plot.height = h)
  options(plot_dims)
}

In [None]:
# Compute a UMAP layout for a feature matrix.
#
# fm_mat: matrix-like input; it is transposed before being passed to
#   `umap()`.
# random_state: seed forwarded to `umap()`. Defaults to 2019, the value that
#   was previously hard-coded, so existing one-argument calls are unchanged.
# Returns the `layout` element of the object returned by `umap()`.
run_umap <- function(fm_mat, random_state = 2019) {
  umap_fit <- umap(t(fm_mat), random_state = random_state)
  umap_fit$layout
}


In [None]:
# Fix the seed of R's random number generator for the rest of the session.
set.seed(seed = 2019)

In [None]:
# Load the metadata table: the first line is the header, the first column
# supplies the row names, quote handling is disabled and strings stay
# character vectors.
metadata <- read.table(
  file = "../../raw_data/input/metadata.tsv",
  row.names = 1,
  quote = "",
  stringsAsFactors = FALSE,
  header = TRUE
)

In [None]:
# Directory holding the BAM files. The trailing "/" matters: file names are
# appended to this string directly.
pathToBams <- "../../raw_data/bam/files/sc-bams_nodup/"

In [None]:
# Build the path of every BAM file in `pathToBams`. Restricting the listing to
# names ending in ".bam" keeps index files (e.g. ".bam.bai") and other stray
# files out of the list. `pathToBams` is expected to end with "/", so plain
# concatenation yields valid paths.
bamFiles <- paste0(pathToBams, list.files(pathToBams, pattern = "\\.bam$"))

In [None]:
# Derive one cell name per BAM file: the part of the file name before the
# first ".". `vapply()` guarantees a character vector even when `bamFiles` is
# empty, where `sapply()` would return an empty list.
cellnames <- vapply(
  strsplit(basename(bamFiles), ".", fixed = TRUE),
  `[[`,
  FUN.VALUE = character(1),
  1
)
head(cellnames)

In [None]:
# Reorder the BAM files and cell names to follow the row order of `metadata`.
ix <- match(rownames(metadata), cellnames)

# `match()` yields NA for metadata rows that have no BAM file. Fail here with
# a clear message instead of passing NA paths on to later steps.
if (anyNA(ix)) {
  stop(
    "No BAM file found for metadata row(s): ",
    paste(rownames(metadata)[is.na(ix)], collapse = ", "),
    call. = FALSE
  )
}

bamFiles <- bamFiles[ix]
cellnames <- cellnames[ix]

# ALL

In [None]:
# BED file of regions used for the model built in the next cell.
regions <- "../../raw_data/input/combined.sorted.merged.bed"

In [None]:
# Build the cisTopic object from the BAM files and the current `regions`
# file, rename its cells, run `runCGSModels()` over the listed topic numbers
# and keep the model chosen by `selectModel()`. Messages emitted by these
# steps are suppressed.
suppressMessages({
  cisTopicObject <- bamFiles %>%
    createcisTopicObjectFromBAM(
      regions,
      project.name = "buenrostro2018",
      paired = TRUE
    ) %>%
    renameCells(cellnames) %>%
    runCGSModels(
      topic = c(10, 20, 25, 30, 35, 40),
      seed = 987,
      nCores = 10,
      burnin = 120,
      iterations = 150,
      addModels = FALSE
    ) %>%
    selectModel()
})

# Pull the "cell" matrix of the selected model using the "Probability" method.
cellassign <- modelMatSelection(cisTopicObject, "cell", "Probability")

In [None]:
# Matrix that is embedded below and stored in the result list.
df_out <- cellassign

# Rows whose sum is NA contain missing values. Leave them out of BOTH
# embeddings so t-SNE and UMAP are computed from the same input.
# `drop = FALSE` keeps the subset a matrix even if a single row survives.
embed_input <- df_out[!is.na(rowSums(df_out)), , drop = FALSE]

tsne <- Rtsne(
  t(embed_input),
  perplexity = 50,
  check_duplicates = FALSE,
  pca = FALSE,
  theta = 0.01,
  max_iter = 1000
)

# run_umap() transposes its input itself, so it gets the untransposed matrix.
umap <- run_umap(embed_input)

res.all <- list(df_out = df_out, umap = umap, tsne = tsne)

# top 5k HVFs

In [1]:
# BED file of regions used for the model built in the next cell.
regions <- "../../cpeaks_filteredFeature/combinedPeaks.top.regions.bed"

In [None]:
# Build the cisTopic object from the BAM files and the current `regions`
# file, rename its cells, run `runCGSModels()` over the listed topic numbers
# and keep the model chosen by `selectModel()`. Messages emitted by these
# steps are suppressed.
suppressMessages({
  cisTopicObject <- bamFiles %>%
    createcisTopicObjectFromBAM(
      regions,
      project.name = "buenrostro2018",
      paired = TRUE
    ) %>%
    renameCells(cellnames) %>%
    runCGSModels(
      topic = c(10, 20, 25, 30, 35, 40),
      seed = 987,
      nCores = 10,
      burnin = 120,
      iterations = 150,
      addModels = FALSE
    ) %>%
    selectModel()
})

# Pull the "cell" matrix of the selected model using the "Probability" method.
cellassign <- modelMatSelection(cisTopicObject, "cell", "Probability")

In [None]:
# Matrix that is embedded below and stored in the result list.
df_out <- cellassign

# Rows whose sum is NA contain missing values. Leave them out of BOTH
# embeddings so t-SNE and UMAP are computed from the same input.
# `drop = FALSE` keeps the subset a matrix even if a single row survives.
embed_input <- df_out[!is.na(rowSums(df_out)), , drop = FALSE]

tsne <- Rtsne(
  t(embed_input),
  perplexity = 50,
  check_duplicates = FALSE,
  pca = FALSE,
  theta = 0.01,
  max_iter = 1000
)

# run_umap() transposes its input itself, so it gets the untransposed matrix.
umap <- run_umap(embed_input)

res.top <- list(df_out = df_out, umap = umap, tsne = tsne)

# top 50 HVFs

In [None]:
# BED file of regions used for the model built in the next cell.
regions <- "../../cpeaks_filteredFeature/combinedPeaks.top2.regions.bed"

In [None]:
# Build the cisTopic object from the BAM files and the current `regions`
# file, rename its cells, run `runCGSModels()` over the listed topic numbers
# and keep the model chosen by `selectModel()`. Messages emitted by these
# steps are suppressed.
suppressMessages({
  cisTopicObject <- bamFiles %>%
    createcisTopicObjectFromBAM(
      regions,
      project.name = "buenrostro2018",
      paired = TRUE
    ) %>%
    renameCells(cellnames) %>%
    runCGSModels(
      topic = c(10, 20, 25, 30, 35, 40),
      seed = 987,
      nCores = 10,
      burnin = 120,
      iterations = 150,
      addModels = FALSE
    ) %>%
    selectModel()
})

# Pull the "cell" matrix of the selected model using the "Probability" method.
cellassign <- modelMatSelection(cisTopicObject, "cell", "Probability")

In [None]:
# Matrix that is embedded below and stored in the result list.
df_out <- cellassign

# Rows whose sum is NA contain missing values. Leave them out of BOTH
# embeddings so t-SNE and UMAP are computed from the same input.
# `drop = FALSE` keeps the subset a matrix even if a single row survives.
embed_input <- df_out[!is.na(rowSums(df_out)), , drop = FALSE]

tsne <- Rtsne(
  t(embed_input),
  perplexity = 50,
  check_duplicates = FALSE,
  pca = FALSE,
  theta = 0.01,
  max_iter = 1000
)

# run_umap() transposes its input itself, so it gets the untransposed matrix.
umap <- run_umap(embed_input)

res.top2 <- list(df_out = df_out, umap = umap, tsne = tsne)

# save RDS

In [None]:
# Collect the three result lists under the names that are turned into output
# file names below.
rds.list <- list(
  res.all = res.all,
  res.top = res.top,
  res.top2 = res.top2
)

# saveRDS() does not create missing directories, so make sure the output
# directory exists before writing.
dir.create("./rds", showWarnings = FALSE, recursive = TRUE)

# Write the `df_out` element of each result to its own RDS file; dots in the
# list name become dashes in the file name (e.g. "res.all" -> "res-all").
# invisible() keeps the list of NULLs returned by lapply() out of the
# notebook output.
invisible(lapply(names(rds.list), function(x) {
  saveRDS(
    rds.list[[x]]$df_out,
    file = paste0(
      "./rds/FM_combinedPeaks-",
      gsub("\\.", "-", x),
      "_data1.rds"
    )
  )
}))