In [None]:
source('../../../../source//basic.r')

source('../../../validataion.r')

source('method.r')

In [None]:
# Register a multicore BiocParallel backend with 60 workers.
register(MulticoreParam(workers = 60))

In [None]:
# Metadata table: first column becomes the row names, quoting is disabled so
# quote characters in fields are read literally. Its 'label' column is used
# later as the reference labels for clustering.
metadata <- read.table(
    file = '../../raw_data/input/metadata.tsv',
    header = TRUE,
    row.names = 1,
    quote = "",
    stringsAsFactors = FALSE
)

In [None]:
# File names (without directory) of everything ending in ".bam" in the BAM folder.
bamfile <- list.files("../../raw_data/bam/files/sc-bams_nodup/",
                      pattern = "\\.bam$")

# ALL

In [None]:
# BED file of peaks passed to fun_all() for the "ALL" run.
peakfile <- "../../raw_data/input/combined.sorted.merged.bed"

In [None]:
# Run fun_all() (defined outside this file) on the current peak file,
# silencing any messages it emits.
res.all <- suppressMessages(fun_all(peakfile))


In [None]:
# Exploratory fun_densityClust() call on the "all" result, using the metadata
# labels as reference (rho_/delta_ are presumably the density-peak cut-offs —
# confirm against the helper's definition).
a <- fun_densityClust(
    res = res.all,
    labels = metadata[, 'label'],
    title = 'all',
    rho_ = 15,
    delta_ = 10
)

# top 5K

In [None]:
# BED file of regions for the "top 5K" run.
# NOTE(review): the doubled slash in the path looks accidental — confirm.
peakfile.top <- "./regions//combinedPeaks.top.regions.bed"

In [None]:
# Overwrites the peak file used for the "ALL" run; the pipeline below reads `peakfile`.
peakfile <- peakfile.top

In [None]:
# k-mer deviation pipeline for the peak set currently in `peakfile`.
# Builds `res.top`: the z-score matrix, a t-SNE embedding and a UMAP embedding.
suppressMessages({
    # Load the peaks and resize each to a 500 bp window around its centre.
    peaks <- getPeaks(peakfile, sort_peaks = TRUE)
    peaks <- resize(peaks, width = 500, fix = "center")

    # Attach hg19 sequence info, then trim the resized windows.
    seqinfo(peaks) <- Seqinfo(genome = "hg19")
    peaks <- trim(peaks)

    # Cell name = BAM file name up to the first '.'.
    # vapply() guarantees a character vector even when `bamfile` is empty.
    cellnames <- vapply(strsplit(bamfile, '.', fixed = TRUE), "[[", character(1), 1)

    # NOTE(review): `bamfile` was listed from ../../raw_data/bam/files/sc-bams_nodup/
    # but the files are read from the directory below — confirm that both
    # locations hold the same files.
    fragment_counts <- getCounts(paste0("../../../../../test_data/Buenrostro_2018/bam/files/sc-bams_nodup/", bamfile),
                                 peaks,
                                 paired = TRUE,
                                 by_rg = TRUE,
                                 format = "bam",
                                 colData = data.frame(celltype = cellnames))

    fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg19)

    counts_filtered <- filterPeaks(fragment_counts, non_overlapping = TRUE)

    bg <- getBackgroundPeaks(counts_filtered)
    # Potentially save the bg object
    # saveRDS(bg, file = "bulkPeaks_background_peaks_kmers.rds")

    # 6-mer annotations and their deviations against the background peaks.
    kmer_ix <- matchKmers(6, counts_filtered, genome = BSgenome.Hsapiens.UCSC.hg19)

    dev <- computeDeviations(object = counts_filtered, annotations = kmer_ix,
                             background_peaks = bg)

    # Deviation z-scores; this matrix is what gets stored and saved downstream.
    df_zscores <- dev@assays@data$z

    df_out <- df_zscores

    ############## UMAP ############

    # Keep only the 1500 highest-variance rows so that the number of features
    # stays below the number of observations when computing the UMAP.
    df <- df_out

    df.vars <- df %>% apply(1, var)
    df.means <- df %>% apply(1, mean)

    df.plot <- df.vars %>%
        cbind(df.means) %>%
        as.data.frame %>%
        rename_with(~c('var', 'mean'))
    df.plot <- df.plot %>%
        mutate(rank.var = base::rank(plyr::desc(var)),
               rank.mean = base::rank(plyr::desc(mean)),
               labels = ifelse(rank.var <= 1500, 'variable', 'non-variable'))
    # psize()
    # df.plot%>%ggplot(aes(x=mean,y=var))+geom_point(aes(color=labels),cex=1,alpha=0.5)+theme_classic()#+xlim(c(0,20))+ylim(c(0,400))

    # Despite the name, these are row names of the z-score matrix.
    select.peaks <- df.plot %>% filter(labels == 'variable') %>% rownames
    df_out.sub <- df_out[select.peaks, ]

    umap <- run_umap(df_out.sub)

    # variability <- computeVariability(dev)
    # plotVariability(variability, use_plotly = FALSE)
})

################# TSNE #################
# The original also ran deviationsTsne() with threshold = 1.5 inside the block
# above, but that result was overwritten by this call before being used, so
# only the threshold = 0.5 run is kept.
# A stray top-level `return(list(...))` was removed as well: `return()` is only
# valid inside a function and aborted a scripted run before `res.top` was built.
tsne <- deviationsTsne(dev, threshold = 0.5, perplexity = 10)

res.top <- list(df_out = df_out,
                tsne = tsne,
                umap = umap)

# top 50K

In [None]:
# BED file of regions for the "top 50K" run (reuses the `peakfile.top` name).
# NOTE(review): the doubled slash in the path looks accidental — confirm.
peakfile.top <- "./regions//combinedPeaks.top2.regions.bed"

In [None]:
# Point `peakfile` at the "top 50K" regions; the pipeline below reads `peakfile`.
peakfile <- peakfile.top

In [None]:
# k-mer deviation pipeline for the peak set currently in `peakfile`.
# Builds `res.top2`: the z-score matrix, a t-SNE embedding and a UMAP embedding.
suppressMessages({
    # Load the peaks and resize each to a 500 bp window around its centre.
    peaks <- getPeaks(peakfile, sort_peaks = TRUE)
    peaks <- resize(peaks, width = 500, fix = "center")

    # Attach hg19 sequence info, then trim the resized windows.
    seqinfo(peaks) <- Seqinfo(genome = "hg19")
    peaks <- trim(peaks)

    # Cell name = BAM file name up to the first '.'.
    # vapply() guarantees a character vector even when `bamfile` is empty.
    cellnames <- vapply(strsplit(bamfile, '.', fixed = TRUE), "[[", character(1), 1)

    # NOTE(review): `bamfile` was listed from ../../raw_data/bam/files/sc-bams_nodup/
    # but the files are read from the directory below — confirm that both
    # locations hold the same files.
    fragment_counts <- getCounts(paste0("../../../../../test_data/Buenrostro_2018/bam/files/sc-bams_nodup/", bamfile),
                                 peaks,
                                 paired = TRUE,
                                 by_rg = TRUE,
                                 format = "bam",
                                 colData = data.frame(celltype = cellnames))

    fragment_counts <- addGCBias(fragment_counts, genome = BSgenome.Hsapiens.UCSC.hg19)

    counts_filtered <- filterPeaks(fragment_counts, non_overlapping = TRUE)

    bg <- getBackgroundPeaks(counts_filtered)
    # Potentially save the bg object
    # saveRDS(bg, file = "bulkPeaks_background_peaks_kmers.rds")

    # 6-mer annotations and their deviations against the background peaks.
    kmer_ix <- matchKmers(6, counts_filtered, genome = BSgenome.Hsapiens.UCSC.hg19)

    dev <- computeDeviations(object = counts_filtered, annotations = kmer_ix,
                             background_peaks = bg)

    # Deviation z-scores; this matrix is what gets stored and saved downstream.
    df_zscores <- dev@assays@data$z

    df_out <- df_zscores

    # variability <- computeVariability(dev)
    # plotVariability(variability, use_plotly = FALSE)
})


tsne <- deviationsTsne(dev, threshold = 0.5, perplexity = 10)

# Rebuild the high-variance subset from THIS section's z-scores. The original
# passed the `df_out.sub` left over from the top-5K section to run_umap(), so
# the stored UMAP did not reflect this peak set.
# Same rule as the top-5K section: keep rows whose descending variance rank is
# <= 1500; which() drops rows whose variance is NA.
row.vars <- apply(df_out, 1, var)
keep.rows <- which(base::rank(-row.vars) <= 1500)
df_out.sub <- df_out[keep.rows, , drop = FALSE]

umap <- run_umap(df_out.sub)

res.top2 <- list(df_out = df_out,
                 tsne = tsne,
                 umap = umap)

# plot

In [None]:
# psize() is defined outside this file; presumably it sets the notebook figure
# size (width 12, height 2.2) — confirm against its definition.
psize(12,2.2)

In [None]:
# Named container for the per-peak-set clustering results.
# Start from an empty list rather than c() (which is NULL): with NULL the
# container's type depends on the first value assigned through `[[<-`.
plot.list <- list()

In [None]:
# One fun_densityClust() result per peak set, keyed by peak-set name.
# The "all" run relies on the helper's default rho_/delta_ values.
plot.list[['all']] <- fun_densityClust(
    res = res.all,
    labels = metadata[, 'label'],
    title = 'all'
)
plot.list[['top']] <- fun_densityClust(
    res = res.top,
    labels = metadata[, 'label'],
    title = 'top',
    rho_ = 15,
    delta_ = 15
)
plot.list[['top2']] <- fun_densityClust(
    res = res.top2,
    labels = metadata[, 'label'],
    title = 'top2',
    rho_ = 15,
    delta_ = 15
)

In [None]:
# Output folder for the PDF below; showWarnings = FALSE keeps re-runs quiet
# when the folder already exists.
dir.create('plots', showWarnings = FALSE)

In [None]:
# Write one PDF page per entry of `plot.list`, five panels in a row. Going by
# the element names: cluster-number plot, t-SNE and UMAP by cluster, then
# t-SNE and UMAP by label with their legends hidden.
pdf('./plots/combinedPeaks.pdf', width = 12, height = 2)
tryCatch(
    # invisible(): grid.arrange() is called for drawing only; do not auto-print
    # the list of returned grobs.
    invisible(lapply(plot.list, function(x) {
        grid.arrange(x$plot$plot.clusterNum,
                     x$plot$plot.tsne.cluster,
                     x$plot$plot.umap.cluster,
                     x$plot$plot.tsne.label + theme(legend.position = 'none'),
                     x$plot$plot.umap.label + theme(legend.position = 'none'),
                     ncol = 5)
    })),
    # Always close the PDF device, even if drawing a page fails; otherwise the
    # device stays open and later plots are silently written into this file.
    finally = dev.off()
)

# save RDS

In [None]:
# Results to export, keyed by the name used in the output file names.
rds.list <- list(
    res.all = res.all,
    res.top = res.top,
    res.top2 = res.top2
)

In [None]:
# Save the `df_out` matrix of every result as
# rds/FM_combinedPeaks-<name>_data1.rds, with dots in the name replaced by
# dashes (e.g. "res.all" -> "res-all").
# The output folder is created first because saveRDS() fails when it is missing.
dir.create('rds', showWarnings = FALSE)
invisible(lapply(names(rds.list), function(x) {
    saveRDS(rds.list[[x]]$df_out,
            file = paste0('.//rds/FM_combinedPeaks-', gsub('\\.', '-', x), '_data1.rds'))
}))