## Create binarized Seurat objects, merge them, and find top features (min.cutoff = 20)

In [1]:
# ---- Inputs ----
# Root directory of the processed data; sample folders are looked up beneath it.
cellr_in <- "/data2/isshamie/mito_lineage/data/processed/mtscATAC/jan21_2021/MTblacklist"
# Comma-separated sample IDs and their display names
# (presumably paired by position - confirm).
samples <- "J2,P2"
sample_names <- "Flt3l,Ctrl"

# The single sample processed by the cells below.
sample <- "J2"

# ---- Output ----
outdir <- "/data/isshamie/mito_lineage/output/annotation/cd34norm/MTblacklist/QC" #"/data2/mito_lineage/Analysis/annotation/output/data/"

# ---- Parameters ----
nTop <- 25000
assay <- "RNA"

cores <- 36

In [2]:
# Split the comma-separated inputs into character vectors.
# Both variables are parsed the same way, with `unlist(strsplit(...))` and no
# `[[1]]`, so re-running this cell on already-split vectors leaves them
# unchanged instead of truncating `samples` to its first element.
samples <- unlist(strsplit(samples, ",", fixed = TRUE))
sample_names <- unlist(strsplit(sample_names, ",", fixed = TRUE))

# Fail early if the two lists cannot be paired one-to-one.
stopifnot(length(samples) == length(sample_names))

samples

In [3]:
# Packages attached for this notebook.
library(Signac)
library(Seurat)
library(GenomeInfoDb)
# NOTE(review): EnsDb.Hsapiens.v75 is presumably the Ensembl 75 (GRCh37/hg19)
# annotation, while the chromatin assay below is created with genome = 'hg38'
# - confirm the intended genome build before using it for annotations.
library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
# Fixed seed so that any stochastic step gives the same result on every run.
set.seed(1234)
# Parallel execution is currently disabled. Before re-enabling it:
#   * `plan()` is not provided by any package attached above (presumably it
#     comes from the `future` package - confirm and attach it first);
#   * `workers` is not defined in the input cell, only `cores` is.
#plan("multiprocess", workers = cores)
#options(future.globals.maxSize = 50000 * 1024^2) # for 50 Gb RAM
#plan("multiprocess", workers = workers)
#options(future.globals.maxSize = 8000 * 1024^2)

Attaching SeuratObject

Loading required package: BiocGenerics

Loading required package: parallel


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which.max, which.min


Loading required package: S4Vectors

Loading required package: stats4


Attaching package: 

ERROR: Error in plan("multiprocess", workers = cores): could not find function "plan"


In [4]:
# Build the Seurat object for one sample from the files under
# <cellr_in>/<sample>/outs.
#
# `sample` enters this cell as the sample ID string and is overwritten with the
# Seurat object at the end, so the cell cannot simply be re-run. Fail with a
# clear message instead of an obscure path-construction error.
if (!is.character(sample) || length(sample) != 1L) {
  stop(
    "`sample` is not a single sample ID string; re-run the input cell ",
    "before re-running this one.",
    call. = FALSE
  )
}

# All three inputs live in the same directory; build it once and verify the
# files exist before starting any expensive loading.
outs_dir <- file.path(cellr_in, sample, "outs")
input_files <- file.path(
  outs_dir,
  c("filtered_peak_bc_matrix.h5", "singlecell.csv", "fragments.tsv.gz")
)
if (!all(file.exists(input_files))) {
  stop(
    "Missing input file(s): ",
    paste(input_files[!file.exists(input_files)], collapse = ", "),
    call. = FALSE
  )
}

# Count matrix, and the per-barcode table (row names from its first column).
counts <- Read10X_h5(filename = file.path(outs_dir, "filtered_peak_bc_matrix.h5"))
metadata <- read.csv(
  file = file.path(outs_dir, "singlecell.csv"),
  header = TRUE,
  row.names = 1
)

# Chromatin assay linked to the fragments file; feature names are parsed with
# ":" and "-" as separators.
chrom_assay <- CreateChromatinAssay(
  counts = counts,
  sep = c(":", "-"),
  genome = "hg38",
  fragments = file.path(outs_dir, "fragments.tsv.gz"),
  min.cells = 10,
  min.features = 200
)

# From here on `sample` is the Seurat object, not the sample ID.
sample <- CreateSeuratObject(
  counts = chrom_assay,
  assay = "peaks",
  meta.data = metadata
)

"'giveCsparse' has been deprecated; setting 'repr = "T"' for you"
Computing hash

"Some cells in meta.data not present in provided counts matrix."


In [6]:
# Print the summary of the "peaks" assay.
sample[["peaks"]]

ChromatinAssay data with 140164 features for 11947 cells
Variable features: 0 
Genome: hg38 
Annotation present: FALSE 
Motifs present: FALSE 
Fragment files: 1 

In [7]:
# Display the per-cell metadata table stored on the object.
sample[[]]

Unnamed: 0_level_0,orig.ident,nCount_peaks,nFeature_peaks,total,duplicate,chimeric,unmapped,lowmapq,mitochondrial,passed_filters,cell_id,is__cell_barcode,TSS_fragments,DNase_sensitive_region_fragments,enhancer_region_fragments,promoter_region_fragments,on_target_fragments,blacklist_region_fragments,peak_region_fragments,peak_region_cutsites
Unnamed: 0_level_1,<fct>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<chr>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
AAACGAAAGAGCTCCC-1,SeuratProject,8184,4312,25798,3956,111,95,1643,4740,15253,_cell_0,1,2708,0,0,0,2708,0,4667,8189
AAACGAAAGCGATACG-1,SeuratProject,3975,2033,18340,2478,78,92,794,9325,5573,_cell_1,1,1062,0,0,0,1062,0,2122,3975
AAACGAAAGGCTTCGC-1,SeuratProject,6991,3631,28417,4398,132,122,1771,10224,11770,_cell_2,1,2145,0,0,0,2145,0,3851,6992
AAACGAAAGTACAACA-1,SeuratProject,2440,1286,12018,1524,60,45,585,5688,4116,_cell_3,1,727,0,0,0,727,0,1329,2440
AAACGAACAACGTACT-1,SeuratProject,5443,2815,37690,3034,82,125,1680,24974,7795,_cell_4,1,1562,0,0,0,1562,0,2945,5445
AAACGAACAAGCGGTA-1,SeuratProject,2386,1235,9381,1416,44,50,547,3616,3708,_cell_5,1,705,0,0,0,705,0,1279,2386
AAACGAACACCTGGTG-1,SeuratProject,4977,2525,30421,5698,112,157,1762,14034,8658,_cell_6,1,1708,0,0,0,1708,0,2699,4977
AAACGAACACGATATC-1,SeuratProject,4132,2177,15245,1960,89,32,737,5330,7097,_cell_7,1,1273,0,0,0,1273,0,2274,4133
AAACGAACACTTACAG-1,SeuratProject,19889,9539,48515,10680,240,158,2715,12096,22626,_cell_8,1,4793,0,0,0,4793,0,10788,19892
AAACGAACATTAAGGA-1,SeuratProject,2260,1194,10268,1269,49,50,603,4716,3581,_cell_9,1,669,0,0,0,669,0,1238,2260


In [11]:
# Per-cell QC metrics, stored back on the object.
# NOTE(review): the assay summary printed earlier reports
# "Annotation present: FALSE"; TSSEnrichment presumably needs gene annotations
# (or explicit TSS positions) - confirm they are attached before running this.

# Nucleosome signal score for each cell.
sample <- NucleosomeSignal(sample)

# TSS enrichment score for each cell.
sample <- TSSEnrichment(sample, fast = FALSE)

# Percentage of passed-filter fragments that fall in peaks, and the ratio of
# blacklist-region fragments to peak-region fragments.
sample$pct_reads_in_peaks <- sample$peak_region_fragments / sample$passed_filters * 100
sample$blacklist_ratio <- sample$blacklist_region_fragments / sample$peak_region_fragments

"restarting interrupted promise evaluation"


ERROR: Error in NucleosomeSignal(object = sample): cannot open file '/data/isshamie/software/anaconda3/envs/mito_trace/lib/R/library/fastmatch/R/fastmatch.rdb': No such file or directory


In [None]:
# NOTE(review): this cell looks like an unfilled usage template and has not
# been executed. `object` is not defined anywhere in the visible notebook, and
# `pattern` and `features` are both NULL, so no feature set is selected -
# presumably the call fails as written. Supply the Seurat object and a
# pattern or feature list, or remove the cell.
PercentageFeatureSet(
  object,
  pattern = NULL,
  features = NULL,
  col.name = NULL,
  assay = NULL
)
