## Creates binarized Seurat objects, integrates conditions and annotates genes by nearby peaks

In [1]:
# Input info (paths from an earlier run, kept commented out for reference)
#cellr_in = "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/DUPI_april08_2021_Croker/MTblacklist_mtasnucl/" 
#outdir = "/mnt/md0/isshamie/Projects/Mito_Trace/output/pipeline/DUPI_april08_2021/MTblacklist_mtasnucl_Bonly/data/annotation/gff_hg38_1_2/mergedSamples/"

# Parameters
# Number of parallel workers used by the notebook.
cores <- 24

# Identifier of the annotation set; used to build the annotation RDS file name.
gff_id <- "A2_black"

# Input locations: annotation GRanges directory and the pipeline YAML config.
annotations_indir <- "/mnt/md0/isshamie/Projects/Mito_Trace/data/processed/annotation_granges/"
config_f <- "/data/Mito_Trace/parameters/pipeline/cosmo_server_v02/chip_paper/CHIP_aggr_samples.yaml"

# Output locations.
# NOTE(review): out_f is not read by the cells visible here (the BED is written
# to file.path(outdir, "merged_peaks.bed")) - confirm whether it is still needed.
out_f <- "/mnt/md0/isshamie/Projects/Mito_Trace/output/CHIP_aggr/data/annotation/gff_A2/mergedSamples/merged_peaks.bed"
outdir <- "/mnt/md0/isshamie/Projects/Mito_Trace/output/CHIP_aggr/data/annotation/gff_A2/mergedSamples"

In [2]:
# Set the default width and height of inline plots through the repr options.
library(repr)
options(repr.plot.width=12, repr.plot.height=12)

In [3]:
library(yaml)

# Parse the pipeline config and keep its `samples_csv` entry; the value is
# echoed so the sample sheets in use are visible in the notebook.
pipeline.config <- read_yaml(config_f)
samps <- pipeline.config$samples_csv
samps

“incomplete final line found on '/data/Mito_Trace/parameters/pipeline/cosmo_server_v02/chip_paper/CHIP_aggr_samples.yaml'”


In [4]:
# Load the saved annotation GRanges for `gff_id` from `annotations_indir`
# (file name: "<gff_id>.annotationGranges.rds").
annotations <- readRDS(file.path(annotations_indir, paste0(gff_id, ".annotationGranges.rds")))

In [5]:
# Print the annotation object as a quick sanity check.
annotations

Loading required package: GenomicRanges

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    union, unique, unsplit, which.max, which.min


Loading required package: S4Vectors


Attaching package: ‘S4Vectors’


The following objects are masked from ‘package:base’:

    expand.grid, I, unname


Loading required package: IRanges

Loading required package: GenomeInfoDb



GRanges object with 2765969 ranges and 25 metadata columns:
              seqnames        ranges strand |   source       type     score
                 <Rle>     <IRanges>  <Rle> | <factor>   <factor> <numeric>
        [1]       chr1   29554-31109      + |   HAVANA gene              NA
        [2]       chr1   29554-31097      + |   HAVANA transcript        NA
        [3]       chr1   29554-30039      + |   HAVANA exon              NA
        [4]       chr1   30564-30667      + |   HAVANA exon              NA
        [5]       chr1   30976-31097      + |   HAVANA exon              NA
        ...        ...           ...    ... .      ...        ...       ...
  [2765965] KI270734.1 138483-138667      - |  ENSEMBL CDS               NA
  [2765966] KI270734.1 138480-138482      - |  ENSEMBL stop_codon        NA
  [2765967] KI270734.1 161689-161852      - |  ENSEMBL UTR               NA
  [2765968] KI270734.1 161587-161626      - |  ENSEMBL UTR               NA
  [2765969] KI270734.1 13808

In [6]:
library(GenomicRanges)
library(Seurat)
library(Signac)
library(GenomeInfoDb)
#library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
set.seed(1234)
library(data.table)
library(magrittr)
library(cowplot)
library(metap)
library(dplyr)
library(future)

# Parallel backend for the future framework.
# plan("multiprocess") is deprecated in the future package, so choose the
# backend explicitly: forked workers where forking is supported, otherwise
# background R sessions (the same choice "multiprocess" used to make).
if (future::supportsMulticore()) {
    plan("multicore", workers = cores)
} else {
    plan("multisession", workers = cores)
}
# Upper limit on the size of globals exported to workers: 8000 * 1024^2 bytes.
options(future.globals.maxSize = 8000 * 1024^2)
#options(future.globals.maxSize = 50000 * 1024^2) # for 50 Gb RAM
#plan("multiprocess", workers = workers)

Attaching SeuratObject


Attaching package: ‘data.table’


The following object is masked from ‘package:GenomicRanges’:

    shift


The following object is masked from ‘package:IRanges’:

    shift


The following objects are masked from ‘package:S4Vectors’:

    first, second



Attaching package: ‘cowplot’


The following object is masked from ‘package:patchwork’:

    align_plots



Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:GenomicRanges’:

    intersect, setdiff, union


The following object is masked from ‘package:GenomeInfoDb’:

    intersect


The following objects are masked from ‘package:IRanges’:

    collapse, desc, intersect, setdiff, slice, union


The following objects are masked from ‘package:S4Vectors’:

    first, intersect, rename, setdiff, setequal, union


The following objects are masked from ‘package:BiocGenerics’:

    combine, intersect, set

## Merge all peaks

In [7]:
# Read the peak calls stored next to a sample's barcode file.
#
# Arguments:
#   curr.samp.f: path to a sample's barcodes.tsv file. The peak file path is
#                derived by replacing "barcodes.tsv" with "peaks.bed".
# Returns:
#   A GRanges built from the three columns (chr, start, end) of the peak file.
read.peaks <- function(curr.samp.f){
    # Match the file name literally; as a plain regex the "." in
    # "barcodes.tsv" would match any character.
    peaks.f <- stringr::str_replace(curr.samp.f, stringr::fixed("barcodes.tsv"), "peaks.bed")
    print('curr.samp')
    print(curr.samp.f)
    print(peaks.f)
    peaks <- read.table(
      file = peaks.f,
      col.names = c("chr", "start", "end")
    )
    # convert to genomic ranges
    # NOTE(review): start values are passed through unchanged (no
    # starts.in.df.are.0based = TRUE); BED starts are conventionally 0-based -
    # confirm this is the intended coordinate convention.
    gr <- makeGRangesFromDataFrame(peaks)
    return(gr)
}


# Load the peak sets of every sample listed in one sample sheet.
#
# Arguments:
#   sample_csv: path to a CSV file with a `barcode_f` column holding the
#               barcode file path of each sample.
# Returns:
#   A list with one element per row of the sheet (the result of read.peaks()),
#   named by the barcode file path.
run.experiments <- function(sample_csv){
    curr.csv <- readr::read_csv(sample_csv)
    print(curr.csv)
    # simplify = FALSE keeps the result a plain named list no matter how many
    # samples there are; sapply()'s default simplification makes the return
    # shape depend on the input.
    curr.grs <- sapply(curr.csv$barcode_f, read.peaks, simplify = FALSE)
    return(curr.grs)
}

# One entry per sample sheet, each holding that sheet's per-sample peak sets.
# simplify = FALSE guarantees a list of lists; by default sapply() would
# collapse the result into a list-matrix (or a flat list) whenever every sheet
# happens to contain the same number of samples.
gr.full <- sapply(samps, run.experiments, simplify = FALSE)


[1mRows: [22m[34m3[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[90m# A tibble: 3 × 5[39m
  sample_name cellr_ID fastq_ID             barcode_f                      bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m                [3m[90m<chr>[39m[23m                          [3m[90m<chr>[39m[23m
[90m1[39m Control     Control  BC_10xATAC_rxn1b_neg /home/isaac/lewis/mito_lineag… /hom…
[90m2[39m Flt3l       Flt3l    BC_10xATAC_rxn1b_pos /home/isaac/lewis/mito_lineag… /hom…
[90m3[39m Input       Input    BC_10xATAC_rxn1      /home/isaac/lewis/mito_lineag… /hom…
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b1/MTBlacklist_A2/Control/outs/filtered_peak_bc_matrix/barcodes.tsv"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b1/MTBlacklist_A2/Control/outs/filtered_peak_bc_matrix/peaks.bed"
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b1/MTBlacklist_A2/Flt3l/outs/filtered_peak_bc_matrix/barcod

[1mRows: [22m[34m1[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[90m# A tibble: 1 × 5[39m
  sample_name cellr_ID fastq_ID        barcode_f                           bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m           [3m[90m<chr>[39m[23m                               [3m[90m<chr>[39m[23m
[90m1[39m Input       Input    BC_10xATAC_rxn2 /home/isaac/lewis/mito_lineage/out… /hom…
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b2/MTBlacklist_A2/Input/outs/filtered_peak_bc_matrix/barcodes.tsv"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b2/MTBlacklist_A2/Input/outs/filtered_peak_bc_matrix/peaks.bed"


[1mRows: [22m[34m2[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[90m# A tibble: 2 × 5[39m
  sample_name cellr_ID fastq_ID      barcode_f                             bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m         [3m[90m<chr>[39m[23m                                 [3m[90m<chr>[39m[23m
[90m1[39m Input       Flt3l    BC_10xATAC_Fp /home/isaac/lewis/mito_lineage/outpu… /hom…
[90m2[39m Flt3l       Control  BC_10xATAC_Fn /home/isaac/lewis/mito_lineage/outpu… /hom…
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_april08_2021_Croker/MTblacklist/Flt3l/outs/filtered_peak_bc_matrix/barcodes.tsv"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_april08_2021_Croker/MTblacklist/Flt3l/outs/filtered_peak_bc_matrix/peaks.bed"
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_april08_2021_Croker/MTblacklist/Control/outs/filtered_peak_bc_matrix/barcodes.tsv"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_april08_202

[1mRows: [22m[34m2[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[90m# A tibble: 2 × 5[39m
  sample_name cellr_ID fastq_ID      barcode_f                             bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m         [3m[90m<chr>[39m[23m                                 [3m[90m<chr>[39m[23m
[90m1[39m Control     P2       BC_10xATAC_P2 /home/isaac/lewis/mito_lineage/outpu… /hom…
[90m2[39m Flt3l       J2       BC_10xATAC_J2 /home/isaac/lewis/mito_lineage/outpu… /hom…
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/jan21_2021/MTblacklist/P2/outs/filtered_peak_bc_matrix/barcodes.tsv"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/jan21_2021/MTblacklist/P2/outs/filtered_peak_bc_matrix/peaks.bed"
[1] "curr.samp"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/jan21_2021/MTblacklist/J2/outs/filtered_peak_bc_matrix/barcodes.tsv"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/jan21_2021/MTblacklist/J2/outs/filtered_peak_bc_matrix/peaks.bed"


In [8]:
# Flatten one level (sample sheets -> samples), dropping the names, so that
# gr.full becomes a single list of per-sample peak sets.
gr.full <- unlist(gr.full, recursive=FALSE, use.names=FALSE)


In [9]:
# Concatenate every per-sample peak set into one GRanges. A single c() call
# replaces growing the object one sample at a time in a loop.
stopifnot(length(gr.full) > 0)
gr.full.c <- do.call(c, unname(gr.full))
# Collapse the concatenated ranges into one reduced peak set.
combined.peaks <- reduce(x = gr.full.c)

# Filter out bad peaks based on length:
# keep only peaks whose width is strictly between 20 and 10000.
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths  < 10000 & peakwidths > 20]
head(combined.peaks)

GRanges object with 6 ranges and 0 metadata columns:
      seqnames        ranges strand
         <Rle>     <IRanges>  <Rle>
  [1]     chr1    9866-10638      *
  [2]     chr1   16106-16366      *
  [3]     chr1   96454-96767      *
  [4]     chr1 180685-181403      *
  [5]     chr1 184126-184527      *
  [6]     chr1 191059-192097      *
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths

In [10]:
# Total number of peaks in the combined, width-filtered set.
length(combined.peaks)

## Are any peaks on chrM?

In [11]:
# Number of peaks that are NOT on chrM; if this is smaller than the total
# peak count, some peaks lie on chrM.
length(combined.peaks[seqnames(combined.peaks) != "chrM"])


In [12]:
# Show the peaks of a single chromosome (chr1 here); the chrM variant is kept
# commented out.
#combined.peaks[seqnames(combined.peaks) == "chrM"]

combined.peaks[seqnames(combined.peaks) == "chr1"]


GRanges object with 21709 ranges and 0 metadata columns:
          seqnames              ranges strand
             <Rle>           <IRanges>  <Rle>
      [1]     chr1          9866-10638      *
      [2]     chr1         16106-16366      *
      [3]     chr1         96454-96767      *
      [4]     chr1       180685-181403      *
      [5]     chr1       184126-184527      *
      ...      ...                 ...    ...
  [21705]     chr1 248924437-248928616      *
  [21706]     chr1 248930023-248930301      *
  [21707]     chr1 248942373-248942525      *
  [21708]     chr1 248944205-248944462      *
  [21709]     chr1 248944909-248946333      *
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths

## Remove any peaks on chrM

In [13]:
# Drop every peak located on chrM, then report how many peaks remain.
combined.peaks <- combined.peaks[!(seqnames(combined.peaks) == "chrM")]
length(combined.peaks)

## Save merged peaks as bed

In [14]:
# Concatenate the per-sample peak sets in gr.full into a single GRanges.
# NOTE(review): this is built from gr.full, not from combined.peaks, so the
# ranges are neither reduced, width-filtered nor stripped of chrM - confirm
# this is what should be saved as the "merged" peaks.
gr <- unlist(as(gr.full, "GRangesList"))

In [15]:
# Build a six-column BED-style table (chrom, start, end, name, score, strand)
# from `gr`.
# NOTE(review): read.peaks() loads BED starts without a 0-based adjustment, so
# subtracting one here may shift starts by one base relative to the input
# files - confirm the intended convention.
df <- data.frame(seqnames=seqnames(gr),
  # start minus one, kept as integer by using 1L
  starts=start(gr)-1L,
  ends=end(gr),
  # sequential peak id; seq_along() avoids converting the whole GRanges to a
  # data frame just to number its rows
  names=seq_along(gr),
  scores=rep(".", length(gr)),
  strands=strand(gr))

df

seqnames,starts,ends,names,scores,strands
<fct>,<int>,<int>,<int>,<chr>,<fct>
chr1,9866,10609,1,.,*
chr1,180846,180976,2,.,*
chr1,184393,184458,3,.,*
chr1,191211,191970,4,.,*
chr1,267786,268257,5,.,*
chr1,271048,271558,6,.,*
chr1,586018,586368,7,.,*
chr1,605251,605792,8,.,*
chr1,778134,779417,9,.,*
chr1,816855,817645,10,.,*


In [16]:
# Write the BED table as tab-separated text without header, row names or
# quoting. The output directory is created first if it does not exist yet.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
write.table(df, file=file.path(outdir,"merged_peaks.bed"), quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)

In [17]:
# Record the R version and attached package versions for reproducibility.
sessionInfo()

R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.4 LTS

Matrix products: default
BLAS/LAPACK: /home/isaac/miniconda3/envs/mttrace/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] future_1.23.0        dplyr_1.0.8          metap_1.1           
 [4] cowplot_1.1.1        magrittr_2.0.2       data.table_1.14.2   
 [7] patchwork_1.1.1      ggplot2_3.3.5        Signac_1.4.0        
[10] SeuratObject_4.0.4   Seurat_4.1.0         GenomicRanges_1.46.0
[13] GenomeInfoDb_

## Create expSignac

In [26]:
# Build one Seurat object per sample, quantified on a shared peak set.
#
# Arguments:
#   samples_df: table with one row per sample. The columns `cellr_ID`,
#               `sample_name` and `barcode_f` are read. Input files are
#               located relative to the directory two levels above
#               `barcode_f`: filtered_peak_bc_matrix/barcodes.tsv,
#               fragments.tsv.gz and singlecell.csv.
#   peaks:      GRanges of regions to quantify; defaults to the notebook-level
#               `combined.peaks`.
#   process_n:  forwarded to FeatureMatrix(). This is the number of regions
#               processed per chunk, not a core count (parallelism comes from
#               the future plan), so it no longer receives `cores`.
# Returns:
#   A list of Seurat objects (assay "ATAC"), one per row of `samples_df`, in
#   row order.
expr.load <- function(samples_df, peaks = combined.peaks, process_n = 2000){
    print('samples_df')
    print(samples_df)

    # Preallocate the result; seq_len() makes an empty sheet yield an empty
    # list instead of iterating over c(1, 0).
    n.samples <- nrow(samples_df)
    allSE <- vector("list", n.samples)

    for (row in seq_len(n.samples)){
        # `cellr.id` is only reported; it was called `exp` before, which
        # shadowed base::exp.
        cellr.id <- samples_df[[row, "cellr_ID"]]
        name <- samples_df[[row, "sample_name"]]

        curr_in <- dirname(dirname(samples_df[[row, "barcode_f"]]))
        print('exp')
        print(cellr.id)
        print('name')
        print(name)
        barcode_path <- file.path(curr_in, "filtered_peak_bc_matrix", "barcodes.tsv")
        print("barcode_path")
        print(barcode_path)
        # Only the barcode strings are needed downstream.
        barcodes <- readr::read_tsv(barcode_path, col_names = FALSE)[["X1"]]
        frag_file <- file.path(curr_in, "fragments.tsv.gz")

        # Per-barcode metadata, tagged with the sample name and restricted to
        # the barcodes listed in barcodes.tsv.
        cells.meta.f <- file.path(curr_in, "singlecell.csv")
        cells.meta <- as.data.frame(readr::read_csv(cells.meta.f)) %>%
            tibble::column_to_rownames(var="barcode") %>%
            tibble::add_column(proj=name)
        cells.meta <- cells.meta[rownames(cells.meta) %in% barcodes, , drop = FALSE]

        print("Creating fragments object")
        frags.curr <- CreateFragmentObject(path = frag_file, cells = barcodes)

        print("Quantifying peaks")
        ## Quantify peaks
        curr.counts <- FeatureMatrix(
          fragments = frags.curr,
          features = peaks,
          cells = barcodes,
          process_n = process_n
        )

        print("Creating chromatin assay")
        ## Create the objects and use simple filters
        curr_assay <- CreateChromatinAssay(curr.counts, fragments = frags.curr, min.cells = 10, min.features = 200)
        curr <- CreateSeuratObject(curr_assay, assay = "ATAC", project=name, meta.data=cells.meta)
        #curr <- subset(curr, nCount_peaks > 2000 & nCount_peaks < 30000)

        print('curr_assay')
        print(head(curr_assay))
        print('curr')
        print(head(curr[[]]))
        allSE[[row]] <- curr
    }

    return(allSE)
}
    #allSE <- sapply(samples, create_frag, curr_in=curr_in)

In [27]:
# Read one sample sheet and build the Seurat objects for all of its samples.
#
# Arguments:
#   sample_csv: path to a sample-sheet CSV; it is read with readr::read_csv()
#               and handed to expr.load().
# Returns:
#   Whatever expr.load() returns for that sheet.
load.se.experiments <- function(sample_csv){
    readr::read_csv(sample_csv) %>%
        expr.load()
}



In [None]:
# Build the Seurat objects for every sample sheet: one list entry per sheet,
# each holding that sheet's per-sample objects. simplify = FALSE keeps this a
# list of lists even when all sheets contain the same number of samples
# (sapply() would otherwise reshape the result in that case).
allSE <- sapply(samps, load.se.experiments, simplify = FALSE)

[1mRows: [22m[34m3[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "samples_df"
[90m# A tibble: 3 × 5[39m
  sample_name cellr_ID fastq_ID             barcode_f                      bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m                [3m[90m<chr>[39m[23m                          [3m[90m<chr>[39m[23m
[90m1[39m Control     Control  BC_10xATAC_rxn1b_neg /home/isaac/lewis/mito_lineag… /hom…
[90m2[39m Flt3l       Flt3l    BC_10xATAC_rxn1b_pos /home/isaac/lewis/mito_lineag… /hom…
[90m3[39m Input       Input    BC_10xATAC_rxn1      /home/isaac/lewis/mito_lineag… /hom…
[1] "exp"
[1] "Control"
[1] "name"
[1] "Control"
[1] "barcode_path"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b1/MTBlacklist_A2/Control/outs/filtered_peak_bc_matrix/barcodes.tsv"


[1mRows: [22m[34m5330[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m329709[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Some cells in meta.data not present in provided counts matrix.”
“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC total duplicate
AAACGAAAGAATCAAC-1    Control        2479          2410 14503      2850
AAACGAAAGAATCAGT-1    Control        2060          2012  6432      1259
AAACGAAAGCATACCT-1    Control        2788          2718 11858      1937
AAACGAAAGCGAGAAA-1    Control        2020          1968  7365      1097
AAACGAAAGTACAGAT-1    Control         884           875  4095       861
AAACGAAAGTACAGTA-1    Control        1839          1804  8088      1091
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAAAGAATCAAC-1       72       31    1064          2355           8131
AAACGAAAGAATCAGT-1       31       19     266          1066           3791
AAACGAAAGCATACCT-1       51       34     549          3102           6185
AAACGAAAGCGAGAAA-1       25       27     275          2184           3757
AAACGAAAGTACAGAT-1       16       16     230           771  

[1mRows: [22m[34m7651[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m314920[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Some cells in meta.data not present in provided counts matrix.”
“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC total duplicate
AAACGAAAGGAGACTC-1      Flt3l         548           540  2286       298
AAACGAAAGTCACGCC-1      Flt3l        1282          1255  4711       555
AAACGAACAAGACTAA-1      Flt3l        2364          2280  9178      1509
AAACGAACAAGCGAAC-1      Flt3l        2393          2324  8825      1544
AAACGAACACAGCCAC-1      Flt3l        3036          2911 10589      1294
AAACGAACACAGGTAG-1      Flt3l         638           630  2384       240
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAAAGGAGACTC-1        7        7     124           749           1101
AAACGAAAGTCACGCC-1       16       12     289           853           2986
AAACGAACAAGACTAA-1       37       19     585          1339           5689
AAACGAACAAGCGAAC-1       24       24     535          1308           5390
AAACGAACACAGCCAC-1       41       31     456          2764  

[1mRows: [22m[34m6848[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m490990[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC total duplicate
AAACGAAAGACGCCAA-1      Input       15995         13385 61053     21242
AAACGAAAGATATGAC-1      Input        3168          2873 29954      4853
AAACGAAAGTAATGTG-1      Input        7384          6547 32111      9018
AAACGAAAGTAGTCGG-1      Input        8901          7645 37923     12542
AAACGAACACATTGCA-1      Input        8783          7042 46206     12553
AAACGAACACGCTCAG-1      Input        6869          5900 38973      7584
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAAAGACGCCAA-1      263      248    2186         11814          25300
AAACGAAAGATATGAC-1       67      103     626         19521           4784
AAACGAAAGTAATGTG-1       96      122    1319         11232          10324
AAACGAAAGTAGTCGG-1      147      162    1705         10189          13178
AAACGAACACATTGCA-1      148      184    1239         16880  

[1mRows: [22m[34m1[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "samples_df"
[90m# A tibble: 1 × 5[39m
  sample_name cellr_ID fastq_ID        barcode_f                           bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m           [3m[90m<chr>[39m[23m                               [3m[90m<chr>[39m[23m
[90m1[39m Input       Input    BC_10xATAC_rxn2 /home/isaac/lewis/mito_lineage/out… /hom…
[1] "exp"
[1] "Input"
[1] "name"
[1] "Input"
[1] "barcode_path"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_dec172021_b2/MTBlacklist_A2/Input/outs/filtered_peak_bc_matrix/barcodes.tsv"


[1mRows: [22m[34m4769[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m405884[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Some cells in meta.data not present in provided counts matrix.”
“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC total duplicate
AAACGAAAGCCGCAAA-1      Input        1276          1234 16081      1250
AAACGAACAGATGGCA-1      Input        8484          7625 44699     10292
AAACGAAGTATGTTCG-1      Input        4891          4495 21594      5059
AAACGAAGTCATTGGT-1      Input         619           608  9191       460
AAACGAATCAGAGTGG-1      Input        1955          1880 10961      1743
AAACGAATCCGGACTG-1      Input        2574          2451 15146      1793
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAAAGCCGCAAA-1       25       60     379         11724           2643
AAACGAACAGATGGCA-1      190      169    1753         15193          17102
AAACGAAGTATGTTCG-1      133       91     784          8125           7402
AAACGAAGTCATTGGT-1       12       19     137          7471           1092
AAACGAATCAGAGTGG-1       56       43     289          4957  

[1mRows: [22m[34m2[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "samples_df"
[90m# A tibble: 2 × 5[39m
  sample_name cellr_ID fastq_ID      barcode_f                             bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m         [3m[90m<chr>[39m[23m                                 [3m[90m<chr>[39m[23m
[90m1[39m Input       Flt3l    BC_10xATAC_Fp /home/isaac/lewis/mito_lineage/outpu… /hom…
[90m2[39m Flt3l       Control  BC_10xATAC_Fn /home/isaac/lewis/mito_lineage/outpu… /hom…
[1] "exp"
[1] "Flt3l"
[1] "name"
[1] "Input"
[1] "barcode_path"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/CHIP_april08_2021_Croker/MTblacklist/Flt3l/outs/filtered_peak_bc_matrix/barcodes.tsv"


[1mRows: [22m[34m5313[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m532237[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Some cells in meta.data not present in provided counts matrix.”
“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC total duplicate
AAACGAAAGGAAACTT-1      Input        3223          3041 49701      5002
AAACGAAGTCCAAGAG-1      Input        5289          4876 62869      9096
AAACGAAGTCCGAGCT-1      Input        8635          7647 60574     11583
AAACGAAGTTCAGAAA-1      Input        3200          3042 32202      4591
AAACGAAGTTTGAAGA-1      Input        8229          7425 68829     12010
AAACGAATCAGCCGGT-1      Input        1273          1245 13222      1216
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAAAGGAAACTT-1      164       92    1138         37812           5493
AAACGAAGTCCAAGAG-1      249      139    1902         42086           9397
AAACGAAGTCCGAGCT-1      352      141    2087         31767          14644
AAACGAAGTTCAGAAA-1      146       90     911         19849           6615
AAACGAAGTTTGAAGA-1      432      163    2209         39985  

[1mRows: [22m[34m843[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m470007[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Some cells in meta.data not present in provided counts matrix.”
“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC  total duplicate
AAACGAATCTTACTCA-1      Flt3l         220           220    779        47
AAACTCGAGTGGTGTG-1      Flt3l         401           398  16911      5224
AAACTCGTCAATGCAC-1      Flt3l        1042          1025  11963      2864
AAACTGCTCTCGCGGA-1      Flt3l       22162         15293 265035     83694
AAAGATGAGCGCATTT-1      Flt3l         381           377  15367      5530
AAAGATGCAGCAACGA-1      Flt3l       15437         10650 250347     85267
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAATCTTACTCA-1        6        7      37           299            383
AAACTCGAGTGGTGTG-1       95      356     690          9717            829
AAACTCGTCAATGCAC-1       63       78     583          6889           1486
AAACTGCTCTCGCGGA-1     1575     1101    9756        141471          27438
AAAGATGAGCGCATTT-1       83      294     733         

[1mRows: [22m[34m2[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (5): sample_name, cellr_ID, fastq_ID, barcode_f, bam_f

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "samples_df"
[90m# A tibble: 2 × 5[39m
  sample_name cellr_ID fastq_ID      barcode_f                             bam_f
  [3m[90m<chr>[39m[23m       [3m[90m<chr>[39m[23m    [3m[90m<chr>[39m[23m         [3m[90m<chr>[39m[23m                                 [3m[90m<chr>[39m[23m
[90m1[39m Control     P2       BC_10xATAC_P2 /home/isaac/lewis/mito_lineage/outpu… /hom…
[90m2[39m Flt3l       J2       BC_10xATAC_J2 /home/isaac/lewis/mito_lineage/outpu… /hom…
[1] "exp"
[1] "P2"
[1] "name"
[1] "Control"
[1] "barcode_path"
[1] "/home/isaac/lewis/mito_lineage/output/mtscATAC/data/jan21_2021/MTblacklist/P2/outs/filtered_peak_bc_matrix/barcodes.tsv"


[1mRows: [22m[34m6875[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m332219[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



[1] "Creating chromatin assay"


“Some cells in meta.data not present in provided counts matrix.”
“Keys should be one or more alphanumeric characters followed by an underscore, setting key from atac to atac_”


[1] "curr_assay"
data frame with 0 columns and 10 rows
[1] "curr"
                   orig.ident nCount_ATAC nFeature_ATAC total duplicate
AAACGAAAGAGGTCCA-1    Control        1558          1507 16929      2587
AAACGAAAGCGATACG-1    Control        4217          4003 45359     10546
AAACGAAAGTCGTGAG-1    Control        1239          1199 10177      2450
AAACGAACAATAGTGA-1    Control        3108          2950 22452      4364
AAACGAACACAATAAG-1    Control        1427          1398 11763      2003
AAACGAACACTGATAC-1    Control        1125          1110  9449      1971
                   chimeric unmapped lowmapq mitochondrial passed_filters
AAACGAAAGAGGTCCA-1       58       70     674          9629           3911
AAACGAAAGCGATACG-1      184      184    2302         22247           9896
AAACGAAAGTCGTGAG-1       48       60     657          2135           4827
AAACGAACAATAGTGA-1      100       83     897          9233           7775
AAACGAACACAATAAG-1       43       57     680          5547  

[1mRows: [22m[34m12009[39m [1mColumns: [22m[34m1[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (1): X1

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m430951[39m [1mColumns: [22m[34m18[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): barcode, cell_id
[32mdbl[39m (16): total, duplicate, chimeric, unmapped, lowmapq, mitochondrial, pass...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


[1] "Creating fragments object"


Computing hash



[1] "Quantifying peaks"


Extracting reads overlapping genomic regions



In [40]:
# Flatten the per-experiment list of Seurat-object lists into a single list.
# Only one level of nesting is removed (recursive = FALSE), so the Seurat
# objects themselves are left intact.
# NOTE: unlist() builds the new names by appending the inner position to the
# outer name when a group holds several objects (e.g. CHIP_b1 -> CHIP_b11,
# CHIP_b12, CHIP_b13), while single-object groups keep the outer name.
allSE.full <- unlist(
  allSE,
  recursive = FALSE,
  use.names = TRUE
)



In [41]:
# Print the flattened list to check the per-sample names and object sizes.
allSE.full

$CHIP_b11
An object of class Seurat 
195596 features across 5308 samples within 1 assay 
Active assay: ATAC (195596 features, 0 variable features)

$CHIP_b12
An object of class Seurat 
187005 features across 7540 samples within 1 assay 
Active assay: ATAC (187005 features, 0 variable features)

$CHIP_b13
An object of class Seurat 
211492 features across 6848 samples within 1 assay 
Active assay: ATAC (211492 features, 0 variable features)

$CHIP_b2
An object of class Seurat 
158263 features across 4767 samples within 1 assay 
Active assay: ATAC (158263 features, 0 variable features)

$CHIP_a11
An object of class Seurat 
193788 features across 5312 samples within 1 assay 
Active assay: ATAC (193788 features, 0 variable features)

$CHIP_a12
An object of class Seurat 
101986 features across 745 samples within 1 assay 
Active assay: ATAC (101986 features, 0 variable features)

$cd34norm1
An object of class Seurat 
204193 features across 6624 samples within 1 assay 
Active assay: ATAC (2041

In [None]:
# Print the nested (per-experiment) list for comparison with allSE.full.
allSE

In [42]:
# Persist the flattened list of Seurat objects as a single RDS file in outdir.
saveRDS(
  allSE.full,
  file = file.path(outdir, "allSamples.rds")
)

In [35]:
# Show the full path of the RDS file written above.
file.path(outdir, "allSamples.rds")

In [None]:
# Disabled QC helpers, kept for reference; nothing in this cell is executed.
#   qc(se):    sets Annotation(se) from `annotations`, runs NucleosomeSignal()
#              and TSSEnrichment(), then derives pct_reads_in_peaks,
#              blacklist_ratio, high.tss and nucleosome_group per cell.
#   vPlot(se): builds a violin plot of those QC metrics, titled with the
#              first orig.ident value of the object.
# NOTE(review): qc() reads peak_region_fragments, passed_filters and
# blacklist_region_fragments from the object; confirm these metadata columns
# are present before re-enabling.

# library(Rsamtools)

# qc <- function(se){
#     # add the gene information to the object
#     Annotation(se) <- annotations
    
#     # compute nucleosome signal score per cell
#     se <- NucleosomeSignal(object = se)
    
#     # compute TSS enrichment score per cell
#     se <- TSSEnrichment(object = se, fast = FALSE)

#     # add blacklist ratio and fraction of reads in peaks
#     se$pct_reads_in_peaks <- se$peak_region_fragments / se$passed_filters * 100
#     se$blacklist_ratio <- se$blacklist_region_fragments / se$peak_region_fragments
#     se$high.tss <- ifelse(se$TSS.enrichment > 2, 'High', 'Low')
#     se$nucleosome_group <- ifelse(se$nucleosome_signal > 4, 'NS > 4', 'NS < 4')

#     return(se)
# }
# vPlot <- function(se){
#       vPlot <- VlnPlot(
#       object = se,
#       features = c('pct_reads_in_peaks', 'peak_region_fragments',
#                    'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'),
#       pt.size = 0.1,
#       ncol = 5
#     )  
#     vPlot <- vPlot +    # Create grid of plots with title
#              plot_annotation(title = se$orig.ident[[1]]) & 
#              theme(plot.title = element_text(hjust = 0.5, size=15))
#     #print(vPlot)
#     return(vPlot)
# }



In [36]:
# Record the R version, platform and attached package versions for this run.
sessionInfo()

R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.4 LTS

Matrix products: default
BLAS/LAPACK: /home/isaac/miniconda3/envs/mttrace/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] future_1.23.0        dplyr_1.0.8          metap_1.1           
 [4] cowplot_1.1.1        magrittr_2.0.2       data.table_1.14.2   
 [7] patchwork_1.1.1      ggplot2_3.3.5        Signac_1.4.0        
[10] SeuratObject_4.0.4   Seurat_4.1.0         GenomicRanges_1.46.0
[13] GenomeInfoDb_

In [32]:
# Print the nested list: one entry per experiment, each holding one or more
# Seurat objects.
allSE

$CHIP_b1
$CHIP_b1[[1]]
An object of class Seurat 
195596 features across 5308 samples within 1 assay 
Active assay: ATAC (195596 features, 0 variable features)

$CHIP_b1[[2]]
An object of class Seurat 
187005 features across 7540 samples within 1 assay 
Active assay: ATAC (187005 features, 0 variable features)

$CHIP_b1[[3]]
An object of class Seurat 
211492 features across 6848 samples within 1 assay 
Active assay: ATAC (211492 features, 0 variable features)


$CHIP_b2
$CHIP_b2[[1]]
An object of class Seurat 
158263 features across 4767 samples within 1 assay 
Active assay: ATAC (158263 features, 0 variable features)


$CHIP_a1
$CHIP_a1[[1]]
An object of class Seurat 
193788 features across 5312 samples within 1 assay 
Active assay: ATAC (193788 features, 0 variable features)

$CHIP_a1[[2]]
An object of class Seurat 
101986 features across 745 samples within 1 assay 
Active assay: ATAC (101986 features, 0 variable features)


$cd34norm
$cd34norm[[1]]
An object of class Seurat 
204193 