In [None]:
suppressWarnings(suppressMessages({
    library(Signac)
    library(Seurat)
    library(EnsDb.Mmusculus.v79)
    library(tidyverse)
    library(SingleR)
    library(Signac)
    library(Seurat)
    library(GenomicRanges)
    library(future)
}))
# Run future-based computations with the "multicore" strategy on 4 workers.
plan("multicore", workers = 4)

# Raise the size limit for globals exported to workers:
# 50000 * 1024^2 bytes, i.e. 50000 MiB (~48.8 GiB).
options(future.globals.maxSize = 50000 * 1024^2)

# Fixed RNG seed so stochastic steps are repeatable between runs.
set.seed(1)

# Palette of 23 hex colours available for plotting.
color_protocol <- c(
  "#0067AA", "#FF7F00", "#00A23F", "#FF1F1D", "#A763AC", "#B45B5D",
  "#FF8AB6", "#B6B800", "#01C1CC", "#85D5F8", "#FFC981", "#C8571B",
  "#727272", "#EFC800", "#8A5626", "#502E91", "#59A4CE", "#344B2B",
  "#FBE29D", "#FDD6E6", "#849C8C", "#F07C6F", "#000101"
)

In [None]:
# Read the two peak BED files as three-column (chr, start, end) data frames.
# Both paths refer to sample A0920_3_ME_YSPB_WX_D_T7_EDTA5030.
peaks.10x <- read.table(
  "/SGRNJ06/randd/PROJECT/scATAC/20230925_959595_H_EM_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/03.atac/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs/filtered_peak_bc_matrix/peaks.bed",
  col.names = c("chr", "start", "end")
)
peaks.sgr <- read.table(
  "/SGRNJ06/randd/USER/cjj/celedev/atac/MAESTRO/test/20231109sgr_mouse_cutoff/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs/A0920_3_ME_YSPB_WX_D_T7_EDTA5030_final_peaks.bed",
  col.names = c("chr", "start", "end")
)

In [None]:
# Turn each peak table into a GRanges object.
gr.10x <- makeGRangesFromDataFrame(df = peaks.10x)
gr.sgr <- makeGRangesFromDataFrame(df = peaks.sgr)

In [None]:
# Concatenate both peak sets and reduce them into one shared set of ranges
# that is quantified in each dataset below.
combined.peaks <- Signac::reduce(c(gr.sgr, gr.10x))

In [None]:
# Keep only merged peaks whose width is above 20 and below 10000.
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths > 20 & peakwidths < 10000]
# Display the filtered peak set.
combined.peaks

In [None]:
# Per-barcode metadata tables; the first column supplies the row names.
# The first data row of each table is dropped after reading.

# Tab-separated QC metrics for the sgr dataset.
md.sgr <- read.table(
  "/SGRNJ06/randd/USER/cjj/celedev/atac/MAESTRO/test/20231109sgr_mouse_cutoff/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs/cell_qc_metrics.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  stringsAsFactors = FALSE
)[-1, ]

# Comma-separated singlecell.csv for the 10x dataset.
md.10x <- read.table(
  "/SGRNJ06/randd/PROJECT/scATAC/20230925_959595_H_EM_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/03.atac/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs//singlecell.csv",
  header = TRUE,
  sep = ",",
  row.names = 1,
  stringsAsFactors = FALSE
)[-1, ]

In [None]:
#md.sgr

In [None]:
# Keep 10x rows whose is__cell_barcode equals 1.
md.10x <- md.10x[md.10x$is__cell_barcode == 1, ]
# Keep sgr rows whose cell_called is the string "True".
md.sgr <- md.sgr[md.sgr$cell_called == "True", ]

In [None]:
# Build one Fragment object per dataset, restricted to the barcodes that
# remain in the corresponding metadata table.
frags.10x <- CreateFragmentObject(
  cells = rownames(md.10x),
  path = "/SGRNJ06/randd/PROJECT/scATAC/20230925_959595_H_EM_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/03.atac/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs/fragments.tsv.gz"
)
frags.sgr <- CreateFragmentObject(
  cells = rownames(md.sgr),
  path = "/SGRNJ06/randd/USER/cjj/celedev/atac/MAESTRO/test/20231109sgr_mouse_cutoff/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/02.atac/Result/Mapping/A0920_3_ME_YSPB_WX_D_T7_EDTA5030//fragments_corrected_dedup_count.tsv.gz"
)

In [None]:
# Build a matrix for each dataset from its fragments over the shared
# combined.peaks, limited to the barcodes kept in the metadata.
counts.sgr <- FeatureMatrix(
  cells = rownames(md.sgr),
  features = combined.peaks,
  fragments = frags.sgr
)

counts.10x <- FeatureMatrix(
  cells = rownames(md.10x),
  features = combined.peaks,
  fragments = frags.10x
)

In [None]:
# Wrap each count matrix (with its fragments) in a ChromatinAssay, then in a
# Seurat object whose assay is named "ATAC" and which carries the metadata.
sgr_assay <- CreateChromatinAssay(counts = counts.sgr, fragments = frags.sgr)
sgr <- CreateSeuratObject(
  counts = sgr_assay,
  assay = "ATAC",
  meta.data = md.sgr
)

tenx_assay <- CreateChromatinAssay(counts = counts.10x, fragments = frags.10x)
tenx <- CreateSeuratObject(
  counts = tenx_assay,
  assay = "ATAC",
  meta.data = md.10x
)

In [None]:
# Label every cell with the dataset it came from.
sgr$dataset <- "sgr"
tenx$dataset <- "tenx"

# Merge the two objects; the prefixes keep cell names unique after merging.
combined <- merge(sgr, y = tenx, add.cell.ids = c("sgr", "tenx"))

# Display the merged "ATAC" assay.
combined[["ATAC"]]

In [None]:
# LSI workflow on the merged object: TF-IDF -> top features -> SVD -> UMAP.
combined <- RunTFIDF(object = combined)
combined <- FindTopFeatures(object = combined, min.cutoff = 20)
combined <- RunSVD(object = combined)
# UMAP is computed on LSI components 2 through 50 (component 1 is left out).
combined <- RunUMAP(object = combined, reduction = "lsi", dims = 2:50)

In [None]:
# Plot the merged cells coloured by dataset of origin.
DimPlot(object = combined, pt.size = 0.1, group.by = "dataset")

In [None]:
# Open a PDF device (height 10, width 10), draw a t-SNE DimPlot of `rds`
# grouped by "sample", then close the device.
# NOTE(review): `rds` is never assigned anywhere in the visible code, and
# scale_color_npg() does not appear to come from any package attached in the
# visible library() calls (presumably ggsci — confirm). The output path sits
# under an `rna/` directory, so this cell looks carried over from a different
# analysis — TODO confirm whether it should plot `combined` or be removed.
pdf(file = '/SGRNJ06/randd/USER/cjj/celedev/rna/20231128nanda/SeuratTSNE1.pdf', height=10, width=10)
DimPlot(rds, reduction = "tsne",group.by="sample") + scale_color_npg()
dev.off()

In [None]:
# cr and cr
library(harmony)

In [None]:
# Read the two peak BED files for the second comparison as three-column
# (chr, start, end) data frames.
peaks.10x <- read.table(
  "/SGRNJ06/randd/PROJECT/scATAC/summary_10X_data_analysis/mouse/20231013_mouse/L220728027XA_hepar_outdir/outs/filtered_peak_bc_matrix/peaks.bed",
  col.names = c("chr", "start", "end")
)
peaks.sgr <- read.table(
  "/SGRNJ06/randd/PROJECT/scATAC/20231026_M_959595_sc/A1017_2_ML_YSPB_WX_D_T7_EDTA5030/03.atac/A1017_2_ML_YSPB_WX_D_T7_EDTA5030/outs/filtered_peak_bc_matrix/peaks.bed",
  col.names = c("chr", "start", "end")
)

In [None]:
# Turn each peak table into a GRanges object.
gr.10x <- makeGRangesFromDataFrame(df = peaks.10x)
gr.sgr <- makeGRangesFromDataFrame(df = peaks.sgr)

In [None]:
# Concatenate both peak sets and reduce them into one shared set of ranges
# that is quantified in each dataset below.
combined.peaks <- Signac::reduce(c(gr.sgr, gr.10x))

In [None]:
# Keep only merged peaks whose width is above 20 and below 10000.
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths > 20 & peakwidths < 10000]
# Display the filtered peak set.
combined.peaks

In [None]:
# Per-barcode metadata from each run's singlecell.csv; the first column
# supplies the row names and the first data row is dropped after reading.
md.sgr <- read.table(
  "/SGRNJ06/randd/PROJECT/scATAC/20231026_M_959595_sc/A1017_2_ML_YSPB_WX_D_T7_EDTA5030/03.atac/A1017_2_ML_YSPB_WX_D_T7_EDTA5030/outs/singlecell.csv",
  header = TRUE,
  sep = ",",
  row.names = 1,
  stringsAsFactors = FALSE
)[-1, ]

md.10x <- read.table(
  "/SGRNJ06/randd/PROJECT/scATAC/summary_10X_data_analysis/mouse/20231013_mouse/L220728027XA_hepar_outdir/outs/singlecell.csv",
  header = TRUE,
  sep = ",",
  row.names = 1,
  stringsAsFactors = FALSE
)[-1, ]

In [None]:
# In both tables keep only the rows whose is__cell_barcode equals 1.
md.10x <- md.10x[md.10x$is__cell_barcode == 1, ]
md.sgr <- md.sgr[md.sgr$is__cell_barcode == 1, ]

In [None]:
# md.10x

In [None]:
# Build one Fragment object per dataset, restricted to the barcodes that
# remain in the corresponding metadata table.
frags.10x <- CreateFragmentObject(
  cells = rownames(md.10x),
  path = "/SGRNJ06/randd/PROJECT/scATAC/summary_10X_data_analysis/mouse/20231013_mouse/L220728027XA_hepar_outdir/outs/fragments.tsv.gz"
)
frags.sgr <- CreateFragmentObject(
  cells = rownames(md.sgr),
  path = "/SGRNJ06/randd/PROJECT/scATAC/20231026_M_959595_sc/A1017_2_ML_YSPB_WX_D_T7_EDTA5030/03.atac/A1017_2_ML_YSPB_WX_D_T7_EDTA5030/outs/fragments.tsv.gz"
)

In [None]:
# Build a matrix for each dataset from its fragments over the shared
# combined.peaks, limited to the barcodes kept in the metadata.
counts.sgr <- FeatureMatrix(
  cells = rownames(md.sgr),
  features = combined.peaks,
  fragments = frags.sgr
)

counts.10x <- FeatureMatrix(
  cells = rownames(md.10x),
  features = combined.peaks,
  fragments = frags.10x
)

In [None]:
# Wrap each count matrix (with its fragments) in a ChromatinAssay, then in a
# Seurat object whose assay is named "ATAC" and which carries the metadata.
sgr_assay <- CreateChromatinAssay(counts = counts.sgr, fragments = frags.sgr)
sgr <- CreateSeuratObject(
  counts = sgr_assay,
  assay = "ATAC",
  meta.data = md.sgr
)

tenx_assay <- CreateChromatinAssay(counts = counts.10x, fragments = frags.10x)
tenx <- CreateSeuratObject(
  counts = tenx_assay,
  assay = "ATAC",
  meta.data = md.10x
)

In [None]:
# Label every cell with the dataset it came from.
sgr$dataset <- "sgr"
tenx$dataset <- "tenx"

# Merge the two objects; the prefixes keep cell names unique after merging.
combined <- merge(sgr, y = tenx, add.cell.ids = c("sgr", "tenx"))

# Display the merged "ATAC" assay.
combined[["ATAC"]]

In [None]:
# LSI workflow on the merged object: TF-IDF -> top features -> SVD -> UMAP.
combined <- RunTFIDF(object = combined)
combined <- FindTopFeatures(object = combined, min.cutoff = 20)
combined <- RunSVD(object = combined)
# UMAP is computed on LSI components 2 through 50 (component 1 is left out).
combined <- RunUMAP(object = combined, reduction = "lsi", dims = 2:50)

In [None]:
# Harmony batch correction across the two datasets, followed by clustering
# and 2-D embeddings, stored in `PRO`.
#
# `combined` already carries the TF-IDF / top-feature / SVD results computed
# by the RunTFIDF -> FindTopFeatures -> RunSVD calls immediately before this
# cell, so those steps are not recomputed here.
PRO <- NormalizeData(object = combined)

# Variable features must be chosen BEFORE scaling: ScaleData() scales the
# current variable features by default, and RunPCA() can only use features
# that are present in the scaled data.
PRO <- FindVariableFeatures(object = PRO)
# NOTE(review): genes.use is not referenced again in the visible code, and
# head() takes the first 2000 rows of the table, not the top-ranked 2000.
genes.use <- head(HVFInfo(object = PRO), 2000)
PRO <- ScaleData(object = PRO)
PRO <- RunPCA(object = PRO, features = VariableFeatures(object = PRO))

# Correct the PCA embedding for the `dataset` covariate; the argument name is
# spelled out in full instead of relying on partial matching of `group.by`.
PRO <- RunHarmony(PRO, group.by.vars = "dataset", plot_convergence = TRUE)

# Graph, clusters and embeddings all use the first 20 Harmony dimensions.
PRO <- FindNeighbors(PRO, reduction = "harmony", dims = 1:20)
PRO <- FindClusters(PRO, resolution = 0.8, algorithm = 1)
PRO <- RunUMAP(PRO, reduction = "harmony", dims = 1:20)
# `dims` replaces the legacy `dims.use`, which RunTSNE() does not recognise
# and would pass through `...`, leaving t-SNE on its default dims. The legacy
# `do.fast` flag is dropped for the same reason.
PRO <- RunTSNE(
  object = PRO,
  reduction = "harmony",
  dims = 1:20,
  check_duplicates = FALSE
)

In [None]:
# UMAP of the Harmony-processed object, coloured by dataset of origin.
DimPlot(object = PRO, reduction = "umap", pt.size = 0.1, group.by = "dataset")

In [None]:
# Save the UMAP of `PRO`, coloured by dataset, to a PDF (height 10, width 10).
# The plot is wrapped in print() so it is rendered on the PDF device even
# when this code runs via source() or inside a function, where top-level
# auto-printing does not happen and the file would otherwise be empty.
pdf(file = '/SGRNJ06/randd/USER/cjj/celedev/atac/20231204merge/A1017_2_ML_YSPB_WX_D_T7_EDTA5030.pdf', height = 10, width = 10)
#DimPlot(combined, group.by = 'dataset', pt.size = 0.1, reduction = "umap")
print(DimPlot(PRO, group.by = 'dataset', reduction = "umap", pt.size = 0.1))
dev.off()

In [None]:
# Coverage tracks for chr14:99700000-99760000 on the merged object, with one
# track per dataset.
CoveragePlot(
  region = "chr14-99700000-99760000",
  group.by = "dataset",
  object = combined
)

In [None]:
# Read the filtered peak-by-cell count matrix that each run wrote to HDF5.
counts.sgr <- Read10X_h5(
  filename = "/SGRNJ06/randd/USER/cjj/celedev/atac/MAESTRO/test/20231109sgr_mouse_cutoff/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/02.atac/Result/QC/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/A0920_3_ME_YSPB_WX_D_T7_EDTA5030_filtered_peak_count.h5"
)
counts.tenx <- Read10X_h5(
  filename = "/SGRNJ06/randd/PROJECT/scATAC/20230925_959595_H_EM_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/03.atac/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs/filtered_peak_bc_matrix.h5"
)

In [None]:
# counts.sgr
# Show the first six rows of the sgr object's cell metadata.
head(x = sgr@meta.data, n = 6L)

In [None]:
# Build a ChromatinAssay and a Seurat object (assay "peaks") per dataset.
# The two matrices use different separators in their peak names ("_"/"_" for
# sgr, ":"/"-" for 10x); min.features = 500 is applied to both.
sgr_assay <- CreateChromatinAssay(
  counts = counts.sgr,
  min.features = 500,
  sep = c("_", "_")
)
sgr <- CreateSeuratObject(counts = sgr_assay, assay = "peaks")

tenx_assay <- CreateChromatinAssay(
  counts = counts.tenx,
  min.features = 500,
  sep = c(":", "-")
)
tenx <- CreateSeuratObject(counts = tenx_assay, assay = "peaks")

In [None]:
# Label every cell with the dataset it came from.
sgr$dataset <- "sgr"
tenx$dataset <- "tenx"

# Merge the two objects; the prefixes keep cell names unique after merging.
combined <- merge(sgr, y = tenx, add.cell.ids = c("sgr", "tenx"))

# LSI workflow on the merged object: TF-IDF -> top features -> SVD -> UMAP.
combined <- RunTFIDF(object = combined)
combined <- FindTopFeatures(object = combined, min.cutoff = 20)
combined <- RunSVD(object = combined)
# UMAP is computed on LSI components 2 through 50 (component 1 is left out).
combined <- RunUMAP(object = combined, reduction = "lsi", dims = 2:50)
# Plot the embedding coloured by dataset of origin.
DimPlot(object = combined, pt.size = 0.1, group.by = "dataset")