In [62]:
library(Signac)
library(Seurat)
library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
library(Matrix)


载入程辑包：'Matrix'


The following object is masked from 'package:S4Vectors':

    expand




In [2]:
# Input locations, all built relative to `root_dir`.
root_dir <- "."

# Directory of the pbmc8k filtered gene-barcode matrices (GRCh38 sub-folder).
pbmc_scrna_data_dir <- file.path(
  root_dir,
  "pbmc8k_filtered_gene_bc_matrices",
  "filtered_gene_bc_matrices",
  "GRCh38"
)

# Directory of the 10k PBMC ATAC filtered peak-barcode matrix.
pbmc_atac_data_dir <- file.path(
  root_dir,
  "atac_v1_pbmc_10k_filtered_peak_bc_matrix",
  "filtered_peak_bc_matrix"
)

# Path of the ATAC fragments file.
atac_fragment_path <- file.path(root_dir, "atac_v1_pbmc_10k_fragments.tsv.gz")

In [3]:
# Assemble the ATAC count matrix from its matrix / barcode / peak files.
# Feature names are taken from the first column of peaks.tsv.
pbmc_atac_data <- ReadMtx(
  mtx = file.path(pbmc_atac_data_dir, "matrix.mtx"),
  cells = file.path(pbmc_atac_data_dir, "barcodes.tsv"),
  features = file.path(pbmc_atac_data_dir, "peaks.tsv"),
  feature.column = 1
)

# Per-cell metadata, read from the current working directory.
# NOTE(review): the file is named .tsv but is parsed as comma-separated, and
# header / row names are left to read.table()'s auto-detection — confirm.
pbmc_atac_metadata <- read.table("pbmc_atac_metadata.tsv", sep = ",")

In [4]:
# Wrap the ATAC counts in a Seurat object (assay "ATAC") and attach the
# per-cell metadata table.
pbmc_atac_obj <- CreateSeuratObject(
  counts = pbmc_atac_data,
  project = "pbmc10k",
  assay = "ATAC",
  meta.data = pbmc_atac_metadata
)

In [5]:
# Display the metadata table that was passed as `meta.data` when the Seurat
# object was created.
pbmc_atac_metadata

Unnamed: 0_level_0,orig.ident,nCount_ATAC,nFeature_ATAC,predicted.celltype.l1.score,predicted.celltype.l1,predicted.celltype.l2.score,predicted.celltype.l2,predicted.celltype.l3.score,predicted.celltype.l3,nCount_RNA,nFeature_RNA
Unnamed: 0_level_1,<chr>,<int>,<int>,<dbl>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<int>,<int>
AAACGAAAGAGCGAAA-1,SeuratProject,1172,1036,0.3726037,Mono,0.3484789,CD14 Mono,0.3484789,CD14 Mono,5972,3167
AAACGAAAGAGTTTGA-1,SeuratProject,1358,1179,0.6990826,Mono,0.6938140,CD14 Mono,0.6938140,CD14 Mono,6302,3371
AAACGAAAGCGAGCTA-1,SeuratProject,2395,1816,0.3192837,Mono,0.2271536,CD14 Mono,0.2271536,CD14 Mono,12570,5281
AAACGAAAGGCTTCGC-1,SeuratProject,18295,8919,0.4638083,Mono,0.4638083,CD14 Mono,0.4638083,CD14 Mono,97107,11008
AAACGAAAGTGCTGAG-1,SeuratProject,869,781,0.8818523,Mono,0.8603005,CD14 Mono,0.8603005,CD14 Mono,4682,2822
AAACGAAGTCAGGCTC-1,SeuratProject,523,456,0.5460860,DC,0.5422624,pDC,0.5422624,pDC,2839,1843
AAACGAAGTGCCCGAT-1,SeuratProject,1759,1517,0.6674465,Mono,0.6666571,CD14 Mono,0.6666571,CD14 Mono,8824,4391
AAACGAAGTTGTATCG-1,SeuratProject,1248,1106,0.6331085,B,0.5799114,B naive,0.4834936,B naive lambda,6546,3537
AAACGAATCAGTTGAC-1,SeuratProject,802,708,0.4664765,Mono,0.3224327,NK,0.3050318,CD14 Mono,4271,2567
AAACGAATCCTTACGC-1,SeuratProject,1042,888,0.7679072,Mono,0.7622426,CD14 Mono,0.7622426,CD14 Mono,5602,2587


In [6]:
# Distinct values of the level-1 predicted cell-type column.
print(
  unique(pbmc_atac_obj@meta.data[["predicted.celltype.l1"]])
)

[1] "Mono"    "DC"      "B"       "CD4 T"   "CD8 T"   "other T" "NK"     
[8] "other"  


In [7]:
# Distinct values of the level-2 predicted cell-type column.
print(
  unique(pbmc_atac_obj@meta.data[["predicted.celltype.l2"]])
)

 [1] "CD14 Mono"      "pDC"            "B naive"        "NK"            
 [5] "B intermediate" "CD4 Naive"      "CD8 Naive"      "MAIT"          
 [9] "CD4 TCM"        "CD16 Mono"      "Doublet"        "CD8 TEM"       
[13] "cDC2"           "B memory"       "HSPC"           "NK_CD56bright" 
[17] "dnT"            "Platelet"       "Treg"           "Plasmablast"   
[21] "Eryth"          "ASDC"           "CD8 TCM"        "gdT"           
[25] "cDC1"           "CD4 TEM"       


In [8]:
# Distinct values of the level-3 predicted cell-type column.
print(
  unique(pbmc_atac_obj@meta.data[["predicted.celltype.l3"]])
)

 [1] "CD14 Mono"             "pDC"                   "B naive lambda"       
 [4] "B intermediate kappa"  "CD4 Naive"             "Doublet"              
 [7] "MAIT"                  "CD4 TCM_1"             "NK_4"                 
[10] "CD16 Mono"             "CD4 TCM_2"             "CD8 Naive_2"          
[13] "B naive kappa"         "NK_2"                  "CD8 TEM_2"            
[16] "cDC2_1"                "B memory kappa"        "CD8 TEM_1"            
[19] "HSPC"                  "CD8 Naive"             "NK_CD56bright"        
[22] "dnT_2"                 "Platelet"              "Treg Memory"          
[25] "B intermediate lambda" "NK_1"                  "CD8 TEM_5"            
[28] "CD4 TCM_3"             "CD8 TEM_6"             "B memory lambda"      
[31] "Plasma"                "Eryth"                 "ASDC_pDC"             
[34] "CD8 TEM_4"             "cDC1"                  "CD4 TEM_1"            
[37] "CD4 CTL"               "gdT_1"                


In [9]:
# Distinct level-2 labels, then the number of cells carrying each label.
cell_types <- unique(pbmc_atac_obj@meta.data[["predicted.celltype.l2"]])
summary(
  as.factor(pbmc_atac_obj@meta.data[["predicted.celltype.l2"]])
)

* CD14 Mono: 3906
* CD4 TCM: 897
* CD8 Naive: 464
* B naive: 382
* CD8 TEM: 256
* NK: 320

## 筛选peak

In [73]:
# Root directory under which the per-cell-type output is written.
save_dir <- "pbmc_sep_data"

### CD14 Mono

In [66]:
# Restrict the ATAC object to cells whose level-2 predicted label equals
# `tgt_type`, then run FindTopFeatures() on that subset's count matrix.
tgt_type <- "CD14 Mono"
sub_data <- subset(pbmc_atac_obj, predicted.celltype.l2 == tgt_type)

# Fetch the counts through the accessor instead of `@assays$ATAC@counts`:
# that slot path exists only on the legacy Assay class and fails on Assay5.
# NOTE(review): `layer =` needs SeuratObject >= 5, which the "layers present"
# line in the printed objects suggests is installed — confirm.
percentile <- FindTopFeatures(
  GetAssayData(sub_data, assay = "ATAC", layer = "counts")
)

In [41]:
# Percentile cut-off, and the number of peaks that lie above it.
threshold <- 0.85
sum(percentile[["percentile"]] > threshold)

In [70]:
# Keep only the peaks whose percentile exceeds `threshold`.
top_peaks <- rownames(subset(percentile, percentile > threshold))

# `features` is spelled out in full; the previous `feature =` only worked
# through partial argument matching.
cd14_sub_data <- subset(sub_data, features = top_peaks)

# Sparse count matrix of the filtered subset, read through the accessor so it
# does not depend on the assay's internal slot layout.
sp_data <- Matrix(
  GetAssayData(cd14_sub_data, assay = "ATAC", layer = "counts"),
  sparse = TRUE
)
cd14_sub_data

An object of class Seurat 
12044 features across 3906 samples within 1 assay 
Active assay: ATAC (12044 features, 0 variable features)
 2 layers present: counts, data

In [76]:
# Write the filtered matrix for `tgt_type` as three files (peaks, barcodes,
# Matrix Market matrix) under <save_dir>/<tgt_type>/atac.
tgt_dir <- file.path(save_dir, tgt_type, "atac")

# dir.exists() rather than file.exists(): a regular file at this path must not
# be mistaken for an already-present output directory.
if (!dir.exists(tgt_dir)) {
  dir.create(tgt_dir, recursive = TRUE)
}

# Row names are the peaks, column names the cell barcodes; one name per line.
write(x = rownames(sp_data), file = file.path(tgt_dir, "peaks.tsv"))
write(x = colnames(sp_data), file = file.path(tgt_dir, "barcodes.tsv"))
writeMM(obj = sp_data, file = file.path(tgt_dir, "matrix.mtx"))

NULL

### CD4 TCM

In [77]:
# Restrict the ATAC object to cells whose level-2 predicted label equals
# `tgt_type`, then run FindTopFeatures() on that subset's count matrix.
tgt_type <- "CD4 TCM"
sub_data <- subset(pbmc_atac_obj, predicted.celltype.l2 == tgt_type)

# Fetch the counts through the accessor instead of `@assays$ATAC@counts`:
# that slot path exists only on the legacy Assay class and fails on Assay5.
# NOTE(review): `layer =` needs SeuratObject >= 5, which the "layers present"
# line in the printed objects suggests is installed — confirm.
percentile <- FindTopFeatures(
  GetAssayData(sub_data, assay = "ATAC", layer = "counts")
)

In [78]:
# Percentile cut-off, and the number of peaks that lie above it.
threshold <- 0.85
sum(percentile[["percentile"]] > threshold)

In [79]:
# Keep only the peaks whose percentile exceeds `threshold`.
top_peaks <- rownames(subset(percentile, percentile > threshold))

# `features` is spelled out in full; the previous `feature =` only worked
# through partial argument matching.
cd4_sub_data <- subset(sub_data, features = top_peaks)

# Sparse count matrix of the filtered subset, read through the accessor so it
# does not depend on the assay's internal slot layout.
sp_data <- Matrix(
  GetAssayData(cd4_sub_data, assay = "ATAC", layer = "counts"),
  sparse = TRUE
)
cd4_sub_data

An object of class Seurat 
12040 features across 897 samples within 1 assay 
Active assay: ATAC (12040 features, 0 variable features)
 2 layers present: counts, data

In [80]:
# Write the filtered matrix for `tgt_type` as three files (peaks, barcodes,
# Matrix Market matrix) under <save_dir>/<tgt_type>/atac.
tgt_dir <- file.path(save_dir, tgt_type, "atac")

# dir.exists() rather than file.exists(): a regular file at this path must not
# be mistaken for an already-present output directory.
if (!dir.exists(tgt_dir)) {
  dir.create(tgt_dir, recursive = TRUE)
}

# Row names are the peaks, column names the cell barcodes; one name per line.
write(x = rownames(sp_data), file = file.path(tgt_dir, "peaks.tsv"))
write(x = colnames(sp_data), file = file.path(tgt_dir, "barcodes.tsv"))
writeMM(obj = sp_data, file = file.path(tgt_dir, "matrix.mtx"))

NULL

### CD8 Naive

In [81]:
# Restrict the ATAC object to cells whose level-2 predicted label equals
# `tgt_type`, then run FindTopFeatures() on that subset's count matrix.
tgt_type <- "CD8 Naive"
sub_data <- subset(pbmc_atac_obj, predicted.celltype.l2 == tgt_type)

# Fetch the counts through the accessor instead of `@assays$ATAC@counts`:
# that slot path exists only on the legacy Assay class and fails on Assay5.
# NOTE(review): `layer =` needs SeuratObject >= 5, which the "layers present"
# line in the printed objects suggests is installed — confirm.
percentile <- FindTopFeatures(
  GetAssayData(sub_data, assay = "ATAC", layer = "counts")
)

In [82]:
# Percentile cut-off, and the number of peaks that lie above it.
threshold <- 0.85
sum(percentile[["percentile"]] > threshold)

In [83]:
# Keep only the peaks whose percentile exceeds `threshold`.
top_peaks <- rownames(subset(percentile, percentile > threshold))

# `features` is spelled out in full; the previous `feature =` only worked
# through partial argument matching.
cd8n_sub_data <- subset(sub_data, features = top_peaks)

# Sparse count matrix of the filtered subset, read through the accessor so it
# does not depend on the assay's internal slot layout.
sp_data <- Matrix(
  GetAssayData(cd8n_sub_data, assay = "ATAC", layer = "counts"),
  sparse = TRUE
)
cd8n_sub_data

An object of class Seurat 
12036 features across 464 samples within 1 assay 
Active assay: ATAC (12036 features, 0 variable features)
 2 layers present: counts, data

In [84]:
# Write the filtered matrix for `tgt_type` as three files (peaks, barcodes,
# Matrix Market matrix) under <save_dir>/<tgt_type>/atac.
tgt_dir <- file.path(save_dir, tgt_type, "atac")

# dir.exists() rather than file.exists(): a regular file at this path must not
# be mistaken for an already-present output directory.
if (!dir.exists(tgt_dir)) {
  dir.create(tgt_dir, recursive = TRUE)
}

# Row names are the peaks, column names the cell barcodes; one name per line.
write(x = rownames(sp_data), file = file.path(tgt_dir, "peaks.tsv"))
write(x = colnames(sp_data), file = file.path(tgt_dir, "barcodes.tsv"))
writeMM(obj = sp_data, file = file.path(tgt_dir, "matrix.mtx"))

NULL

### B naive

In [87]:
# Restrict the ATAC object to cells whose level-2 predicted label equals
# `tgt_type`, then run FindTopFeatures() on that subset's count matrix.
tgt_type <- "B naive"
sub_data <- subset(pbmc_atac_obj, predicted.celltype.l2 == tgt_type)

# Fetch the counts through the accessor instead of `@assays$ATAC@counts`:
# that slot path exists only on the legacy Assay class and fails on Assay5.
# NOTE(review): `layer =` needs SeuratObject >= 5, which the "layers present"
# line in the printed objects suggests is installed — confirm.
percentile <- FindTopFeatures(
  GetAssayData(sub_data, assay = "ATAC", layer = "counts")
)

In [88]:
# Percentile cut-off, and the number of peaks that lie above it.
threshold <- 0.85
sum(percentile[["percentile"]] > threshold)

In [89]:
# Keep only the peaks whose percentile exceeds `threshold`.
top_peaks <- rownames(subset(percentile, percentile > threshold))

# `features` is spelled out in full; the previous `feature =` only worked
# through partial argument matching.
bn_sub_data <- subset(sub_data, features = top_peaks)

# Sparse count matrix of the filtered subset, read through the accessor so it
# does not depend on the assay's internal slot layout.
sp_data <- Matrix(
  GetAssayData(bn_sub_data, assay = "ATAC", layer = "counts"),
  sparse = TRUE
)
bn_sub_data

An object of class Seurat 
12079 features across 382 samples within 1 assay 
Active assay: ATAC (12079 features, 0 variable features)
 2 layers present: counts, data

In [90]:
# Write the filtered matrix for `tgt_type` as three files (peaks, barcodes,
# Matrix Market matrix) under <save_dir>/<tgt_type>/atac.
tgt_dir <- file.path(save_dir, tgt_type, "atac")

# dir.exists() rather than file.exists(): a regular file at this path must not
# be mistaken for an already-present output directory.
if (!dir.exists(tgt_dir)) {
  dir.create(tgt_dir, recursive = TRUE)
}

# Row names are the peaks, column names the cell barcodes; one name per line.
write(x = rownames(sp_data), file = file.path(tgt_dir, "peaks.tsv"))
write(x = colnames(sp_data), file = file.path(tgt_dir, "barcodes.tsv"))
writeMM(obj = sp_data, file = file.path(tgt_dir, "matrix.mtx"))

NULL