In [1]:
import marker_utils
import scanpy as sc
import marker_utils.save_data as save_data

The scATAC-seq data must be preprocessed before it is used as input. A preprocessing script is provided for this purpose:

python atac_processed.py --input atac_data_path --fpeak 0.05 --output ./

In [2]:
# Preview the example dataset. Input requirements for the analysis:
#   - `obs` must contain a `celltype` entry giving the cluster each cell belongs to;
#   - `obsm` must contain a `spatial` entry providing the spatial information.
example_adata = sc.read_h5ad('example_data/spatial_atac.h5ad')
example_adata

AnnData object with n_obs × n_vars = 2129 × 37475
    obs: 'gex_barcode', 'atac_barcode', 'is_cell', 'excluded_reason', 'gex_raw_reads', 'gex_mapped_reads', 'gex_conf_intergenic_reads', 'gex_conf_exonic_reads', 'gex_conf_intronic_reads', 'gex_conf_exonic_unique_reads', 'gex_conf_exonic_antisense_reads', 'gex_conf_exonic_dup_reads', 'gex_exonic_umis', 'gex_conf_intronic_unique_reads', 'gex_conf_intronic_antisense_reads', 'gex_conf_intronic_dup_reads', 'gex_intronic_umis', 'gex_conf_txomic_unique_reads', 'gex_umis_count', 'gex_genes_count', 'atac_raw_reads', 'atac_unmapped_reads', 'atac_lowmapq', 'atac_dup_reads', 'atac_chimeric_reads', 'atac_mitochondrial_reads', 'atac_fragments', 'atac_TSS_fragments', 'atac_peak_region_fragments', 'atac_peak_region_cutsites', 'Sample', 'TSSEnrichment', 'ReadsInTSS', 'ReadsInPromoter', 'ReadsInBlacklist', 'PromoterRatio', 'PassQC', 'NucleosomeRatio', 'nMultiFrags', 'nMonoFrags', 'nFrags', 'nDiFrags', 'Gex_RiboRatio', 'Gex_nUMI', 'Gex_nGenes', 'Gex_MitoR

In [3]:
# Run the marker analysis on the example dataset, leaving every option at its default.
# The call returns two objects, bound here as `info` and `adata`.
# Note: the recorded run below took roughly 12-13 minutes, dominated by calculate_smoothness.
h5ad_path = 'example_data/spatial_atac.h5ad'
info, adata = marker_utils.get_spatial_MarkersEI(h5ad_path)

------Loading data------
------Data transposition------
2129 cells, 37475 genes.
------Step1: Calculate PCA------
------Step2: Calculate the similarity matrix------
------Step3: Adjust the expression value------
------Step4: Calculate EI------


cluster:Basal_plate_of_hindbrainrunningcalculate_EI(6/6): 100%|█| 14/14 [12:37<0

2.41 seconds used for calculate_mean_and_var.
686.12 seconds used for calculate_smoothness.
1.05 seconds used for calculate_V.
11.81 seconds used for calculate_prop.
54.59 seconds used for calculate_local_mean_max.
0.88 seconds used for calculate_EI.
------Congratulations, success!------





In [4]:
# Show the six features with the highest EI for the first entry in `info`
# (presumably one table per cluster -- confirm against marker_utils).
first_key = list(info.keys())[0]
ranked_by_ei = info[first_key].sort_values(by='EI', ascending=False)
ranked_by_ei.head(6)

Unnamed: 0,Features,Mean,Smoothness,Local_max,V,Prop,Prop_sum,P,EI
20225,chr2:145881298-145881799,0.844071,0.050133,1.384429,14.21141,4866924000000.0,20281930.0,0.127119,0.023072
15403,chr17:56007029-56007530,0.696012,0.055302,1.303596,8.759784,5746806000.0,40621.94,0.076923,0.009059
25884,chr5:33228289-33228790,0.762636,0.053648,1.22136,10.841203,167240100000.0,2411353.0,0.105932,0.005955
20967,chr2:173377888-173378389,0.787216,0.053523,1.23659,11.578457,31001560000.0,1314712.0,0.105263,0.00213
20866,chr2:168451683-168452184,0.752692,0.056592,0.810277,10.011061,17676570000.0,1016632.0,0.105263,0.002073
7217,chr12:16151031-16151532,0.788245,0.05426,1.154678,11.450917,54371240000.0,3021226.0,0.107692,0.001743


In [5]:
# Write the results held in `info` to the ./output folder.
# NOTE(review): `save_data` is bound by `import marker_utils.save_data as save_data`;
# this call only works if that name resolves to a callable rather than the submodule
# object -- confirm against the marker_utils package layout.
output_dir = "./output"
save_data(info, output_dir=output_dir)