In [None]:
import scanpy as sc
import sciduck as sd

## Load in raw data

In [None]:
## Read the raw AnnData object from disk. Only `sc` and `sd` are imported in this
## notebook, so scanpy's `read_h5ad` is used rather than an un-imported `ad` alias.
adata = sc.read_h5ad("YOUR_RAW_DATA.h5ad")

## Standard scanpy preprocessing

In [None]:
## Keep an untouched copy of the raw counts in the raw slot before anything is modified
adata.raw = adata

## Calculate some basic QC metrics around UMI and gene counts.
## This has to run on the raw count matrix: once the data are normalized and
## log-transformed, `total_counts` is no longer a UMI count and the count
## thresholds applied further down would not mean what they say.
sc.pp.calculate_qc_metrics(adata, inplace=True)

## Normalize the count matrix (counts per million) and log-transform it
sc.pp.normalize_total(adata, target_sum=1e6)
sc.pp.log1p(adata)

## Flag highly variable genes. The PCA below is restricted to them via
## `use_highly_variable=True` and fails if `adata.var['highly_variable']` is missing.
sc.pp.highly_variable_genes(adata)

## Compute clusters for use later
sc.tl.pca(adata, svd_solver='arpack', use_highly_variable=True)
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.leiden(adata, flavor="igraph", n_iterations=2)


## Basic quality control (`sciduck`)

In [None]:
## Apply UMI-count and gene-count thresholds to the cells/nuclei.
## The values written out below are the function's defaults.
sd.basic_qc.filter_on_counts_genes(
    adata,
    min_counts=2000,
    max_counts=100000,
    min_genes=1000,
    max_genes=13000,
    inplace=True,
)

## Tally the values of the `keeper_cells` column in `adata.obs`
adata.obs["keeper_cells"].value_counts()

**TODO:** Describe what's happening!

In [None]:
## Filter cells/nuclei on precomputed per-cell metrics: doublet score, percent
## mitochondrial counts and the GEX read-mapping fractions. The values written out
## below are the function's defaults.
## NOTE(review): each keyword presumably names an `adata.obs` column that must
## already exist -- confirm against the sciduck documentation.
adata = sd.basic_qc.filter_on_precomputed_metrics(
    adata,
    doublet_score=0.3,
    pct_counts_mt=3.0,
    GEX_Reads_mapped_confidently_to_genome=0.0,
    GEX_Reads_mapped_to_genome=0.0,
    GEX_Reads_with_TSO=1.0,
    inplace=False,
)

## Tally the values of the `keeper_cells` column in `adata.obs`
adata.obs["keeper_cells"].value_counts()

## Quality control involving coarse labels (Neuron / Non-neuron)

In [None]:
## Filter using coarse labels: the "Class" labels are grouped into Neurons and
## Non-Neurons, and each group is given its own gene threshold.
## NOTE(review): presumably "Class" is an `adata.obs` column and the threshold is a
## minimum number of detected genes -- confirm against the sciduck documentation.
adata = sd.basic_qc.filter_utilizing_coarse_labels(
    adata,
    coarse_label_column="Class",
    coarse_label_map={
        'Neurons': ['Excitatory', 'Inhibitory'],
        'Non-Neurons': [
            'Astrocytes',
            'Oligodendrocytes',
            'Microglia',
            'Endothelial',
            'Pericytes',
        ],
    },
    coarse_label_gene_threshold={'Neurons': 2000, 'Non-Neurons': 1000},
)

## Tally the values of the `keeper_cells` column in `adata.obs`
adata.obs["keeper_cells"].value_counts()