In [1]:
import scanpy as sc
import anndata as ad

In [2]:

def anndata_load(file_path):
    """Read a 10x-style matrix directory and return it with unique variable names.

    Parameters
    ----------
    file_path
        Location passed straight to ``sc.read_10x_mtx``; it should be the
        directory that contains the ``.mtx`` file.

    Returns
    -------
    The object produced by ``sc.read_10x_mtx`` (requested with
    ``var_names='gene_symbols'``), after ``var_names_make_unique()`` has been
    called on it.
    """
    loaded = sc.read_10x_mtx(file_path, var_names="gene_symbols")
    # Called for its effect on `loaded`; the return value is not used.
    loaded.var_names_make_unique()
    return loaded

def anndata_preprocess(adata,
                       min_genes=200,
                       min_cells=3,
                       n_top_genes=2000):
    """Filter, normalize, log-transform and subset to highly variable genes.

    Steps, in order:

    1. ``sc.pp.filter_cells`` with ``min_genes`` (skipped when ``min_genes`` is None).
    2. ``sc.pp.filter_genes`` with ``min_cells`` (skipped when ``min_cells`` is None).
    3. ``sc.pp.normalize_total`` with ``target_sum=1e4``.
    4. ``sc.pp.log1p``.
    5. ``sc.pp.highly_variable_genes`` with ``n_top_genes``.
    6. Store the object on ``adata.raw``, then subset the columns with
       ``adata.var.highly_variable``.

    Parameters
    ----------
    adata
        Object to preprocess. NOTE: the scanpy calls above are used for their
        in-place effect (their return values are ignored) and ``adata.raw`` is
        assigned, so the caller's object is modified by this function.
    min_genes
        Threshold forwarded to ``sc.pp.filter_cells``; ``None`` disables the step.
    min_cells
        Threshold forwarded to ``sc.pp.filter_genes``; ``None`` disables the step.
    n_top_genes
        Forwarded to ``sc.pp.highly_variable_genes``.

    Returns
    -------
    An independent copy of ``adata`` restricted to the columns flagged in
    ``adata.var.highly_variable``.
    """
    if min_genes is not None:
        sc.pp.filter_cells(adata, min_genes=min_genes)
    if min_cells is not None:
        sc.pp.filter_genes(adata, min_cells=min_cells)

    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes)

    # Keep a reference to the full (all-gene) object before subsetting.
    adata.raw = adata

    # Indexing yields a view backed by the full object; copy it so the result
    # is a standalone object that can be modified or written without touching
    # (or implicitly copying from) the parent.
    return adata[:, adata.var.highly_variable].copy()

In [5]:
# Input directory for the loader; both spellings of the name are bound to the
# same string, as before.
file_path = "../../data/filtered_gene_bc_matrices/hg19/"
filepath = file_path

# Destination used by the final write cell.
save_path = "../../data/filtered_hg19.h5ad"

adata = anndata_load(filepath)
# Last expression of the cell: display the shape of the loaded object.
adata.shape

(2700, 32738)

In [6]:
# Rebinds `adata` to the preprocessed result. This cell is not idempotent:
# running it twice feeds already normalized/log-transformed data back into
# anndata_preprocess, so re-run the load cell first if it must be repeated.
adata = anndata_preprocess(adata, n_top_genes=2000)
# Last expression of the cell: display the shape after preprocessing.
adata.shape

(2700, 2000)

In [7]:
# Persist the preprocessed object to `save_path` (defined in the load cell).
adata.write(save_path)