In [1]:
import scanpy as sc
import anndata as ad

In [2]:
def anndata_load(file_path):
    """Load anndata, with file_path containing mtx file"""
    adata = sc.read_10x_mtx(file_path, var_names='gene_symbols')
    adata.var_names_make_unique()
    return adata

def anndata_preprocess(adata,
                        min_genes = None,
                        min_cells =None,
                        n_top_genes = 10000):
    """Preprocess function"""
    if min_genes is not None:
        sc.pp.filter_cells(adata, min_genes=min_genes)
    if min_cells is not None:
        sc.pp.filter_genes(adata, min_cells=min_cells)
    sc.pp.normalize_total(adata,target_sum=1e4)
    sc.pp.log1p(adata)
    #sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes)
    #adata.raw = adata
    #data = adata[:,adata.var.highly_variable]
    return adata

In [3]:
filepath = file_path = "../../data/filtered_gene_bc_matrices/hg19/"
save_path = "../../data/filtered_hg19.h5ad"
adata = anndata_load(filepath)
adata.shape

(2700, 32738)

In [4]:
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
adata = adata[adata.obs.n_genes_by_counts < 2500, :]
adata = adata[adata.obs.pct_counts_mt < 5, :]
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=3000,flavor='seurat_v3')

  view_to_actual(adata)


In [5]:
adata.raw = adata
adata = adata[:, adata.var.highly_variable]

In [6]:
adata.shape

(2638, 3000)

In [62]:
adata.write(save_path)

In [7]:
tr_adata = sc.read("../../pbmc_tutorial/write/pbmc3k.h5ad")

In [8]:
tr_adata.obs['leiden']

AAACATACAACCAC-1    3
AAACATTGAGCTAC-1    2
AAACATTGATCAGC-1    0
AAACCGTGCTTCCG-1    4
AAACCGTGTATGCG-1    5
                   ..
TTTCGAACTCTCAT-1    1
TTTCTACTGAGGCA-1    2
TTTCTACTTCCTCG-1    2
TTTGCATGAGAGGC-1    2
TTTGCATGCCTCAC-1    0
Name: leiden, Length: 2638, dtype: category
Categories (8, object): ['0', '1', '2', '3', '4', '5', '6', '7']

In [9]:
adata.obs['leiden'] = tr_adata.obs['leiden']

  adata.obs['leiden'] = tr_adata.obs['leiden']


In [None]:
adata.obs['leiden']