In [None]:
import tempfile
import os
import shutil
import json

import scanpy as sc
import transcriptomic_clustering as tc
from transcriptomic_clustering.iterative_clustering import build_cluster_dict, iter_clust, OnestepKwargs

In [None]:
# Input/output locations (both paths are relative to the current working directory)
output_file = os.path.expanduser('clusters.json')  # destination for the final cluster dictionary
path_to_adata = '././notebooks/data/tasic2016counts_sparse.h5ad'

# NOTE(review): backed='r' presumably keeps the matrix on disk and opens it
# read-only instead of loading everything into memory — confirm against the anndata docs.
adata = sc.read_h5ad(
    path_to_adata,
    backed='r',
)

In [None]:
# Set memory params
# NOTE(review): presumably caps the package's memory use at 30% of the memory
# currently available — confirm against tc.memory.set_memory_limit.
tc.memory.set_memory_limit(percent_current_available=30)
# NOTE(review): presumably lets the package process data in chunks when it does
# not fit under the limit above — confirm.
tc.memory.allow_chunking = True

In [None]:
# Keyword overrides for the cluster-merging step. Any option left out here
# falls back to the default of the function that receives it.
merge_clusters_kwargs = dict(
    thresholds=dict(
        q1_thresh=0.5,
        q2_thresh=None,
        cluster_size_thresh=15,
        qdiff_thresh=0.7,
        padj_thresh=0.05,
        lfc_thresh=1.0,
        score_thresh=200,
        low_thresh=1,
    ),
    de_method='de_ebayes',
)
# Wrap the merge settings in an OnestepKwargs object, which is what gets passed
# to iter_clust later in the notebook.
onestep_kwargs = OnestepKwargs(merge_clusters_kwargs=merge_clusters_kwargs)

In [None]:
# Start from a fresh scratch directory. When this cell is re-run in a live
# kernel, first discard the directory left over from the previous run.
try:
    shutil.rmtree(tmp_dir)
except NameError:
    pass  # first run in this kernel: tmp_dir has not been defined yet
except FileNotFoundError:
    pass  # tmp_dir is defined but its directory is already gone (e.g. removed by the final cleanup cell)
tmp_dir = tempfile.mkdtemp()

In [None]:
# Normalize adata; the target path inside tmp_dir is handed to tc.normalize via copy_to
norm_adata_path = os.path.join(tmp_dir, 'normalized.h5ad')
normalized_adata = tc.normalize(
    adata,
    copy_to=norm_adata_path,
)

In [None]:
# Run the iterative clustering on the normalized data with a fixed random seed,
# using tmp_dir as its temporary directory.
clusters = iter_clust(normalized_adata, min_samples=4, onestep_kwargs=onestep_kwargs,
                      random_seed=123, tmp_dir=tmp_dir)

# Build cluster_dict from the result; it is the object saved to output_file afterwards
cluster_dict = build_cluster_dict(clusters)

In [None]:
# Save the cluster dictionary to output_file as JSON
with open(output_file, 'w') as f:
    # json.dump expects the object to serialise first and the file handle second
    json.dump(cluster_dict, f)

In [None]:
# Clean up: delete the scratch directory and everything written into it
# (including the normalized.h5ad copy created earlier).
shutil.rmtree(tmp_dir)