In [62]:
import numpy as np
import csv
import pandas as pd
import scanpy as sc
import sys
import scanpy as sc
import scipy.sparse

In [63]:
def read_sc(count, meta, celltype_key='cell_type', batch_key=None, categorical_covariate_key=None,
            continuous_covariate_key=None):
    """Load a count table and its cell metadata, then validate the annotation keys.

    The count file is read with ``sc.read_csv`` and its matrix is stored as a
    SciPy CSC sparse matrix. The metadata file is read with ``pd.read_csv``
    (first column used as index) and its columns are appended to ``.obs`` when
    the metadata index equals the cell index element for element.

    Parameters
    ----------
    count
        Path of the count file, forwarded to ``sc.read_csv``.
    meta
        Path (or file-like object) of the metadata table, forwarded to
        ``pd.read_csv``.
    celltype_key : str
        Column that must exist in the merged ``.obs``.
    batch_key : str, optional
        Column that must exist in ``.obs`` when given.
    categorical_covariate_key, continuous_covariate_key : str or iterable of str, optional
        Column name(s) that must all exist in ``.obs`` when given. A single
        string is treated as one column name.

    Returns
    -------
    The object produced by ``sc.read_csv`` with sparse ``X`` and merged ``obs``.

    Raises
    ------
    ValueError
        If the metadata table and the count matrix have a different number of cells.
    KeyError
        If any requested key is missing from ``.obs``.
    """
    import warnings

    adata = sc.read_csv(count)
    adata.X = scipy.sparse.csc_matrix(adata.X)

    meta_df = pd.read_csv(meta, index_col=0)

    # Comparing indexes of different length is an error in pandas; report it
    # with a message that names the actual problem.
    if len(adata.obs.index) != len(meta_df.index):
        raise ValueError('meta has ' + str(len(meta_df.index)) + ' rows but the count matrix has '
                         + str(len(adata.obs.index)) + ' cells!')

    if (adata.obs.index == meta_df.index).all():
        adata.obs = pd.concat([adata.obs, meta_df], axis=1)
    else:
        # Without the merge the key checks below cannot succeed, so say why.
        warnings.warn('meta index does not match the cell index of the count matrix; '
                      'metadata was not merged into obs.')

    obs_columns = adata.obs.columns
    available = ', '.join(map(str, obs_columns))

    if celltype_key not in obs_columns:
        raise KeyError('celltype_key ' + str(celltype_key) + ' not found in ' + available + '!')
    if batch_key is not None and batch_key not in obs_columns:
        raise KeyError('batch_key ' + str(batch_key) + ' not found in ' + available + '!')

    covariate_args = (
        ('categorical_covariate_key', categorical_covariate_key),
        ('continuous_covariate_key', continuous_covariate_key),
    )
    for arg_name, keys in covariate_args:
        if keys is None:
            continue
        # A bare string would otherwise be iterated character by character.
        keys = [keys] if isinstance(keys, str) else list(keys)
        if not all(key in obs_columns for key in keys):
            # Join the keys explicitly: concatenating a list to a str raises TypeError.
            raise KeyError(arg_name + ' ' + ', '.join(map(str, keys)) + ' is not a subset of '
                           + available + '!')

    return adata

In [50]:
def process_to_cpdb(sc_path, save_path, name="test", annotation_name="cell_type"):
    """Export an ``.h5ad`` dataset as a tab-separated counts file and metadata file.

    Two files are written: ``save_path + name + ".counts.txt"`` (genes as rows,
    cells as columns, values taken from ``sc.pp.normalize_total``) and
    ``save_path + name + ".meta.txt"`` (one row per cell with a ``cell_type``
    column). Presumably this is the CellPhoneDB input layout, judging by the
    function name -- TODO confirm.

    Parameters
    ----------
    sc_path
        Path of the ``.h5ad`` file, forwarded to ``sc.read_h5ad``.
    save_path : str
        Prefix prepended verbatim to the output file names, so a directory
        must end with a path separator.
    name : str
        Base name of the two output files.
    annotation_name : str
        Column of ``.obs`` written out as ``cell_type``.

    Raises
    ------
    KeyError
        If ``annotation_name`` is not a column of ``.obs``.
    """
    # NOTE(review): `utils` and `got` are not imported in the visible notebook
    # cells; they have to be in scope before this function is called.
    adata_sc = sc.read_h5ad(sc_path)

    # Check the annotation column up front so a bad key does not leave a
    # counts file on disk without its metadata companion.
    if annotation_name not in adata_sc.obs.columns:
        raise KeyError('annotation_name ' + str(annotation_name) + ' not found in '
                       + ', '.join(map(str, adata_sc.obs.columns)) + '!')

    # Densify only when X is sparse: a dense ndarray has no `.A` attribute,
    # while `.toarray()` exists on every SciPy sparse container.
    if scipy.sparse.issparse(adata_sc.X):
        adata_sc.X = adata_sc.X.toarray()

    utils.warn(' --- Generating count file --- ')
    normalized = sc.pp.normalize_total(adata_sc, inplace=False)["X"]
    # dtype="float" yields float64 columns; transpose so that genes become rows.
    df_expr_matrix = pd.DataFrame(normalized, dtype="float").T
    # set cell ids as columns
    df_expr_matrix.columns = adata_sc.obs.index
    # genes should be either Ensembl IDs or gene names
    # got.rename_gene presumably maps ortholog gene names -- verify against its definition
    genes = got.rename_gene(np.array(adata_sc.var.index))
    # Assign the index directly: set_index() would read a plain list as column labels.
    df_expr_matrix.index = pd.Index(genes)

    utils.warn(' --- Saving count file... --- ')
    df_expr_matrix.to_csv(save_path + name + ".counts.txt", sep='\t')

    utils.warn(' --- Generating metadata file --- ')
    df_meta = pd.DataFrame(
        data={'Cell': list(adata_sc.obs.index), 'cell_type': list(adata_sc.obs[annotation_name])}
    ).set_index('Cell')
    utils.warn(' --- Saving metadata file... --- ')
    df_meta.to_csv(save_path + name + ".meta.txt", sep='\t')

In [51]:
if __name__ == "__main__":
    # Export the coronal_1 bin50 clustering result as counts/metadata text files.
    # NOTE(review): this call reads from "../data/..." while the later cells
    # read from "data/..." -- confirm the intended working directory.
    process_to_cpdb(
        "../data/STW-M-Brain-Stereo-seq-1/coronal_1.bin50.adata_sc.clusters.h5ad",
        "../data/STW-M-Brain-Stereo-seq-1/",
        name="coronal_1",
        annotation_name="cell_type",
    )

In [59]:
# Load the clustered single-cell h5ad file and show the gene name stored at
# position 2000 of its var index (an arbitrary spot check of the naming style).
adata_sc = sc.read_h5ad("data/STW-M-Brain-Stereo-seq-1/coronal_1.bin50.adata_sc.clusters.h5ad")
adata_sc.var.index[2000]

'amhr2'

In [64]:
# Rebuild the single-cell object from the raw count/label CSV files and show the
# gene name at the same position (2000) for comparison.
# Note: this rebinds `adata_sc`, replacing the object loaded from the h5ad file.
adata_sc = read_sc(
    'data/STW-M-Brain-Stereo-seq-1/counts.csv.gz',
    'data/STW-M-Brain-Stereo-seq-1/labels.csv.gz',
    celltype_key="cell_type",
)
adata_sc.var.index[2000]

'Amer3'

In [66]:
# Load the annotated spatial h5ad file and show the gene name stored at
# position 2000 of its var index.
adata_sp = sc.read_h5ad("data/STW-M-Brain-Stereo-seq-1/coronal_1.bin50.adata_sp_ann.clusters.h5ad")
adata_sp.var.index[2000]

'Aco2'