In [None]:
import os
import pandas
import anndata
import numpy

from aavomics import database

In [None]:

# Alignment name passed to each cell set's get_anndata_file_path() to locate its anndata file.
ALIGNMENT_NAME = "cellranger_5.0.1_gex_mm10_2020_A"
# Name of the adata.obs column that receives the merged cell type labels.
NEW_TAXONOMY_NAME = "CCN202105060"
# adata.obs columns whose labels are merged. They are applied in list order, so
# where more than one taxonomy labels a cell, the later entry overwrites the earlier.
TAXONOMIES_TO_MERGE = ["CCN202105050", "CCN202105051"]

In [None]:
# Labels that mark a cell as unclassified; they are never carried into the merged taxonomy.
EXCLUDED_CELL_TYPES = {"nan", "Multiplets", "Debris", "Unknown"}


def merge_taxonomy_labels(obs, taxonomies):
    """Combine the labels of several taxonomy columns into one categorical Series.

    Parameters
    ----------
    obs : pandas.DataFrame
        Per-cell annotation table. Must contain every column named in
        ``taxonomies`` as well as a "Cell Called" column.
    taxonomies : list of str
        Column names to merge, applied in order: where several taxonomies
        label the same cell, the last one in the list wins.

    Returns
    -------
    pandas.Series
        Categorical Series indexed like ``obs``. Cells that are not called, or
        whose label is missing/excluded in every taxonomy, are left as NaN.
    """
    # Gather every usable label across all taxonomies.
    cell_types = set()
    for taxonomy in taxonomies:
        for cell_type in obs[taxonomy].unique():
            # pandas.isna covers None, float NaN and pandas.NA for scalars, so no
            # exception swallowing is needed to cope with string labels.
            if pandas.isna(cell_type) or cell_type in EXCLUDED_CELL_TYPES:
                continue
            cell_types.add(cell_type)

    # Sort so the category order (and therefore the written file) does not depend
    # on set iteration order, which varies between interpreter runs for strings.
    categories = sorted(cell_types, key=str)

    # Start from an all-missing categorical column aligned to the obs index.
    merged = pandas.Series(
        pandas.Categorical([None] * len(obs.index), categories=categories),
        index=obs.index,
    )

    # NOTE(review): "Cell Called" is compared against the string "True", as in the
    # original code - confirm the column really stores strings rather than booleans.
    is_called = obs["Cell Called"] == "True"

    for taxonomy in taxonomies:
        labels = obs[taxonomy]
        for cell_type in categories:
            merged.loc[is_called & (labels == cell_type)] = cell_type

    return merged


for cell_set in database.CELL_SETS:

    anndata_file_path = cell_set.get_anndata_file_path(alignment_name=ALIGNMENT_NAME)

    if not os.path.exists(anndata_file_path):
        print("Skipping %s, anndata doesn't exist" % cell_set.name)
        continue

    # read_h5ad replaces the deprecated anndata.read alias.
    adata = anndata.read_h5ad(anndata_file_path)

    # Every taxonomy is read below, so all of them must be present, not just the first.
    missing_taxonomies = [
        taxonomy for taxonomy in TAXONOMIES_TO_MERGE if taxonomy not in adata.obs.columns
    ]
    if missing_taxonomies:
        print(
            "Skipping %s, anndata doesn't contain taxonomy %s"
            % (cell_set.name, ", ".join(missing_taxonomies))
        )
        continue

    # Assign the finished column in one step instead of writing through chained
    # indexing (obs[col].loc[mask] = ...), which does not update obs under
    # pandas Copy-on-Write.
    adata.obs[NEW_TAXONOMY_NAME] = merge_taxonomy_labels(adata.obs, TAXONOMIES_TO_MERGE)

    # Overwrites the input file in place with the new taxonomy column added.
    adata.write_h5ad(anndata_file_path)