In [None]:
import anndata as ad
import pandas as pd
import scanpy as sc
from scipy import sparse
import subprocess

# Loading the AnnData object
**Set `file` to the path of the matrix file (including its directory, if it is not in the working directory), without the `.h5ad` extension**

In [None]:
# Path to the input matrix, given without the '.h5ad' extension.
file = "local"

**Load the AnnData object**<br>
*`backed='r'` saves memory, but it should only be used when the updates are limited to metadata*

In [None]:
# Open the input in read-only backed mode and display a summary of the object.
h5ad_path = f'{file}.h5ad'
adata = sc.read_h5ad(h5ad_path, backed='r')
adata

**Portal fields are defined & removed**

In [None]:
# Columns to strip from obs before the file is revised and re-validated.
portal_obs = [
    'assay',
    'cell_type',
    'development_stage',
    'disease',
    'ethnicity',
    'organism',
    'sex',
    'tissue'
]

# Columns to strip from var (and from raw.var when a raw layer is present).
portal_var = [
    'feature_name',
    'feature_reference'
]

# errors='ignore' skips columns that are already absent, so this cell can be
# re-run and also works on files that lack some of the listed fields, instead
# of aborting with a KeyError part-way through.
adata.obs.drop(columns=portal_obs, inplace=True, errors='ignore')
adata.var.drop(columns=portal_var, inplace=True, errors='ignore')

# raw is optional; when present, strip the same var columns from it.
if adata.raw is not None:
    adata.raw.var.drop(columns=portal_var, inplace=True, errors='ignore')

# INSERT UPDATES HERE

**Plot the cells to ensure they cluster by cell type**

In [None]:
# Use the embedding named in uns['default_embedding'] when it is set;
# otherwise fall back to the first key in obsm.
sc.set_figure_params(dpi=150)
first_obsm_key = adata.obsm_keys()[0]
default_embedding = adata.uns.get('default_embedding', first_obsm_key)
sc.pl.embedding(
    adata,
    basis=default_embedding,
    color=['cell_type_ontology_term_id'],
)

**The above plot will set a color palette in uns, so remove that**

In [None]:
# Remove the color palette stored in uns so it is not written to the revised file.
# The membership check lets this cell be re-run, or run when no palette was
# stored, without raising a KeyError.
palette_key = 'cell_type_ontology_term_id_colors'
if palette_key in adata.uns:
    del adata.uns[palette_key]

**Write the file**

In [None]:
# Write the edited object next to the input, under a '_revised' suffix,
# then display the output path.
new_one = f'{file}_revised.h5ad'
adata.write(new_one, compression='gzip')
new_one

**Run the CELLxGENE validator**

In [None]:
# Run the cellxgene-schema CLI on the revised file, capturing both streams.
validate_process = subprocess.run(
    ['cellxgene-schema', 'validate', new_one],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
# Echo stdout first, then stderr, decoded as UTF-8.
for captured in (validate_process.stdout, validate_process.stderr):
    print(captured.decode('utf-8'))