https://github.com/chanzuckerberg/single-cell-curation/issues/514<br>
https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md#tissue_type

In [None]:
import numpy as np
import os
import scanpy as sc
import subprocess

In [None]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and echo its output.

    Everything the tool wrote to stdout is printed first. Its stderr is then
    printed line by line until a line containing ``is_valid=`` is reached.

    Returns the text after the last ``=`` on that line (a string such as
    ``'True'`` or ``'False'``), or ``None`` when no such line is present.
    """
    completed = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    stdout_lines = completed.stdout.decode('utf-8').split('\n')
    for out_line in stdout_lines:
        print(out_line)

    stderr_lines = completed.stderr.decode('utf-8').split('\n')
    for err_line in stderr_lines:
        print(err_line)
        if 'is_valid=' not in err_line:
            continue
        # The status is reported as "...is_valid=<value>"; keep only the value.
        return err_line.rpartition('=')[2]

    return None

In [None]:
def save_and_test(adata, expected):
    """Write ``adata`` to a scratch file, validate it, and print the verdict.

    The object is written to ``test.h5ad`` and read back, a summary of the
    ``tissue_type`` / ``tissue_ontology_term_id`` combinations is printed, and
    the file is passed to ``validate``. A bold green ``PASSED`` is printed when
    the validator's result equals ``expected``, a bold red ``ERROR`` otherwise.

    Parameters:
        adata: object to test; must provide ``write(filename=...)``.
        expected: the string ``validate`` should return, e.g. ``'True'`` or
            ``'False'``.

    The scratch file is removed even if writing, reading or validation raises.
    """
    scratch_path = 'test.h5ad'
    try:
        adata.write(filename=scratch_path)
        test_adata = sc.read_h5ad(scratch_path)
        if 'tissue_type' in test_adata.obs.columns:
            # dropna=False keeps null tissue_type rows in the summary; the
            # default would drop them and print an empty result.
            print(
                test_adata.obs[['tissue_type', 'tissue_ontology_term_id']]
                .value_counts(dropna=False)
            )
        else:
            print('tissue_type absent')
        print('------------------')

        valid = validate(scratch_path)
        print('------------------')
        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
    finally:
        # Clean up on every path, but do not mask an earlier failure if the
        # file was never created.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

In [None]:
# Load the starting dataset into memory; each test cell below mutates this same object.
# backed='r' would be slightly quicker but produces an error with multiple writes.
adata = sc.read_h5ad('../valid.h5ad')

**Test valid cases**

In [None]:
# Valid: tissue_type 'tissue' paired with an UBERON term.
adata.obs['tissue_type'] = 'tissue'
adata.obs['tissue_ontology_term_id'] = 'UBERON:0004784'
save_and_test(adata, 'True')

In [None]:
# Valid: tissue_type 'organoid' paired with an UBERON term.
adata.obs['tissue_type'] = 'organoid'
adata.obs['tissue_ontology_term_id'] = 'UBERON:0004784'
save_and_test(adata, 'True')

In [None]:
# Valid: tissue_type 'cell culture' paired with a CL term.
adata.obs['tissue_type'] = 'cell culture'
adata.obs['tissue_ontology_term_id'] = 'CL:0000034'
save_and_test(adata, 'True')

**Test invalid cases**

In [None]:
# Invalid: 'Tissue' is capitalised, unlike the 'tissue' value used in the valid case.
adata.obs['tissue_type'] = 'Tissue'
adata.obs['tissue_ontology_term_id'] = 'UBERON:0004784'
save_and_test(adata, 'False')

In [None]:
# Invalid: tissue_type is null for every observation.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
adata.obs['tissue_type'] = np.nan
# Pin an UBERON term used in the valid cases so only the null tissue_type is under
# test, whatever cell was run before this one.
adata.obs['tissue_ontology_term_id'] = 'UBERON:0004784'
save_and_test(adata, 'False')

In [None]:
# Invalid: tissue_type 'cell culture' paired with an UBERON term instead of a CL term.
adata.obs['tissue_type'] = 'cell culture'
adata.obs['tissue_ontology_term_id'] = 'UBERON:0004784'
save_and_test(adata, 'False')

In [None]:
# Invalid: tissue_type 'organoid' paired with a CL term instead of an UBERON term.
adata.obs['tissue_type'] = 'organoid'
adata.obs['tissue_ontology_term_id'] = 'CL:0000034'
save_and_test(adata, 'False')

In [None]:
# Invalid: tissue_type 'tissue' paired with a CL term instead of an UBERON term.
adata.obs['tissue_type'] = 'tissue'
adata.obs['tissue_ontology_term_id'] = 'CL:0000034'
save_and_test(adata, 'False')

In [None]:
# Invalid: tissue_type 'tissue' with an UBERON term that is expected to be rejected as too broad.
adata.obs['tissue_type'] = 'tissue'
adata.obs['tissue_ontology_term_id'] = 'UBERON:0001062'
save_and_test(adata, 'False')

In [None]:
# Invalid: the tissue_type column is missing altogether.
# Pin an UBERON term used in the valid cases so the expected failure is caused by the
# missing column, not by the too-broad 'UBERON:0001062' left over from the previous cell.
adata.obs['tissue_ontology_term_id'] = 'UBERON:0004784'
# errors='ignore' keeps the cell re-runnable once the column has already been dropped.
adata.obs.drop(columns=['tissue_type'], inplace=True, errors='ignore')
save_and_test(adata, 'False')

In [None]:
# Invalid: the notation current at the time of writing, with ' (organoid)' appended to
# the term ID, is expected to be rejected even though tissue_type is also set.
adata.obs['tissue_type'] = 'organoid'
adata.obs['tissue_ontology_term_id'] = 'UBERON:0001062 (organoid)'
save_and_test(adata, 'False')