https://github.com/chanzuckerberg/single-cell-curation/issues/607

In [None]:
import numpy as np
import os
import scanpy as sc
import subprocess

In [None]:
def validate(file):
    """Run ``cellxgene-schema validate`` on *file* and report its verdict.

    Everything the command writes to stdout and stderr is echoed with
    ``print`` so the complete validator report is visible to the caller.

    Args:
        file: Path of the file handed to the command line tool.

    Returns:
        The text after the last ``=`` on the first stderr line containing
        ``is_valid=`` (for example ``'True'`` or ``'False'``), or ``None``
        when the tool emitted no such line.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # errors='replace' keeps a stray non-UTF-8 byte in the tool output from
    # aborting the report with a UnicodeDecodeError.
    for line in validate_process.stdout.decode('utf-8', errors='replace').split('\n'):
        print(line)

    valid = None
    for line in validate_process.stderr.decode('utf-8', errors='replace').split('\n'):
        print(line)
        # Remember the first verdict but keep looping, so stderr lines that
        # follow the verdict are still echoed instead of being dropped.
        if valid is None and 'is_valid=' in line:
            valid = line.split('=')[-1]
    return valid

In [None]:
def save_and_test(adata, expected):
    """Round-trip *adata* through ``test.h5ad`` and check the validator verdict.

    The object is written to ``test.h5ad`` in the current directory, read
    back, and the presence (and value) of ``uns['schema_version']`` in the
    reloaded object is printed. The file is then passed to ``validate`` and
    a coloured PASSED/ERROR banner is printed depending on whether the
    verdict equals *expected*.

    Args:
        adata: Object providing ``write(filename=...)``; the notebook passes
            the result of ``sc.read_h5ad``.
        expected: Verdict to compare against the value returned by
            ``validate`` (the callers pass the strings ``'True'``/``'False'``).
    """
    try:
        adata.write(filename='test.h5ad')
        # Inspect what actually landed on disk rather than the in-memory
        # object, hence the re-read into a separate local.
        reloaded = sc.read_h5ad('test.h5ad')
        if 'schema_version' in reloaded.uns:
            print('schema_version present:')
            print(reloaded.uns['schema_version'])
        else:
            print('schema_version absent')
        print('------------------')
        valid = validate('test.h5ad')
        print('------------------')
        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
    finally:
        # Clean up even when writing, re-reading or validating raised, so a
        # failed case does not leave a stale test.h5ad behind.
        if os.path.exists('test.h5ad'):
            os.remove('test.h5ad')

In [None]:
# Load the baseline fixture; the cells below pass this same object to
# save_and_test and modify its uns['schema_version'] entry in place.
adata = sc.read_h5ad('../valid.h5ad')

### Testing Valid Case 

In [None]:
# Schema version is not present
# The unmodified fixture is expected to validate; the verdict is compared as
# the string 'True'.
# NOTE(review): assumes ../valid.h5ad carries no uns['schema_version'];
# save_and_test prints whether the key is present, so check its output.
save_and_test(adata, 'True')

### Testing Invalid Case 

In [None]:
# Schema version is present
# Sets a version string on the shared adata object; validation is expected to
# fail. The key stays set on adata afterwards, so the valid case must be run
# before this cell.
adata.uns['schema_version'] = '4.0.0'
save_and_test(adata, 'False')

In [None]:
# Schema version is present and null
# Overwrites the key with None; validation is expected to fail.
adata.uns['schema_version'] = None
save_and_test(adata, 'False')

In [None]:
# Schema version as boolean false
# Overwrites the key with the bool False (a falsy, non-string value);
# validation is expected to fail.
adata.uns['schema_version'] = False
save_and_test(adata, 'False')

In [None]:
# Schema version as empty string
# Overwrites the key with ''; validation is expected to fail.
adata.uns['schema_version'] = ''
save_and_test(adata, 'False')

In [None]:
# Schema version as NaN
# NOTE(review): the value stored here is the literal string 'NaN', not a float
# NaN such as np.nan — confirm which of the two this case is meant to exercise.
adata.uns['schema_version'] = 'NaN'
save_and_test(adata, 'False')