Test notebook for https://github.com/chanzuckerberg/single-cell-curation/issues/607: checks how `cellxgene-schema validate` handles `uns['schema_version']`.

In [20]:
import numpy as np
import os
import scanpy as sc
import subprocess

In [21]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and return its verdict.

    Everything the validator writes to stdout and stderr is echoed with
    ``print`` so the notebook output shows the full validation log.

    Args:
        file: Path of the h5ad file handed to the CLI.

    Returns:
        The text following ``is_valid=`` on the first stderr line that
        contains it, stripped of surrounding whitespace, or ``None`` when no
        such line was emitted (for example when the validator crashed with a
        traceback instead of reporting a status).
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # splitlines() copes with '\r\n' line endings and does not yield the
    # spurious empty trailing entry that split('\n') produces.
    for line in validate_process.stdout.decode('utf-8', errors='replace').splitlines():
        print(line)

    valid = None
    for line in validate_process.stderr.decode('utf-8', errors='replace').splitlines():
        print(line)
        # Keep echoing after the status line so later stderr output is not lost;
        # only the first status line determines the verdict.
        if valid is None and 'is_valid=' in line:
            valid = line.split('=')[-1].strip()
    return valid

In [22]:
def save_and_test(adata, expected):
    """Write ``adata`` to a scratch h5ad file, validate it, and print the outcome.

    Reports whether ``uns['schema_version']`` is present (and its value), runs
    ``validate`` on the written file, then prints a green PASSED when the
    verdict equals ``expected`` and a red ERROR otherwise.

    Args:
        adata: Object exposing ``uns`` (mapping) and ``write(filename=...)``.
        expected: Verdict string expected back from ``validate``
            (the cells in this notebook pass ``'True'`` or ``'False'``).
    """
    scratch_file = 'test.h5ad'

    if 'schema_version' in adata.uns:
        print('schema_version present:')
        print(adata.uns['schema_version'])
    else:
        print('schema_version absent')
    print('------------------')

    try:
        adata.write(filename=scratch_file)
        valid = validate(scratch_file)
    finally:
        # Always clean up the scratch file, even when writing or validating
        # raised; the existence check avoids masking the original error if
        # the write failed before the file was created.
        if os.path.exists(scratch_file):
            os.remove(scratch_file)

    print('------------------')
    # ANSI escapes: bold + red for a mismatch, bold + green for a match.
    if expected != valid:
        print('\033[1m\033[91mERROR\033[0m')
    else:
        print('\033[1m\033[92mPASSED\033[0m')

In [23]:
# Load the baseline dataset; the cells below set uns['schema_version'] on it and re-validate.
adata = sc.read_h5ad('../valid.h5ad')

### Testing Valid Case 

In [24]:
# Schema version is not present; the file is expected to validate.
# Work on a copy with the key explicitly removed so this case does not depend
# on whether other cells have already set uns['schema_version'] on `adata`.
adata_without_version = adata.copy()
adata_without_version.uns.pop('schema_version', None)
save_and_test(adata_without_version, 'True')

schema_version absent
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.875105 with status is_valid=True
------------------
[1m[92mPASSED[0m


### Testing Invalid Cases

In [25]:
# Schema version is present as a version string ('4.0.0'); the file is expected to fail validation.
adata.uns['schema_version'] = '4.0.0'
save_and_test(adata, 'False')

schema_version present:
4.0.0
------------------
Loading dependencies
Loading validator modules

Starting validation...
Schema version '4.0.0' is not supported. Current supported versions: '3.0.0'. Validating with latest version '3.0.0'.
Validation complete in 0:00:00.867722 with status is_valid=True
------------------
[1m[91mERROR[0m


In [26]:
# Schema version key is assigned None; the file is expected to fail validation.
adata.uns['schema_version'] = None
save_and_test(adata, 'False')

schema_version present:
None
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.973061 with status is_valid=True
------------------
[1m[91mERROR[0m


In [27]:
# Schema version is the boolean False (a non-string value); the file is expected to fail validation.
adata.uns['schema_version'] = False
save_and_test(adata, 'False')

schema_version present:
False
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'False' in 'uns['schema_version']' is not valid, it must be a string.
Validation complete in 0:00:00.476179 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [28]:
# Schema version is an empty string; the file is expected to fail validation.
adata.uns['schema_version'] = ''
save_and_test(adata, 'False')

schema_version present:

------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.871559 with status is_valid=True
------------------
[1m[91mERROR[0m


In [29]:
# Schema version is the literal string 'NaN'; the file is expected to fail validation.
# NOTE(review): this is a str, not a float NaN such as np.nan — confirm which was intended.
adata.uns['schema_version'] = 'NaN'
save_and_test(adata, 'False')

schema_version present:
NaN
------------------
Loading dependencies
Loading validator modules

Starting validation...
Traceback (most recent call last):
  File "/Users/jchaffer/opt/anaconda3/envs/cxg/bin/cellxgene-schema", line 8, in <module>
    sys.exit(schema_cli())
  File "/Users/jchaffer/opt/anaconda3/envs/cxg/lib/python3.10/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/Users/jchaffer/opt/anaconda3/envs/cxg/lib/python3.10/site-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/Users/jchaffer/opt/anaconda3/envs/cxg/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/jchaffer/opt/anaconda3/envs/cxg/lib/python3.10/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/jchaffer/opt/anaconda3/envs/cxg/lib/python3.10/site-packages/click/core.py", line 760, i