https://github.com/chanzuckerberg/single-cell-curation/issues/610

In [None]:
import numpy as np
import os
import scanpy as sc
import subprocess

In [None]:
def validate(file):
    """Run ``cellxgene-schema validate`` on *file* and return its verdict.

    The command's stdout is echoed in full. Its stderr is then echoed line
    by line up to and including the first line that contains ``is_valid=``;
    the text after the last ``=`` on that line is returned as a string.
    Returns ``None`` if no such line is found.
    """
    completed = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    captured_out = completed.stdout.decode('utf-8')
    for out_line in captured_out.split('\n'):
        print(out_line)

    captured_err = completed.stderr.decode('utf-8')
    for err_line in captured_err.split('\n'):
        print(err_line)
        if 'is_valid=' not in err_line:
            continue
        # First verdict line wins; any later stderr output is not echoed.
        return err_line.rpartition('=')[2]
    return None

In [None]:
def save_and_test(adata, expected):
    """Round-trip *adata* through ``test.h5ad``, validate it, and report.

    The object is written to ``test.h5ad`` and read back, the ``.size`` of
    every entry in ``obsm``, ``obsp``, ``varm`` and ``varp`` of the reloaded
    object is printed, and the file is passed to ``validate``. A coloured
    ``PASSED`` is printed when the validator's verdict equals *expected*,
    otherwise a coloured ``ERROR``.

    Parameters:
        adata: object exposing ``write(filename=...)``; in this notebook an
            AnnData loaded with ``sc.read_h5ad``.
        expected: the verdict string ``validate`` should return
            (the notebook passes ``'True'`` or ``'False'``).

    The temporary file is removed even when reading or validating raises,
    so a failed run does not leave ``test.h5ad`` behind.
    """
    path = 'test.h5ad'
    try:
        adata.write(filename=path)
        # Inspect what actually landed on disk, not the in-memory object.
        reloaded = sc.read_h5ad(path)
        for attr in ('obsm', 'obsp', 'varm', 'varp'):
            mapping = getattr(reloaded, attr)
            for key in mapping:
                print(f'adata.{attr} size:')
                print(mapping[key].size)
        print('------------------')
        valid = validate(path)
        print('------------------')
        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
    finally:
        # The write itself may have failed before creating the file.
        if os.path.exists(path):
            os.remove(path)

In [None]:
# Baseline fixture: each test cell below adds one 'test_array' entry to this
# object, runs save_and_test, and then deletes the entry again.
adata = sc.read_h5ad('valid.h5ad')

### Test Valid Cases 

In [None]:
# "regular" sized obsm array
# Size the array from the fixture (one value per observation) instead of a
# hard-coded 2000, so the cell keeps working if valid.h5ad changes.
adata.obsm['test_array'] = np.zeros(adata.n_obs, dtype=float)
save_and_test(adata, 'True')
del adata.obsm['test_array']

In [None]:
# "regular" sized obsp array
# Size the square array from the fixture (n_obs x n_obs) instead of a
# hard-coded 2000, so the cell keeps working if valid.h5ad changes.
adata.obsp['test_array'] = np.zeros([adata.n_obs, adata.n_obs], dtype=float)
save_and_test(adata, 'True')
del adata.obsp['test_array']

In [None]:
# "regular" sized varm array
# Size the array from the fixture (one value per variable) instead of a
# hard-coded 22356, so the cell keeps working if valid.h5ad changes.
adata.varm['test_array'] = np.zeros(adata.n_vars, dtype=float)
save_and_test(adata, 'True')
del adata.varm['test_array']

In [None]:
# "regular" sized varp array
# Size the square array from the fixture (n_vars x n_vars) instead of a
# hard-coded 22356, so the cell keeps working if valid.h5ad changes.
adata.varp['test_array'] = np.zeros([adata.n_vars, adata.n_vars], dtype=float)
save_and_test(adata, 'True')
del adata.varp['test_array']

### Test Invalid Cases 

In [None]:
# obsm with array size of 0
# First axis comes from the fixture instead of a hard-coded 2000; the
# zero-length second axis is what makes the array size 0.
adata.obsm['test_array'] = np.empty(shape=(adata.n_obs, 0))
save_and_test(adata, 'False')
del adata.obsm['test_array']

In [None]:
# obsp with array size of 0
# Leading axes come from the fixture instead of a hard-coded 2000; the
# zero-length trailing axis is what makes the array size 0.
adata.obsp['test_array'] = np.empty(shape=(adata.n_obs, adata.n_obs, 0))
save_and_test(adata, 'False')
del adata.obsp['test_array']

In [None]:
# varm with array size of 0
# First axis comes from the fixture instead of a hard-coded 22356; the
# zero-length second axis is what makes the array size 0.
adata.varm['test_array'] = np.empty(shape=(adata.n_vars, 0))
save_and_test(adata, 'False')
del adata.varm['test_array']

In [None]:
# varp with array size of 0
# Leading axes come from the fixture instead of a hard-coded 22356; the
# zero-length trailing axis is what makes the array size 0.
adata.varp['test_array'] = np.empty(shape=(adata.n_vars, adata.n_vars, 0))
save_and_test(adata, 'False')
del adata.varp['test_array']