https://github.com/chanzuckerberg/single-cell-curation/issues/610

In [1]:
import cxg_upload
import numpy as np
import os
import scanpy as sc
import subprocess
from datetime import datetime

In [2]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and report its verdict.

    Every line the validator writes to stdout is echoed first. The stderr
    lines are then echoed one by one until a line containing ``is_valid=``
    is found; stderr lines after that one are not printed.

    Parameters
    ----------
    file : str
        Path of the h5ad file handed to the validator.

    Returns
    -------
    str or None
        The text following the last ``=`` on the status line (``'True'`` or
        ``'False'`` in the validator's output), with surrounding whitespace
        removed. ``None`` when no status line was emitted.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            # Strip so a trailing '\r' (CRLF line endings) or stray spaces
            # do not break the caller's exact comparison with 'True'/'False'.
            return line.split('=')[-1].strip()
    # No status line was found (e.g. the validator aborted early).
    return None

In [3]:
def save_and_test(adata, expected):
    """Write ``adata`` to a temporary file, validate it and report the result.

    The file is written with ``adata.uns['title']`` suffixed by the current
    month/day and hour:minute. The caller's title is restored right after
    the write, so repeated calls on the same object do not accumulate
    timestamps. The written file is read back and the ``size`` of every
    entry in ``obsm``, ``obsp``, ``varm`` and ``varp`` is printed before the
    validator runs.

    Parameters
    ----------
    adata
        Object to test. Must expose ``uns['title']`` and ``write``.
    expected : str
        The validation status expected from ``validate``: ``'True'`` or
        ``'False'``.

    Notes
    -----
    Prints a green ``PASSED`` when the status equals ``expected`` and a red
    ``ERROR`` otherwise. When the result matches and ``expected`` is
    ``'True'``, the file is passed to ``cxg_upload.upload``. The temporary
    file is removed even if validation or upload raises.
    """
    file = 'test.h5ad'
    stamp = datetime.now().strftime('%m/%d %H:%M')
    base_title = adata.uns['title']

    try:
        # Timestamp the title only for the file on disk; put the caller's
        # title back whether or not the write succeeds.
        adata.uns['title'] = base_title + ' - ' + stamp
        try:
            adata.write(filename=file)
        finally:
            adata.uns['title'] = base_title

        # Inspect what was actually persisted, not the in-memory object.
        reloaded = sc.read_h5ad(file)
        for slot in ('obsm', 'obsp', 'varm', 'varp'):
            arrays = getattr(reloaded, slot)
            for key in arrays:
                print('adata.' + slot + ' size:')
                print(arrays[key].size)

        print('------------------')
        valid = validate(file)
        print('------------------')

        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
            # Only files that are expected to be valid get uploaded.
            if expected == 'True':
                cxg_upload.upload(file)
    finally:
        # The write may have failed before creating the file.
        if os.path.exists(file):
            os.remove(file)

In [4]:
# Load the baseline fixture shared by all test cells below; each cell adds a
# temporary 'test_array' entry to it and deletes that entry afterwards.
adata = sc.read_h5ad('../valid.h5ad')

### Test Valid Cases 

In [5]:
# "regular" sized obsm array
adata.uns['title'] = '610 "regular" sized obsm array'

# Size the array from the fixture instead of hard-coding its number of
# observations, so the cell keeps working if the fixture changes.
adata.obsm['test_array'] = np.zeros(adata.n_obs, dtype=float)
try:
    save_and_test(adata, 'True')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.obsm['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.obsm size:
2000
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.853999 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [6]:
# "regular" sized obsp array
adata.uns['title'] = '610 "regular" sized obsp array'

# Size the square array from the fixture instead of hard-coding its number
# of observations, so the cell keeps working if the fixture changes.
adata.obsp['test_array'] = np.zeros([adata.n_obs, adata.n_obs], dtype=float)
try:
    save_and_test(adata, 'True')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.obsp['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.obsp size:
4000000
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.847610 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [7]:
# "regular" sized varm array
adata.uns['title'] = '610 "regular" sized varm array'

# Size the array from the fixture instead of hard-coding its number of
# variables, so the cell keeps working if the fixture changes.
adata.varm['test_array'] = np.zeros(adata.n_vars, dtype=float)
try:
    save_and_test(adata, 'True')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.varm['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.varm size:
22356
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.946503 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [8]:
# "regular" sized varp array
adata.uns['title'] = '610 "regular" sized varp array'

# Size the square array from the fixture instead of hard-coding its number
# of variables, so the cell keeps working if the fixture changes.
adata.varp['test_array'] = np.zeros([adata.n_vars, adata.n_vars], dtype=float)
try:
    save_and_test(adata, 'True')
finally:
    # Always drop the (large) temporary entry so a failure here cannot leak
    # it into the cells that follow.
    del adata.varp['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.varp size:
499790736
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:02.020205 with status is_valid=True
------------------
[1m[92mPASSED[0m


### Test Invalid Cases 

In [9]:
# obsm with array size of 0
# Zero-width array: the leading dimension comes from the fixture, the second
# dimension of 0 makes the total size zero.
adata.obsm['test_array'] = np.empty(shape=(adata.n_obs, 0))
try:
    save_and_test(adata, 'False')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.obsm['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.obsm size:
0
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: The size of the ndarray stored for a 'adata.obsm['test_array']' MUST NOT be zero.
Validation complete in 0:00:00.503327 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [10]:
# obsp with array size of 0
# Zero-size array: the two leading dimensions come from the fixture, the
# trailing dimension of 0 makes the total size zero.
adata.obsp['test_array'] = np.empty(shape=(adata.n_obs, adata.n_obs, 0))
try:
    save_and_test(adata, 'False')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.obsp['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.obsp size:
0
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: The size of the ndarray stored for a 'adata.obsp['test_array']' MUST NOT be zero.
Validation complete in 0:00:00.493478 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [11]:
# varm with array size of 0
# Zero-width array: the leading dimension comes from the fixture, the second
# dimension of 0 makes the total size zero.
adata.varm['test_array'] = np.empty(shape=(adata.n_vars, 0))
try:
    save_and_test(adata, 'False')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.varm['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.varm size:
0
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: The size of the ndarray stored for a 'adata.varm['test_array']' MUST NOT be zero.
Validation complete in 0:00:00.492720 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [12]:
# varp with array size of 0
# Zero-size array: the two leading dimensions come from the fixture, the
# trailing dimension of 0 makes the total size zero.
adata.varp['test_array'] = np.empty(shape=(adata.n_vars, adata.n_vars, 0))
try:
    save_and_test(adata, 'False')
finally:
    # Always drop the temporary entry so a failure here cannot leak it into
    # the cells that follow.
    del adata.varp['test_array']

adata.obsm size:
4000
adata.obsm size:
4000
adata.varp size:
0
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: The size of the ndarray stored for a 'adata.varp['test_array']' MUST NOT be zero.
Validation complete in 0:00:00.504724 with status is_valid=False
------------------
[1m[92mPASSED[0m
