Test notebook for https://github.com/chanzuckerberg/single-cell-curation/issues/405

In [1]:
import numpy as np
import os
import scanpy as sc
import subprocess
import anndata as ad
from scipy import sparse

In [2]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and return the reported verdict.

    The validator's stdout is echoed first, then its stderr line by line.
    Echoing stops at the first stderr line that contains ``is_valid=``.

    Args:
        file: Path of the file passed to the ``cellxgene-schema validate`` command.

    Returns:
        The text following the last ``=`` on the status line, with surrounding
        whitespace removed (e.g. ``'True'``), or ``None`` when no stderr line
        contains ``is_valid=``.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)
    for line in validate_process.stderr.decode('utf-8').split('\n'):
        print(line)
        if 'is_valid=' in line:
            # Strip so a trailing '\r' (CRLF output) or padding cannot make the
            # verdict differ from the plain strings callers compare against.
            return line.split('=')[-1].strip()
    # No status line was found in stderr.
    return None

In [3]:
def save_and_test(adata, expected):
    """Write ``adata`` to a scratch h5ad file, validate it and print PASSED or ERROR.

    The scratch file ``test.h5ad`` is created in the current working directory
    and removed again before the function returns, including when reading back
    or validating raises.

    Args:
        adata: Object providing ``write(filename=...)``; it is written to ``test.h5ad``.
        expected: Verdict string compared against the value returned by
            ``validate`` (e.g. ``'True'``).

    Returns:
        None. The outcome is reported by printing a coloured PASSED/ERROR line.
    """
    scratch_path = 'test.h5ad'
    try:
        adata.write(filename=scratch_path)
        # Read the file back; the result is not used further.
        # NOTE(review): presumably a round-trip sanity check - confirm.
        sc.read_h5ad(scratch_path)

        print("A valid h5ad")
        print('------------------')

        valid = validate(scratch_path)
        print('------------------')

        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
    finally:
        # Clean up even on failure so a broken run does not leave test.h5ad
        # behind; the existence check avoids masking an error from write().
        if os.path.exists(scratch_path):
            os.remove(scratch_path)

## Test Valid Cases

In [4]:
# Baseline case: valid.h5ad (described as having bool columns) is expected to validate.
valid_h5ad_path = "../valid.h5ad"
adata = sc.read_h5ad(valid_h5ad_path)
save_and_test(adata, expected='True')

A valid h5ad
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.688623 with status is_valid=True
------------------
[1m[92mPASSED[0m


## Test Invalid Cases

In [5]:
# A category-typed column holding boolean objects can only be obtained from the
# original invalid file, so that file is validated directly.
# File is on google drive: https://drive.google.com/drive/u/1/folders/1PJGniNxWStKGge-cE1cu6DfLtTEZFrlz
print("Validating original h5ad with column containing category of boolean type")
print('------------------')
expected = 'False'
valid = validate("example_addlabelsFail.h5ad")

# Green PASSED when the validator's verdict matches the expectation, red ERROR otherwise.
print('\033[1m\033[92mPASSED\033[0m' if expected == valid else '\033[1m\033[91mERROR\033[0m')

Validating original h5ad with column containing category of boolean type
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:54.785192 with status is_valid=True
[1m[91mERROR[0m
