https://github.com/chanzuckerberg/single-cell-curation/issues/515

In [1]:
import numpy as np
import os
import scanpy as sc
import subprocess

In [2]:
def validate(file):
    """Run ``cellxgene-schema validate`` on ``file`` and return its status.

    Everything the CLI writes to stdout and stderr is echoed with ``print``
    so the validator's messages show up in the notebook output.

    Args:
        file: Path of the file passed to ``cellxgene-schema validate``.

    Returns:
        The text after the last ``=`` on the first stderr line containing
        ``is_valid=`` (``'True'`` / ``'False'`` in the runs recorded in this
        notebook), with surrounding whitespace removed, or ``None`` when the
        validator emits no such line.
    """
    validate_process = subprocess.run(
        ['cellxgene-schema', 'validate', file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    for line in validate_process.stdout.decode('utf-8').split('\n'):
        print(line)

    valid = None
    # splitlines() drops the empty tail after the final newline and copes
    # with '\r\n', so the echoed output does not gain a stray blank line.
    for line in validate_process.stderr.decode('utf-8').splitlines():
        print(line)
        # Remember the status but keep echoing: returning here would hide
        # any diagnostics the validator prints after the status line.
        if valid is None and 'is_valid=' in line:
            # strip() so stray whitespace cannot break callers' comparisons
            # against the literal strings 'True' / 'False'.
            valid = line.split('=')[-1].strip()
    return valid

In [3]:
def save_and_test(adata, expected):
    """Write ``adata`` to disk, validate the file, and print PASSED or ERROR.

    The object is written to ``test.h5ad``, read back with ``sc.read_h5ad``,
    and the organism / self-reported-ethnicity value counts are printed
    (or a notice when the organism column is absent). The file is then passed
    to ``validate`` and the returned status compared with ``expected``.

    Args:
        adata: Object to test; must provide ``write(filename=...)``.
        expected: Status string that ``validate`` should return for this
            object (the notebook passes ``'True'`` or ``'False'``).

    Returns:
        None. The verdict is printed, not returned.
    """
    try:
        adata.write(filename='test.h5ad')
        reloaded = sc.read_h5ad('test.h5ad')
        if 'organism_ontology_term_id' in reloaded.obs.columns:
            print(reloaded.obs[['organism_ontology_term_id','self_reported_ethnicity_ontology_term_id']].value_counts())
        else:
            print('organism_ontology_term_id absent')
        print('------------------')
        valid = validate('test.h5ad')
        print('------------------')
        if expected != valid:
            print('\033[1m\033[91mERROR\033[0m')
        else:
            print('\033[1m\033[92mPASSED\033[0m')
    finally:
        # Clean up even when writing, reloading or validating raises, so a
        # failed run never leaves a stale test.h5ad behind for the next cell.
        if os.path.exists('test.h5ad'):
            os.remove('test.h5ad')

In [4]:
# Baseline dataset. Every test cell below mutates adata.obs in place and passes
# this same object to save_and_test, so changes carry over from cell to cell.
adata = sc.read_h5ad('../valid.h5ad')

### Test Valid Cases 

In [8]:
# Valid case: human organism with self_reported_ethnicity_ontology_term_id set to 'unknown'.
# development_stage_ontology_term_id is also set to 'unknown' here; adata is mutated in
# place and none of the later cells shown resets it, so that value persists.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'unknown'
adata.obs['development_stage_ontology_term_id'] = 'unknown'
save_and_test(adata, 'True')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             unknown                                     2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.874154 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [9]:
# Valid case: comma-separated list for self_reported_ethnicity_ontology_term_id,
# written with no spaces and with the lower term id first.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005,HANCESTRO:0014'
save_and_test(adata, 'True')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0005,HANCESTRO:0014               2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.869773 with status is_valid=True
------------------
[1m[92mPASSED[0m


In [10]:
# Valid case: non-human organism (NCBITaxon:10090) with
# self_reported_ethnicity_ontology_term_id set to 'na'.
# NOTE(review): presumably this only validates because
# development_stage_ontology_term_id was set to 'unknown' in an earlier cell — confirm.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:10090'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'na'
save_and_test(adata, 'True')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:10090            na                                          2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
Validation complete in 0:00:00.886473 with status is_valid=True
------------------
[1m[92mPASSED[0m


### Test Invalid Cases 

In [11]:
# Invalid case: the free-text value 'multiethnic' is not a HANCESTRO term id
# and must be rejected for a human organism.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'multiethnic'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             multiethnic                                 2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'multiethnic' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.575086 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [12]:
# Invalid case: organism_ontology_term_id absent from obs.
# errors='ignore' keeps this cell re-runnable: without it, running the cell a
# second time raises KeyError because the column has already been dropped.
adata.obs.drop(columns=['organism_ontology_term_id'], inplace=True, errors='ignore')
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005'
save_and_test(adata, 'False')

organism_ontology_term_id absent
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: Dataframe 'obs' is missing column 'organism_ontology_term_id'.
ERROR: Checking values with dependencies failed for adata.obs['self_reported_ethnicity_ontology_term_id'], this is likely due to missing dependent column in adata.obs.
ERROR: Checking values with dependencies failed for adata.obs['development_stage_ontology_term_id'], this is likely due to missing dependent column in adata.obs.
Validation complete in 0:00:00.593389 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [13]:
# Invalid case: single self_reported_ethnicity_ontology_term_id term with a leading space.
# Assigning organism_ontology_term_id here also re-creates the column that the
# previous cell dropped.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = ' HANCESTRO:0005'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606              HANCESTRO:0005                             2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: ' HANCESTRO:0005' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.518596 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [14]:
# Invalid case: single self_reported_ethnicity_ontology_term_id term with a trailing space.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005 '
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0005                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0005 ' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.515149 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [15]:
# Invalid case: single self_reported_ethnicity_ontology_term_id term with both a
# leading and a trailing space.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = ' HANCESTRO:0005 '
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606              HANCESTRO:0005                             2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: ' HANCESTRO:0005 ' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.528122 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [16]:
# Invalid case: list of self_reported_ethnicity_ontology_term_id terms where the
# second term has a leading space (space after the comma).
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005, HANCESTRO:0014'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0005, HANCESTRO:0014              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: ' HANCESTRO:0014' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.513412 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [17]:
# Invalid case: list of self_reported_ethnicity_ontology_term_id terms where the
# first term has a trailing space (space before the comma).
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005 ,HANCESTRO:0014'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0005 ,HANCESTRO:0014              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0005 ' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.525109 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [18]:
# Invalid case: list of self_reported_ethnicity_ontology_term_id terms that is
# not in ascending lexical order (0014 listed before 0005).
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0014,HANCESTRO:0005'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0014,HANCESTRO:0005               2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0014,HANCESTRO:0005' in 'self_reported_ethnicity_ontology_term_id' is not in ascending lexical order. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.514896 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [19]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0002,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0002'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0002                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0002' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.514706 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [20]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0003,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0003'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0003                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0003' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.517374 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [21]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0004,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0004'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0004                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0004' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.596482 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [22]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0018,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0018'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0018                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0018' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.530657 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [23]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0290,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0290'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0290                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0290' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.523487 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [24]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0304,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0304'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0304                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0304' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.521001 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [25]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0323,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0323'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0323                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0323' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.517649 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [26]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0324,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0324'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0324                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0324' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.521779 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [27]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0551,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0551'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0551                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0551' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.514690 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [28]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0554,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0554'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0554                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0554' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.517733 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [29]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0555,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0555'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0555                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0555' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.519740 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [30]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0557,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0557'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0557                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0557' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.522760 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [31]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0558,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0558'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0558                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0558' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.520235 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [32]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0559,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0559'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0559                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0559' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.524198 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [33]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0560,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0560'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0560                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0560' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.520939 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [34]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0561,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0561'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0561                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0561' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.513786 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [35]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0564,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0564'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0564                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0564' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.521581 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [36]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0565,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0565'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0565                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0565' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.533324 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [37]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0566,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0566'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0566                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0566' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.533919 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [38]:
# Invalid case: self_reported_ethnicity_ontology_term_id as GEO:000000374,
# which the validator rejects as not being a HANCESTRO term id.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'GEO:000000374'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             GEO:000000374                               2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'GEO:000000374' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.519333 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [39]:
# Invalid case: self_reported_ethnicity_ontology_term_id as HANCESTRO:0029,
# which the validator reports as "not allowed".
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0029'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0029                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0029' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.521574 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [40]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0030 on a human (NCBITaxon:9606) dataset.
# The validator is expected to reject this term as "not allowed", so the
# expected is_valid value passed to save_and_test is the string 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0030'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0030                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0030' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.518828 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [41]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0031 on a human (NCBITaxon:9606) dataset.
# The validator is expected to reject this term as "not allowed", so the
# expected is_valid value passed to save_and_test is the string 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0031'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0031                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0031' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.576064 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [42]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0032 on a human (NCBITaxon:9606) dataset.
# The validator is expected to reject this term as "not allowed", so the
# expected is_valid value passed to save_and_test is the string 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0032'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0032                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0032' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.588872 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [43]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0033 on a human (NCBITaxon:9606) dataset.
# The validator is expected to reject this term as "not allowed", so the
# expected is_valid value passed to save_and_test is the string 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0033'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0033                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0033' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.523278 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [44]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0034 on a human (NCBITaxon:9606) dataset.
# The validator is expected to reject this term as "not allowed", so the
# expected is_valid value passed to save_and_test is the string 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0034'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0034                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0034' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.517707 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [45]:
# Negative case: comma-separated list mixing a HANCESTRO term (HANCESTRO:0005)
# with the literal 'unknown' on a human (NCBITaxon:9606) dataset.
# 'unknown' is only acceptable as the sole value, not as a list element, so
# validation is expected to fail (expected is_valid string: 'False').
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005,unknown'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0005,unknown                      2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'unknown' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.518398 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [46]:
# Negative case: comma-separated list in ascending order that combines two
# terms treated as valid by this notebook (HANCESTRO:0005, HANCESTRO:0014)
# with one term the validator rejects as "not allowed" (HANCESTRO:0034).
# A single rejected element should invalidate the whole value, so the
# expected is_valid string is 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005,HANCESTRO:0014,HANCESTRO:0034'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id    
NCBITaxon:9606             HANCESTRO:0005,HANCESTRO:0014,HANCESTRO:0034    2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0034' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.530687 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [47]:
# Negative case: comma-separated list made up only of terms the validator
# rejects as "not allowed" (HANCESTRO:0033 and HANCESTRO:0034) on a human
# (NCBITaxon:9606) dataset. The validator reports one error per rejected term.
# Expected is_valid string: 'False'.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0033,HANCESTRO:0034'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0033,HANCESTRO:0034               2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0033' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
ERROR: 'HANCESTRO:0034' in 'self_reported_ethnicity_ontology_term_id' is not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, 

In [48]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0051, a child term of HANCESTRO:0002, on a human (NCBITaxon:9606)
# dataset. Child terms of HANCESTRO:0002 are not allowed by the validator,
# so validation is expected to fail (expected is_valid string: 'False').
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0051'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0051                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0051' in 'self_reported_ethnicity_ontology_term_id' is not allowed. Child terms of 'HANCESTRO:0002' are not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.524051 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [49]:
# Negative case: self_reported_ethnicity_ontology_term_id is set to
# HANCESTRO:0306, a child term of HANCESTRO:0304, on a human (NCBITaxon:9606)
# dataset. Child terms of HANCESTRO:0304 are not allowed by the validator,
# so validation is expected to fail (expected is_valid string: 'False').
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0306'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0306                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0306' in 'self_reported_ethnicity_ontology_term_id' is not allowed. Child terms of 'HANCESTRO:0304' are not allowed. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.510952 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [50]:
# Negative case: comma-separated list that repeats the same term
# (HANCESTRO:0005 twice) on a human (NCBITaxon:9606) dataset.
# The validator rejects values that contain duplicates, so validation is
# expected to fail (expected is_valid string: 'False').
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005,HANCESTRO:0005'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             HANCESTRO:0005,HANCESTRO:0005               2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0005,HANCESTRO:0005' in 'self_reported_ethnicity_ontology_term_id' contains duplicates. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
Validation complete in 0:00:00.517032 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [51]:
# Negative case: the column holds the string representation of a Python list
# instead of a plain comma-separated string, on a human (NCBITaxon:9606)
# dataset. Validation is expected to fail (expected is_valid string: 'False').
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:9606'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005,HANCESTRO:0014'
# NOTE(review): .str.split() with no separator splits on whitespace, not on
# commas, so every row becomes a ONE-element list
# ['HANCESTRO:0005,HANCESTRO:0014'] rather than a list of two terms.
# If a two-element list was intended, this would need .str.split(',') and the
# cell would have to be re-run to refresh its recorded output - confirm intent.
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].str.split()
# Convert the list objects to their string repr (brackets and quotes included)
# before the frame is written to disk by save_and_test.
adata.obs['self_reported_ethnicity_ontology_term_id'] = adata.obs['self_reported_ethnicity_ontology_term_id'].astype('str')
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:9606             ['HANCESTRO:0005,HANCESTRO:0014']           2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: '['HANCESTRO:0005' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no leading or trailing spaces) HANCESTRO terms in ascending lexical order, or 'unknown' if unavailable. Cannot match any forbidden HANCESTRO terms listed in schema definition.
ERROR: 'HANCESTRO:0014']' in 'self_reported_ethnicity_ontology_term_id' is not a valid ontology term id of 'HANCESTRO'. When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be formatted as one or more comma-separated (with no lead

In [52]:
# Negative case: organism switched to mouse (NCBITaxon:10090) while
# self_reported_ethnicity_ontology_term_id carries a comma-separated list of
# HANCESTRO terms. For any organism other than NCBITaxon:9606 the validator
# requires the value 'na', so validation is expected to fail
# (expected is_valid string: 'False').
# Note: this overwrites the organism column on the shared `adata` object;
# later cells must set organism_ontology_term_id again if they need human.
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:10090'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0005,HANCESTRO:0014'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:10090            HANCESTRO:0005,HANCESTRO:0014               2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0005,HANCESTRO:0014' in 'self_reported_ethnicity_ontology_term_id' is not a valid value of 'self_reported_ethnicity_ontology_term_id'. When 'organism_ontology_term_id' is NOT 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be 'na'.
Validation complete in 0:00:00.513593 with status is_valid=False
------------------
[1m[92mPASSED[0m


In [53]:
# Negative case: organism is mouse (NCBITaxon:10090) while
# self_reported_ethnicity_ontology_term_id carries a single HANCESTRO term
# (HANCESTRO:0014). For any organism other than NCBITaxon:9606 the validator
# requires the value 'na', so validation is expected to fail
# (expected is_valid string: 'False').
adata.obs['organism_ontology_term_id'] = 'NCBITaxon:10090'
adata.obs['self_reported_ethnicity_ontology_term_id'] = 'HANCESTRO:0014'
save_and_test(adata, 'False')

organism_ontology_term_id  self_reported_ethnicity_ontology_term_id
NCBITaxon:10090            HANCESTRO:0014                              2000
dtype: int64
------------------
Loading dependencies
Loading validator modules

Starting validation...
ERROR: 'HANCESTRO:0014' in 'self_reported_ethnicity_ontology_term_id' is not a valid value of 'self_reported_ethnicity_ontology_term_id'. When 'organism_ontology_term_id' is NOT 'NCBITaxon:9606' (Homo sapiens), self_reported_ethnicity_ontology_term_id MUST be 'na'.
Validation complete in 0:00:00.521896 with status is_valid=False
------------------
[1m[92mPASSED[0m
