In [1]:
import json
import os
import requests
import sys

In [2]:
# Machine-specific locations; '~' is expanded to the current user's home directory.
scc_repo_loc, api_key_file_path = (
    os.path.expanduser(user_path)
    for user_path in (
        # Directory that contains the single-cell-curation checkout
        # (the trailing slash matters: the value is string-concatenated when building sys.path).
        '~/GitClones/CZI/',
        # File handed to set_api_access_config() to configure API access.
        '~/Documents/keys/cxg-api-key.txt',
    )
)

In [3]:
# Make the curation API helper package (the `src.*` modules imported below) importable.
# os.path.join is used so the result does not depend on scc_repo_loc ending with a slash,
# and the membership check keeps sys.path free of duplicates when this cell is re-run.
curation_api_dir = os.path.abspath(
    os.path.join(scc_repo_loc, 'single-cell-curation', 'notebooks', 'curation_api', 'python')
)
if curation_api_dir not in sys.path:
    sys.path.append(curation_api_dir)


from src.utils.config import set_api_access_config
from src.collection import get_collections
from src.dataset import get_dataset,get_datasets

In [4]:
# Configure API access from the key file (per the recorded output this sets the
# SITE_URL, API_URL_BASE and ACCESS_TOKEN environment variables).
set_api_access_config(api_key_file_path)

# Aggregate, per Collection ID, every donor_id and every development_stage term
# (ontology_term_id -> label) found across that Collection's datasets.
# NOTE(review): get_datasets() is called without arguments; presumably it returns
# the public datasets (the summary line labels the result "Public") - confirm.
collections = {}
datasets = get_datasets()
for d in datasets:
    c_id = d['collection_id']
    dev_stages = {t['ontology_term_id']: t['label'] for t in d['development_stage']}
    # Always accumulate into containers owned by `collections`. Storing the
    # dataset's own donor_id list and later extending it would silently mutate
    # the corresponding entry in `datasets`.
    entry = collections.setdefault(c_id, {'donor_id': [], 'development_stage': {}})
    entry['donor_id'].extend(d['donor_id'])
    entry['development_stage'].update(dev_stages)
print(f'{len(collections)} Public Collections')

# Public Collections: look-up table from Collection name to Collection ID.
pub_cs = get_collections()
public_names = dict((pub['name'], pub['collection_id']) for pub in pub_cs)

# Private Collections keyed by name. Entries with a truthy 'revision_of' value
# are left out, so only Collections that are not revisions remain.
priv_cs = get_collections(visibility='PRIVATE')
priv_collections = {}
for priv in priv_cs:
    if priv.get('revision_of'):
        continue
    priv_collections[priv['name']] = priv
print(f'{len(priv_collections)} Private Collections')

Set 'SITE_URL' env var to https://cellxgene.cziscience.com
Set 'API_URL_BASE' env var to https://api.cellxgene.cziscience.com
Successfully set 'ACCESS_TOKEN' env var!
181 Public Collections
67 Private Collections


In [5]:
# Collect the IDs of all non-obsolete terms in the human (HsapDv) and mouse (MmusDv)
# development-stage ontologies from the EBI OLS4 API. 'unknown' is seeded as an
# always-accepted value.
current_terms = ['unknown']

for ont in ['hsapdv', 'mmusdv']:
    page = 0
    while True:
        # Results are paginated (500 terms per page); walk every page so no term
        # is silently dropped if an ontology grows beyond a single page.
        url = (
            f'https://www.ebi.ac.uk/ols4/api/ontologies/{ont}/terms'
            f'?obsoletes=false&size=500&page={page}'
        )
        resp = requests.get(url, timeout=60)
        # Fail loudly on HTTP errors instead of hitting a confusing KeyError below.
        resp.raise_for_status()
        r = resp.json()
        current_terms.extend(t['obo_id'] for t in r['_embedded']['terms'])
        page += 1
        # If the response carries no paging metadata, assume a single page
        # (which matches the previous single-request behaviour).
        if page >= r.get('page', {}).get('totalPages', 1):
            break
len(current_terms)

373

In [6]:
# Load the migration inputs from the working directory, closing each file
# deterministically instead of leaving the handle to the garbage collector.
# automigrate_terms: mapping whose keys are the old terms and whose values are the new terms.
# donor_updates: mapping of Collection key -> {donor_id: new term}
# (the key is expected to be a public Collection ID or a private Collection name).
with open('automigrate_terms.json') as automigrate_file:
    automigrate_terms = json.load(automigrate_file)
with open('donor_updates.json') as donor_updates_file:
    donor_updates = json.load(donor_updates_file)

In [7]:
# Sanity check: list every old term (a key of automigrate_terms.json) that is
# still present in the current ontology terms, i.e. is not actually deprecated.
# An empty list means nothing was flagged.
[
    old_term
    for old_term in automigrate_terms
    if old_term in current_terms
]

[]

In [8]:
# Sanity check: list every replacement term (a value of automigrate_terms.json)
# that is missing from the current ontology terms.
# An empty list means nothing was flagged.
[
    new_term
    for new_term in automigrate_terms.values()
    if not (new_term in current_terms)
]

[]

In [9]:
# Sanity check: walk every per-Collection mapping in donor_updates.json and list
# each assigned term that is missing from the current ontology terms.
# An empty list means nothing was flagged.
[
    new_term
    for donor_terms in donor_updates.values()
    for new_term in donor_terms.values()
    if new_term not in current_terms
]

[]

In [10]:
# Validate donor_updates.json: each top-level key must be a public Collection ID
# or the name of a private Collection, and each donor listed under it must exist
# in that Collection. Every problem found is printed as an "ERROR: ..." line.
for coll_key, donor_map in donor_updates.items():
    if coll_key in collections:
        # Public Collection referenced by ID: donors were aggregated earlier.
        known_donors = collections[coll_key]['donor_id']
    elif coll_key in priv_collections:
        # Private Collection referenced by name: fetch each dataset to gather its donors.
        private = priv_collections[coll_key]
        private_id = private['collection_id']
        known_donors = []
        for ds in private['datasets']:
            known_donors.extend(get_dataset(private_id, ds['dataset_id'])['donor_id'])
    elif coll_key in public_names:
        # Key is the *name* of a public Collection; its ID should be used instead.
        print(f'ERROR: update to {public_names[coll_key]} for {coll_key}')
        continue
    else:
        print(f'ERROR: {coll_key} not a public collection_id or private name')
        continue

    missing = [donor for donor in donor_map if donor not in known_donors]
    if missing:
        print(f'ERROR: {",".join(missing)} not in {coll_key}')

ERROR: private_collection_title not a public collection_id or private name


In [12]:
# Report, per public Collection, the development_stage terms that are neither in
# the current ontology terms nor handled by automigrate_terms.json, i.e. deprecated
# terms that this migration does not cover yet.
for coll_id, coll_info in collections.items():
    stage_labels = coll_info['development_stage']
    uncovered = [
        term
        for term in stage_labels
        if not (term in current_terms or term in automigrate_terms)
    ]
    if not uncovered:
        continue
    print(coll_id)
    for term in uncovered:
        print('--', term, ':', stage_labels[term])
    print('')

cae8bad0-39e9-4771-85a7-822b0e06de9f
-- HsapDv:0000082 : newborn human stage

e1fa9900-3fc9-4b57-9dce-c95724c88716
-- UBERON:0018241 : prime adult stage

28e9d721-6816-48a2-8d0b-43bf0b0c0ebc
-- HsapDv:0000082 : newborn human stage

4c6eaf5c-6d57-4c76-b1e9-60df8c655f1e
-- HsapDv:0000091 : human late adulthood stage

72d37bc9-76cc-442d-9131-da0e273862db
-- HsapDv:0000082 : newborn human stage
-- MmusDv:0000037 : Theiler stage 28

4dca242c-d302-4dba-a68f-4c61e7bad553
-- UBERON:0018241 : prime adult stage

d17249d2-0e6e-4500-abb8-e6c93fa1ac6f
-- HsapDv:0000092 : human middle aged stage

10ec9198-584e-4a7e-8a24-4a332915a4ef
-- HsapDv:0000082 : newborn human stage

02b01703-bf1b-48de-b99a-23bef8cccc81
-- MmusDv:0000041 : unknown

71f4bccf-53d4-4c12-9e80-e73bfb89e398
-- HsapDv:0000090 : 25-44 year-old human stage

8c4bcf0d-b4df-45c7-888c-74fb0013e9e7
-- UBERON:0034919 : juvenile stage
-- UBERON:0018241 : prime adult stage

bacccb91-066d-4453-b70e-59de0b4598cd
-- HsapDv:0000082 : newborn human

In [29]:
#browse all private Collections and see what deprecated terms are not covered in this migration