In [None]:
import json
import os
from datetime import datetime, timezone
from urllib.parse import urljoin

import pandas as pd
import requests

import lattice


# Open a connection using the 'prod' configuration. Its `server` attribute is used
# below as the base URL for prod requests, and its `auth` attribute is passed to
# the authenticated requests made throughout this notebook.
connection = lattice.Connection('prod')
prod_server = connection.server

In [None]:
# Base URL of the demo server to compare against prod. It is joined with every
# request path below; it is left empty here, so set it before running the
# remaining cells.
demo_server = ''

In [None]:
def _facet_counts(server, add_url, audit=False):
    """Fetch a search from `server` and return its facet counts as {term key: doc_count}.

    With audit=True the terms of every facet whose field starts with 'audit'
    are merged into one dict; otherwise only the first facet is read.
    """
    url = urljoin(server, add_url)
    # The JSON body is read regardless of the HTTP status code.
    results = requests.get(url, auth=connection.auth).json()
    if audit:
        return {
            term['key']: term['doc_count']
            for facet in results['facets']
            if facet['field'].startswith('audit')
            for term in facet['terms']
        }
    return {term['key']: term['doc_count'] for term in results['facets'][0]['terms']}


def compare_search(add_url, audit=False):
    """Compare the facet counts of one search between prod and the demo.

    Parameters
    ----------
    add_url : str
        Search path (with query string) joined onto both server base URLs.
    audit : bool, default False
        When True, compare the terms of all facets whose field starts with
        'audit'; otherwise compare the terms of the first facet only.

    Returns
    -------
    pandas.DataFrame or str
        A frame indexed by 'type' with 'prod' and 'demo' count columns, one
        row per key whose counts differ, or the string 'No difference'.
        A key missing on one server is treated as a count of 0 there.
    """
    prod = _facet_counts(prod_server, add_url, audit)
    demo = _facet_counts(demo_server, add_url, audit)

    # Keys known to prod whose count differs on the demo (missing on demo == 0).
    issues = [
        {'type': key, 'prod': count, 'demo': demo.get(key, 0)}
        for key, count in prod.items()
        if count != demo.get(key, 0)
    ]
    # Keys only the demo knows about. A demo-only key with a zero count is not a
    # difference (prod's implicit count is also 0), so it is skipped, mirroring
    # how a prod-only zero count is handled above.
    issues.extend(
        {'type': key, 'prod': 0, 'demo': count}
        for key, count in demo.items()
        if key not in prod and count != 0
    )

    if issues:
        return pd.DataFrame(issues).set_index('type')
    return 'No difference'

In [None]:
# Ensure that the demo is done indexing before comparing it against prod.
url = urljoin(demo_server, '_indexer')
r = requests.get(url, auth=connection.auth).json()
print(r['status'])
if r['status'] == 'waiting':
    # Indexer reports 'waiting': show how long the first listed result's cycle took, if any.
    if r['results']:
        print(r['results'][0]['cycle_took'])
else:
    # Any other status: show the time elapsed since the reported start.
    # 'started' parses to a naive datetime, so the timezone-aware UTC "now" is
    # stripped of its tzinfo before subtracting (datetime.utcnow() is deprecated).
    start = datetime.strptime(r['started'], '%Y-%m-%dT%H:%M:%S.%f')
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    elapsed = now - start
    print(elapsed)

In [None]:
# Default (non-audit) mode: compares the term counts of the first facet returned
# by this search on prod and on the demo.
print('COMPARE total object counts per type')
compare_search('search/?type=*')

In [None]:
# Same first-facet comparison, with the search additionally filtered by
# audit.INTERNAL_ACTION=*.
print('COMPARE object counts with INTERNAL ACTION')
compare_search('search/?type=*&audit.INTERNAL_ACTION=*')

In [None]:
# audit=True: compares the term counts of every facet whose field starts with
# 'audit' instead of the first facet.
print('COMPARE object counts with AUDITS')
compare_search('search/?type=*', audit=True)

In [None]:
# Check properties of OntologyTerm objects: compare each term's slim fields
# between prod and the demo.
slim_fields = [
    'organ_slims',
    'system_slims',
    'cell_slims',
    'development_slims',
    'disease_slims',
    'qa_slims'
]


def _fetch_term_slims(server):
    """Return {term @id: {slim field: value}} for all OntologyTerm objects on `server`.

    Only the slim fields actually present on a term are kept.
    """
    query = 'search/?type=OntologyTerm&limit=all&field=' + '&field='.join(slim_fields)
    response = requests.get(urljoin(server, query), auth=connection.auth).json()
    return {
        obj['@id']: {field: obj[field] for field in slim_fields if field in obj}
        for obj in response['@graph']
    }


prod_terms = _fetch_term_slims(prod_server)
demo_terms = _fetch_term_slims(demo_server)

# NOTE(review): the comparison is driven by prod only. Terms or slim fields that
# exist solely on the demo are not reported; confirm that is intended.
issues = []
for term_id, prod_slims in prod_terms.items():
    if term_id not in demo_terms:
        issues.append({
            'term': term_id,
            'demo': 'absent'
        })
        continue
    demo_slims = demo_terms[term_id]
    for slim_name, prod_value in prod_slims.items():
        demo_value = demo_slims.get(slim_name)
        if prod_value != demo_value:
            issues.append({
                'term': term_id,
                'slim': slim_name,
                'prod': prod_value,
                'demo': demo_value
            })
pd.DataFrame(issues)

# enforce unique arrays, block additional properties

In [None]:
# Walk every schema served by the demo's profiles endpoint and print properties
# that allow additional properties or non-unique array items.
# NOTE(review): unlike the other demo requests in this notebook, this one is sent
# without auth; confirm the profiles endpoint is readable anonymously.
url = urljoin(demo_server, 'profiles/?format=json')
results = requests.get(url).json()


def _permits_duplicates(prop_name, prop_schema):
    """True unless the property is '@type', is notSubmittable, or has a truthy uniqueItems."""
    return (
        prop_name != '@type'
        and prop_schema.get('notSubmittable') != True
        and not prop_schema.get('uniqueItems')
    )


for schema_name, schema in results.items():
    # These top-level keys are skipped rather than inspected as schemas.
    if schema_name in ['_subtypes', '@type']:
        continue
    for prop_name, prop in schema['properties'].items():
        path = schema_name + '.' + prop_name
        prop_type = prop['type']
        if prop_type == 'object':
            if prop.get('additionalProperties') != False:
                print('additionalProperties allowed', path)
            # Arrays nested one level inside an object property are checked too.
            for sub_name, sub in prop['properties'].items():
                if sub['type'] == 'array' and _permits_duplicates(sub_name, sub):
                    print('non-unique allowed', path + '.' + sub_name)
        elif prop_type == 'array':
            item_schema = prop['items']
            if item_schema['type'] == 'object' and item_schema.get('additionalProperties') != False:
                print('additionalProperties allowed', path)
            if _permits_duplicates(prop_name, prop):
                print('non-unique allowed', path)

# validate slim labels

In [None]:
# Pull the slim-term dictionaries out of the encoded source file: every line from
# the 'system_slim_terms = {' line up to (not including) the '@collection(' line.
url = 'https://raw.githubusercontent.com/Lattice-Data/encoded/refs/heads/dev/src/encoded/types/ontology_term.py'
content = requests.get(url).text.split('\n')

s_l = 'system_slim_terms = {'
s_l_num = content.index(s_l)

e_l = '@collection('
e_l_num = content.index(e_l)

# Write the extracted lines to a throwaway module so the dictionaries can be imported.
# (The slice previously used an undefined name, `start_l_num`, and raised NameError.)
with open('slims_temp.py', 'w') as f:
    f.write('\n'.join(content[s_l_num:e_l_num]))

# NOTE(review): this executes code downloaded from GitHub. Also, on a re-run in
# the same kernel Python reuses the already-imported slims_temp module, so
# restart the kernel to pick up upstream changes.
try:
    from slims_temp import *
finally:
    # The module is loaded (or failed to load) by now; never leave the temp file behind.
    os.remove('slims_temp.py')

slims = {
    **system_slim_terms, **organ_slim_terms, **cell_slim_terms,
    **disease_slim_terms, **development_slim_terms, **ethnicity_slim_terms
} #we don't worry about qa_slim_terms

url = urljoin(demo_server, 'search/?type=OntologyTerm&limit=all&field=term_name&field=term_id&field=audit')
results = requests.get(url, auth=connection.auth).json()

# For every demo term that is a slim: flag a label mismatch, otherwise surface any audits.
for term in results['@graph']:
    if term['term_id'] in slims:
        curr_label = slims[term['term_id']]
        if term['term_name'] != curr_label:
            print(f"update {term['term_id']} from {curr_label} to {term['term_name']}")
        elif term.get('audit'):
            print(term['term_id'],term['audit'])

# Slim ids with no matching OntologyTerm object on the demo.
all_obj_ids = [term['term_id'] for term in results['@graph']]
known_obj_ids = set(all_obj_ids)  # set lookup instead of scanning the list per slim
missing_objs = [s for s in slims.keys() if s not in known_obj_ids]
for t in missing_objs:
    print('add obj',t)