In [None]:
from datetime import datetime
import lattice
import json
import requests
import pandas as pd
from urllib.parse import urljoin


# `mode` is handed to lattice.Connection; the resulting object supplies both the
# reference server URL (`connection.server`) and the credentials
# (`connection.auth`) that the cells below send with every request.
mode = 'prod'
connection = lattice.Connection(mode)
prod_server = connection.server

In [None]:
# Base URL of the demo instance that is compared against prod_server.
# NOTE(review): this is a plain-http URL and the requests in this notebook pass
# connection.auth to it -- confirm sending those credentials unencrypted is OK.
demo_server = 'http://ec2-54-176-95-4.us-west-1.compute.amazonaws.com/'

In [None]:
def _facet_term_counts(server, add_url, audit=False):
    """Fetch `add_url` from `server` and return ``{term key: doc_count}``.

    With ``audit=False`` only the first facet of the response is read; with
    ``audit=True`` the terms of every facet whose ``field`` starts with
    ``'audit'`` are merged into one dict (a key that appears in several audit
    facets keeps the count of the last facet seen).
    """
    url = urljoin(server, add_url)
    results = requests.get(url, auth=connection.auth).json()
    facets = results['facets']
    if audit:
        selected = [f for f in facets if f['field'].startswith('audit')]
    else:
        # Slicing instead of indexing: an empty facet list yields no counts
        # rather than an IndexError.
        selected = facets[:1]
    counts = {}
    for facet in selected:
        for term in facet['terms']:
            counts[term['key']] = term['doc_count']
    return counts


def _count_differences(prod, demo):
    """Return one issue dict per key whose count differs between the two sides."""
    # Prod keys first, then keys that only exist on demo, so the report order
    # is stable and every key is visited exactly once.
    keys = list(prod) + [k for k in demo if k not in prod]
    issues = []
    for key in keys:
        prod_count = prod.get(key, 0)
        demo_count = demo.get(key, 0)
        # A key missing on one side counts as 0 on that side, in both
        # directions; "absent vs 0" is therefore not reported as a difference.
        if prod_count != demo_count:
            issues.append({'type': key, 'prod': prod_count, 'demo': demo_count})
    return issues


def compare_search(add_url, audit=False):
    """Compare facet term counts for the same search on prod and demo.

    Parameters
    ----------
    add_url : str
        Path and query string joined onto ``prod_server`` and ``demo_server``.
    audit : bool, default False
        If False, compare the terms of the first facet in each response.
        If True, compare the terms of all facets whose field starts with
        ``'audit'``.

    Returns
    -------
    pandas.DataFrame or str
        A frame indexed by ``'type'`` with ``'prod'`` and ``'demo'`` count
        columns, one row per differing key; the string ``'No difference'``
        when every count matches.
    """
    prod = _facet_term_counts(prod_server, add_url, audit)
    demo = _facet_term_counts(demo_server, add_url, audit)

    issues = _count_differences(prod, demo)
    if issues:
        return pd.DataFrame(issues).set_index('type')
    return 'No difference'

In [None]:
# Confirm the demo has finished indexing before trusting any comparison.
url = urljoin(demo_server, '_indexer')
r = requests.get(url, auth=connection.auth).json()
print(r['status'])
if r['status'] != 'waiting':
    # Any status other than 'waiting': report the time since the 'started' stamp.
    # NOTE(review): subtracting from utcnow() assumes 'started' is a naive UTC
    # timestamp -- confirm against the indexer's output.
    start = datetime.strptime(r['started'], '%Y-%m-%dT%H:%M:%S.%f')
    now = datetime.utcnow()
    elapsed = now - start
    print(elapsed)
elif r['results']:
    # 'waiting' with at least one recorded result: show the first entry's duration.
    print(r['results'][0]['cycle_took'])

In [None]:
# No audit flag: compare_search diffs the term counts of the first facet each
# server returns for this all-types search.
print('COMPARE total object counts per type')
compare_search('search/?type=*')

In [None]:
# Same first-facet comparison, with the search additionally filtered by
# audit.INTERNAL_ACTION=*.
print('COMPARE object counts with INTERNAL ACTION')
compare_search('search/?type=*&audit.INTERNAL_ACTION=*')

In [None]:
# audit=True: compare the term counts gathered from every facet whose field
# starts with 'audit' instead of the first facet.
print('COMPARE object counts with AUDITS')
compare_search('search/?type=*', audit=True)

In [None]:
#check properties of OntologyTerm objects
slim_fields = [
    'organ_slims',
    'system_slims',
    'cell_slims',
    'development_slims',
    'disease_slims',
    'ethnicity_slims',
    'qa_slims'
]


def _fetch_slim_terms(server):
    """Return ``{term @id: {slim field: value}}`` for all OntologyTerms on `server`.

    Only the fields listed in ``slim_fields`` are requested, and a field is
    stored for a term only when the response actually contains it.
    """
    url = urljoin(server, 'search/?type=OntologyTerm&limit=all&field=' + '&field='.join(slim_fields))
    results = requests.get(url, auth=connection.auth).json()
    return {
        term['@id']: {sf: term[sf] for sf in slim_fields if sf in term}
        for term in results['@graph']
    }


prod_terms = _fetch_slim_terms(prod_server)
# The demo side has to be read from demo_server; querying prod_server twice
# would compare prod against itself and never surface a difference.
demo_terms = _fetch_slim_terms(demo_server)

issues = []
for term_id, prod_slims in prod_terms.items():
    if term_id not in demo_terms:
        issues.append({
            'term': term_id,
            'demo': 'absent'
        })
        continue
    demo_slims = demo_terms[term_id]
    # Walk every slim field (not just those present on prod) so a slim that
    # exists only on demo is reported too; a missing side shows up as None.
    for sf in slim_fields:
        if prod_slims.get(sf) != demo_slims.get(sf):
            issues.append({
                'term': term_id,
                'slim': sf,
                'prod': prod_slims.get(sf),
                'demo': demo_slims.get(sf)
            })
# Terms that exist only on demo are differences as well.
for term_id in demo_terms:
    if term_id not in prod_terms:
        issues.append({
            'term': term_id,
            'prod': 'absent'
        })
pd.DataFrame(issues)