**Required**
* Connect to FHIR Synthetic Patient Data
* Connect to OHDSI Synthetic Patient Data
* Connect to HUSH+ Synthetic Patient Data

**Questions**
* What are potentially diagnostic cluster groups to inform drug selection?
* Are there differences in exposures among patients diagnosed with Asthma during childhood vs during adulthood?
* Are there differences in medications prescribed off-label by patients diagnosed with Asthma during childhood vs during adulthood?

In [68]:
## All the imports we need
from urllib2 import Request, urlopen, URLError
from urllib import quote_plus

import mysql.connector

import pprint, json, requests

from datetime import datetime, timedelta
from dateutil.parser import parse as parse_date
from greentranslator.api import GreenTranslator

import dateutil

DISEASE2SYMPTOMS = [x.split("\t") for x in urlopen("https://www.nature.com/article-assets/npg/ncomms/2014/140626/ncomms5212/extref/ncomms5212-s4.txt").read().split("\n")]
DISEASE2SYMPTOMS = filter(lambda x: len(x) == 4, DISEASE2SYMPTOMS)
print DISEASE2SYMPTOMS[:5]

In [69]:
#connect to UMLS
try:
    cnx = mysql.connector.connect(user='tadmin',
                                password='ncats_translator!',
                                database='umls',
                                host='translator.ceyknq0yekb3.us-east-1.rds.amazonaws.com')
except mysql.connector.Error as err:
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
else:
    print
    #cnx.close()




In [70]:
#Define functions to use UMLS
def findICD10(name):
    cursor = cnx.cursor()
    query = ("SELECT CUI FROM umls.MRCONSO WHERE STR='"+name+"'")
    cursor.execute(query, ())
    res = "Undef"
    for code in cursor:
        if res=="Undef":
            res = code
    if res != "Undef":
        query = ("SELECT CODE FROM umls.MRCONSO WHERE SAB='ICD10' AND CUI='"+res[0]+"'")
        cursor.execute(query, ())
        icd10 = "Undef"
        for code in cursor:
            icd10 = code
        return (icd10[0])
    return ("Undef")

def findICD9(name):
    cursor = cnx.cursor()
    query = ("SELECT CUI FROM umls.MRCONSO WHERE STR='"+name+"'")
    cursor.execute(query, ())
    res = "Undef"
    for code in cursor:
        if res=="Undef":
            res = code
    if res != "Undef":
        query = ("SELECT CODE FROM umls.MRCONSO WHERE SAB='ICD9CM' AND CUI='"+res[0]+"'")
        cursor.execute(query, ())
        icd9 = "Undef"
        for code in cursor:
            icd9 = code
        return (icd9[0])
    return ("Undef")

**Question: What are potentially diagnostic cluster groups to inform drug selection?**
* Find patients diagnosed with Asthma
* Find symptoms for Asthma
* Find occurences of symptoms in Asthma patients
* Find symptom clusters among Asthma patients

In [72]:
## Given disease/condition term, get back ICD codes
def txt2icd(txt):
    url_con = "http://api.ohdsi.org/WebAPI/vocabulary/search"
    headers = {'content-type': 'application/json'}
    params = {"QUERY": "Asthma",
              "VOCABULARY_ID": ["ICD9CM"]
    }
    response = requests.post(url_con, data=json.dumps(params), headers=headers)
    data= json.loads(response.text.decode('utf-8'))
    icd9arr=[]
    for d in data:
        icd9arr.append(d["CONCEPT_CODE"])
    return icd9arr
print txt2icd('asthma')

[u'E945.7', u'493', u'493.9', u'493.90', u'493.92', u'493.91', u'493.2', u'493.20', u'493.22', u'493.21', u'493.82', u'493.0', u'493.00', u'493.02', u'493.01', u'V17.5', u'493.1', u'493.10', u'493.12', u'493.11', u'493.8', u'975.7']


In [73]:
#Find symptoms for Asthma
DISEASE2SYMPTOMS = [x.split("\t") for x in urlopen("https://www.nature.com/article-assets/npg/ncomms/2014/140626/ncomms5212/extref/ncomms5212-s4.txt").read().split("\n")]
DISEASE2SYMPTOMS = filter(lambda x: len(x) == 4, DISEASE2SYMPTOMS)
#print DISEASE2SYMPTOMS

In [21]:
# Find Asthma ICD10 codes
asthmaCodes = findICD10('Asthma')
#findICD10('Asthma')
findICD9('Fever')

u'780.60'

In [None]:
## Given disease name, get back symptoms (defined using MeSH terms) along with TFIDF scores
## Taken from https://www.nature.com/articles/ncomms5212
def disease2symptom(txt):
    s = filter(lambda x: txt.lower() in x[1].lower(), DISEASE2SYMPTOMS)
    return([(x[0], x[3]) for x in s])
disease = "Asthma"
symps = disease2symptom(disease)
print 'Found %s symptom MeSH terms for %s' % (len(symps), disease)

#for s in symps:
 #   findICD9(s)

In [75]:
## Pull JHU FHIR patients matching an ICD code
import requests
import urllib2
import json

#base_url = 'http://ictrweb.johnshopkins.edu/rest/synthetic'
#patients = base_url+"/Patient"+"?_count=50"

#req = requests.get(patients)

def findPatients(code):
    try:
        response = urllib2.urlopen("http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10="+code+"")
    except:
        print "I can't find patients"
        exit()
    html = response.read()
    return (html)

In [77]:
## Pull HUSH+ patients matching a list of ICD codes
query = GreenTranslator ().get_query ()

HUSHpluspatients = query.clinical_get_patients (age='8', sex='male', 
                                        race='white', location='OUTPATIENT')
##pprint.pprint (HUSHpluspatients)

**Question: Are there differences in exposures among patients diagnosed with Asthma during childhood vs during adulthood?**
* Find age of diagosis for Asthma patients
* Find zipcode for Asthma patients
* Find exposures by zipcode for Asthma patients
* Compare exposures of patients diagnosed with Asthma during childhood vs during adulthood

In [66]:
#Define functions needed to connect patients with exposures
def findAgeZipcode(stuff):
    res = []
    for p in stuff['entry']:
        d1 = p['assertedDate']
        id = p['subject']['reference'][8:]
        url = 'http://ictrweb.johnshopkins.edu/rest/synthetic/Patient/'+id
        txt = urlopen(url).read()
        st = json.loads(txt)
        #pprint.pprint(st)
        d2 = st['birthDate']
        zip = st['address'][0]['postalCode']
        d1 = dateutil.parser.parse(d1)
        d2 = dateutil.parser.parse(d2)
        #print d1
        #print d2
        diff = d1 - d2
        #print diff
        url = 'https://www.zipcodeapi.com/rest/lPMf5jmnBdclCZWkQwlFPJO6HkolG4N1TzgZSDnuRAPtzLOqi957STdzeBVVFIWz/multi-info.json/'+zip+'/degrees'
        txt = urlopen(url).read()
        st = json.loads(txt)
        #pprint.pprint(st)
        lat = st[zip]['lat']
        lng = st[zip]['lng']
        #print lat
        #print lng
        #print diff.days
        t = (id,d2,d1,diff.days,zip,lat,lng)
        res.append(t)
    return(res)

In [67]:
## Find age and zipcode for FHIR patients with asthma diagnoses
for c in asthmaCodes:
    
    ## Get asthma patients
    url = "http://ictrweb.johnshopkins.edu/rest/synthetic/Condition?icd_10="+c+"&_count=1000"
    txt = urlopen(url).read()
    stuff = json.loads(txt)
    pprint.pprint(findAgeZipcode(stuff)) 
    

[(u'249394936',
  datetime.datetime(1940, 10, 12, 0, 0),
  datetime.datetime(1977, 4, 11, 0, 0),
  13330,
  u'20748',
  38.814855,
  -76.933338),
 (u'281179991',
  datetime.datetime(1955, 9, 12, 0, 0),
  datetime.datetime(1981, 11, 28, 0, 0),
  9574,
  u'20015',
  38.966916,
  -77.058242),
 (u'941986615',
  datetime.datetime(1986, 12, 3, 0, 0),
  datetime.datetime(2011, 3, 14, 0, 0),
  8867,
  u'20783',
  38.997054,
  -76.966567),
 (u'537191851',
  datetime.datetime(1979, 5, 17, 0, 0),
  datetime.datetime(1980, 10, 26, 0, 0),
  528,
  u'20743',
  38.88424,
  -76.893225),
 (u'963064806',
  datetime.datetime(1980, 8, 14, 0, 0),
  datetime.datetime(1984, 5, 30, 0, 0),
  1385,
  u'20032',
  38.833703,
  -77.006674),
 (u'738765998',
  datetime.datetime(1992, 12, 6, 0, 0),
  datetime.datetime(2007, 4, 16, 0, 0),
  5244,
  u'20748',
  38.814855,
  -76.933338),
 (u'618976413',
  datetime.datetime(1918, 4, 27, 0, 0),
  datetime.datetime(2003, 10, 13, 0, 0),
  31215,
  u'20910',
  39.002842,
  -

  u'20910',
  85.412424,
  -45.523789),
 (u'281594447',
  datetime.datetime(1995, 7, 22, 0, 0),
  datetime.datetime(2000, 9, 24, 0, 0),
  1891,
  u'20032',
  66.908635,
  -23.148641),
 (u'936647614',
  datetime.datetime(1972, 2, 14, 0, 0),
  datetime.datetime(2003, 12, 9, 0, 0),
  11621,
  u'22207',
  87.608055,
  -28.995678),
 (u'937683929',
  datetime.datetime(1990, 5, 22, 0, 0),
  datetime.datetime(2015, 7, 13, 0, 0),
  9183,
  u'20722',
  45.810038,
  -23.665448),
 (u'281593716',
  datetime.datetime(1988, 2, 5, 0, 0),
  datetime.datetime(1992, 7, 12, 0, 0),
  1619,
  u'20912',
  42.41933,
  -42.623476),
 (u'971720355',
  datetime.datetime(1955, 3, 30, 0, 0),
  datetime.datetime(2005, 7, 19, 0, 0),
  18374,
  u'20815',
  57.890967,
  -40.983095),
 (u'611052299',
  datetime.datetime(1945, 4, 11, 0, 0),
  datetime.datetime(2003, 1, 29, 0, 0),
  21112,
  u'20019',
  87.592814,
  -34.544768),
 (u'434889442',
  datetime.datetime(1988, 3, 30, 0, 0),
  datetime.datetime(2013, 9, 28, 0, 0),

  datetime.datetime(1957, 12, 21, 0, 0),
  datetime.datetime(1993, 7, 16, 0, 0),
  12991,
  u'20815',
  56.745051,
  -27.805066),
 (u'294473673',
  datetime.datetime(2003, 8, 18, 0, 0),
  datetime.datetime(2014, 7, 5, 0, 0),
  3974,
  u'20016',
  67.584841,
  -63.91172),
 (u'829699331',
  datetime.datetime(1933, 11, 23, 0, 0),
  datetime.datetime(1938, 4, 28, 0, 0),
  1617,
  u'20008',
  56.124999,
  -39.8177),
 (u'595492788',
  datetime.datetime(1987, 11, 5, 0, 0),
  datetime.datetime(2001, 3, 14, 0, 0),
  4878,
  u'20020',
  49.746604,
  -58.071562),
 (u'758846314',
  datetime.datetime(1947, 11, 7, 0, 0),
  datetime.datetime(1961, 1, 19, 0, 0),
  4822,
  u'20019',
  92.749434,
  -60.327869),
 (u'517149438',
  datetime.datetime(1978, 4, 9, 0, 0),
  datetime.datetime(2016, 4, 7, 0, 0),
  13878,
  u'20020',
  87.561818,
  -19.683389),
 (u'646047503',
  datetime.datetime(1979, 6, 23, 0, 0),
  datetime.datetime(2013, 1, 19, 0, 0),
  12264,
  u'20011',
  53.27648,
  -35.769757),
 (u'833501

TypeError: list indices must be integers, not str

**Question: Are there differences in medications prescribed off-label by patients diagnosed with Asthma during childhood vs during adulthood?**
* Find age of diagosis for Asthma patients
* Find medications taken by Asthma patients
* Find medication classes for medications taken by Asthma patients
* Compare medication classes taken by patients diagosed with Asthma during childhood vs during adulthood

In [78]:
## Pull meds for HUSH+ patients
meds = {}
for x in HUSHpluspatients:
    medList = x['medList']
    # Collect the unique meds
    for m in medList.keys():
        found = False
        try:
            meds[medList[m]] = meds[medList[m]]+1
        except KeyError:
            meds[medList[m]] = 1
pprint.pprint(meds)

{None: 9,
 u'0.3 ML Epinephrine 1 MG/ML Auto-Injector [Epipen]': 1,
 u'120 ACTUAT Fluticasone propionate 0.05 MG/ACTUAT Nasal Inhaler': 2,
 u'24 HR Methylphenidate Hydrochloride 10 MG Extended Release Oral Capsule [Ritalin]': 1,
 u'ACYCLOVIR 5 % TOPICAL OINTMENT': 1,
 u'ADVAIR HFA 230-21 MCG INHALER': 1,
 u'ALBUTEROL 90 MCG INHALER': 1,
 u'ALBUTEROL SULF HFA 90 MCG INH': 7,
 u'ALBUTEROL SULFATE 2.5 MG/3 ML (0.083 %) SOLUTION FOR NEBULIZATION': 9,
 u'ALBUTEROL SULFATE HFA 90 MCG/ACTUATION AEROSOL INHALER': 19,
 u'AMOXICILLIN 250 MG CHEWABLE TABLET': 1,
 u'AMOXICILLIN 875 MG TABLET': 1,
 u'AUGMENTIN 400-57 MG/5 ML': 1,
 u'AUGMENTIN ES-600 SUSPENSION': 3,
 u'AZITHROMYCIN 250 MG TABLET': 11,
 u'Acetaminophen 21.7 MG/ML / Hydrocodone Bitartrate 0.5 MG/ML Oral Solution': 1,
 u'Acetaminophen 32 MG/ML Oral Suspension': 1,
 u'Albuterol 0.83 MG/ML Inhalant Solution': 3,
 u'Amitriptyline Hydrochloride 10 MG Oral Tablet': 1,
 u'Amoxicillin 120 MG/ML / Clavulanate 8.58 MG/ML Oral Suspension': 4,
 u

In [79]:
## For a given medication string get NCBO annotations
## We let NCBO match any ontology since just using RxNORM doesn't
## always give us just the drug name (e.g., "CLINDAMYCIN 15 MG/ML ORAL SOLUTION" is
## a valid RxNORM term)
def med2rxnorm(txt):
    url = 'http://data.bioontology.org/annotator?text=%s&apikey=b792dd1b-cdc2-4cc8-aaf2-4fa4fbf47e4e'
    txt = urlopen(url % quote_plus(txt)).read()
    resp = json.loads(txt)
    if len(resp) == 0: return([])
    annos = []
    for aresp in resp:
        annos.extend([ x['text'] for x in aresp['annotations'] ])
    ##annos = filter(lambda x: not any(d in x for d in'0123456789'), annos)
    return(annos)
print med2rxnorm("CLINDAMYCIN 15 MG/ML ORAL SOLUTION")        


[u'CLINDAMYCIN 15 MG/ML ORAL SOLUTION', u'CLINDAMYCIN 15 MG/ML', u'CLINDAMYCIN', u'ORAL SOLUTION']


In [None]:
## Get RxNORM codes for medication strings
medrxnorm = {}
for med in meds.keys():
    if med is None: continue    
    annos = med2rxnorm(med)
    print 'Processing %s and found %d annotations' % (med, len(annos))
    medrxnorm[med] = {'count':meds[med], 'annos':annos}    

Processing HYDROCORTISONE VAL 0.2% CREAM and found 1 annotations
Processing CLARITIN 5 MG/5 ML SYRUP and found 1 annotations
Processing CLINDAMYCIN 150 MG/ML INJECTION SOLUTION and found 4 annotations
Processing PROAIR HFA 90 MCG INHALER and found 1 annotations
Processing ALBUTEROL SULFATE 2.5 MG/3 ML (0.083 %) SOLUTION FOR NEBULIZATION and found 2 annotations
Processing Fexofenadine hydrochloride 30 MG Oral Tablet and found 5 annotations
Processing ADVAIR HFA 230-21 MCG INHALER and found 1 annotations
Processing Ofloxacin 3 MG/ML Ophthalmic Solution and found 4 annotations
Processing levocetirizine dihydrochloride 5 MG Oral Tablet and found 5 annotations
Processing DIAZEPAM 2 MG TABLET and found 2 annotations
Processing Clonidine Hydrochloride 0.1 MG Oral Tablet and found 5 annotations
Processing LIDEX 0.05% OINTMENT and found 0 annotations
Processing KETOCONAZOLE 2 % TOPICAL CREAM and found 3 annotations
Processing BACTROBAN 2% OINTMENT and found 1 annotations
Processing FOCALIN XR 5

In [50]:
## Given a drug (identified by RXNORM), get conditions that the drug has a 
## status (approved, phase 3) for, via NDK API
def drug2conditions(drug):
    import urllib
    url = "https://tripod.nih.gov/ndk/treatment/%s/conditions" % (drug)
    page = urllib.urlopen(url).read().strip()
    if page == "": return []
    resp = json.loads(page)
    conds = []
    for aresp in resp:
        condname = aresp['name']
        if 'ICD10' in aresp.keys():
            condicd10 = aresp['ICD10']
        else: condicd10 = []
        conds.append( (aresp['status'], condname, condicd10) )
    return(conds)

In [53]:
keys = list(medrxnorm.keys())
n = 10
annos = medrxnorm[keys[n]]['annos']
conds = []
for anno in annos:
    conds.extend(drug2conditions(anno))
print "##", keys[n], "##", conds

## Salicylic Acid 275 MG/ML Topical Solution ## [(u'Approved', u'Psoriasis', [u'L40']), (u'Approved', u'Keratoderma blennorrhagicum', []), (u'Approved', u'Pityriasis rubra pilaris', [u'L44.0']), (u'Approved', u'Folliculitis ulerythematosa reticulata', [u'L66.4']), (u'Approved', u'Congenital ichthyosis', [u'Q80']), (u'Approved', u'Wart', []), (u'Approved', u'Corns and callosities', [u'L84']), (u'Approved', u'Acne', [u'L70']), (u'Approved', u'Pyoderma', [u'L08.0']), (u'Approved', u'Inflammation', [])]
