# Mockup Version 1

- Mock-up the workflow from MIS global summit in an environment like EvidenceCare's Epic App. Due January 1.
    - ☑️ List content for our app (Due by Nov 23) TJM
    - ☑️ Define the workflow between user, website, and service (Due by Nov 23) TJM & NS workshop session
    - ☑️ Define interface between UI and service (Due by Nov 30) TJM & NS workshop session
    - Improve performance of labs and prescriptions queries (Due by Dec 7) TJM
        - Create OCCURS_WITH relationships that have z-scores and odds ratios
    - Create a static page layout (Due by Dec 7) NS
    - ☑️ Flask configuration (Due by Dec 1) TJM (Nikesh's static IP: 122.169.102.165, in router)
    - ☑️ Port configuration (Due by Dec 1) TJM & NS
    - Write & test the code for webpage (Due by Dec 23) TJM & NS
    - Provide instructions, documentation, and supporting information to make it usable (Due by Dec 23) BED

Define and test functions in jupyter notebook
- ☑️ delete UMLS concepts from database
- ☑️ re-import UMLS concepts into database using the improved data model
- ☑️ get a CSV of common problems (at least 400 instances each), with the corresponding CUI  
- re-import 'INSTANCE_OF' relationships between concepts, labs, and prescriptions
- ☑️ modify existing functions for the new data model
- ☑️ Save functions to a python script (fetchData.py)
- Write a dummy function that returns assessment and plan in JSON format

In [1]:
import pandas as pd
import time

In [2]:
import getpass
import os

from neo4j import GraphDatabase

# Connection settings. The password is deliberately NOT stored in the notebook:
# it is read from the NEO4J_PASSWORD environment variable, falling back to an
# interactive prompt, so the credential never ends up in version control.
# NOTE(review): a password was previously committed in this cell; rotate it.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass(
    "\nPlease enter the Neo4j database password to continue \n"
)

# `driver` and `session` are module-level names used by the cells below.
driver = GraphDatabase.driver(uri=NEO4J_URI, auth=(NEO4J_USER, password))
session = driver.session()

In [9]:
# Collect every problem concept that has enough recorded instances to
# potentially return robust results. The comparison is strict: a concept needs
# MORE than MIN_PROBLEM_INSTANCES linked Problem nodes to qualify.
MIN_PROBLEM_INSTANCES = 400

query = '''
MATCH (p:Problem)-[:INSTANCE_OF]->(c:Concept)
WITH c.cui AS CUIs, count(p) as instances
WHERE instances > $min_instances
MATCH (c2:Concept)
WHERE c2.cui_pref_term IS NOT NULL AND c2.cui IN CUIs
RETURN c2.term as problem, c2.cui as CUI'''
# The threshold is sent as a query parameter rather than baked into the text.
data = session.run(query, min_instances=MIN_PROBLEM_INSTANCES)

# Convert the neo4j result into a dataframe. Naming the columns explicitly
# keeps the frame usable (sortable, exportable) even when no rows come back.
common_problems_df = pd.DataFrame(
    [dict(record) for record in data],
    columns=['problem', 'CUI'],
)

In [55]:
# Order the table alphabetically by problem name, then display it.
common_problems_df = common_problems_df.sort_values(by='problem')
common_problems_df

NameError: name 'common_problems_df' is not defined

In [14]:
# Persist the common-problems table as a UTF-8 CSV file, without the index column
common_problems_df.to_csv(path_or_buf='common_problems.csv', encoding='utf-8', index=False)

## Modify existing functions for the new data model
### Input a list of CUIs for known problems, output possible additional problems with their CUIs and odds ratios in a dataframe

In [3]:
def PotentialComorbidities(cui_prob_list):
    """Rank problems that co-occur with a set of already-known problems.

    Two Cypher queries are run through the module-level ``session``:

    * the *cohort* query counts the problems recorded for patients who have at
      least one of the known problems;
    * the *background* query counts the problems recorded for patients who
      have none of the known problems.

    Each count is turned into a share of its own query's total, and the ratio
    cohort share / background share is reported as ``OddsRatio``.

    Parameters
    ----------
    cui_prob_list : list of str, or a single str
        CUIs of the known problems. A single CUI string is treated as a
        one-element list. The CUIs are passed to Neo4j as a query parameter,
        never interpolated into the query text.

    Returns
    -------
    pandas.DataFrame
        Columns ``CUI``, ``Potential Problem`` and ``OddsRatio``; at most ten
        rows, sorted by ``OddsRatio`` in descending order. The frame is empty
        (with the same columns) when no CUIs are given or when either query
        returns no rows.
    """
    result_columns = ['CUI', 'Potential Problem', 'OddsRatio']
    # Minimum counts a problem needs before its proportion is trusted.
    min_background_count = 25
    min_cohort_count = 75  # divided by the number of known CUIs below

    if cui_prob_list is None:
        known_cuis = []
    elif isinstance(cui_prob_list, str):
        known_cuis = [cui_prob_list]
    else:
        known_cuis = list(cui_prob_list)
    if not known_cuis:
        # Nothing to look up; this also avoids dividing by zero further down.
        return pd.DataFrame(columns=result_columns)

    cohort_query = '''
    MATCH (prob1:Problem)<-[:HAD_PROBLEM]-(pt:Patients)
    WHERE prob1.cui in $cui_prob_list
    WITH distinct(pt) AS patients
    MATCH (patients)-[:HAD_PROBLEM]->(prob2:Problem)
    WITH prob2.cui AS CUIs, count(prob2) AS Number
    MATCH (c:Concept)
    WHERE c.cui IN CUIs AND c.cui_pref_term IS NOT NULL 
    RETURN c.term as `Potential Problem`, c.cui AS CUI, Number
    ORDER BY Number DESC
    '''
    cohort_records = session.run(cohort_query, cui_prob_list=known_cuis)
    cohort = pd.DataFrame([dict(record) for record in cohort_records])
    if cohort.empty:
        return pd.DataFrame(columns=result_columns)

    background_query = '''
    MATCH (excluded:Problem)
    WHERE excluded.cui in $cui_prob_list
    WITH collect(excluded) as excluded
    MATCH (pt:Patients)-[:HAD_PROBLEM]->(prob:Problem)
    WITH excluded, pt, collect(prob) as problems
    WHERE NONE (prob in problems where prob in excluded)
    MATCH (pt)-[:HAD_PROBLEM]-(prob2:Problem)
    WITH prob2.cui AS CUIs, count(prob2) AS Number
    MATCH (c:Concept)
    WHERE c.cui IN CUIs AND c.cui_pref_term IS NOT NULL 
    RETURN c.term as `Potential Problem`, c.cui AS CUI, Number
    ORDER BY Number DESC
    '''
    background_records = session.run(background_query, cui_prob_list=known_cuis)
    background = pd.DataFrame([dict(record) for record in background_records])
    if background.empty:
        return pd.DataFrame(columns=result_columns)

    # Proportions use the totals over ALL returned rows, so they are computed
    # before the low-count rows are filtered out.
    background['Gen_pop_proportion'] = background['Number'] / background['Number'].sum()
    background = background[background['Number'] > min_background_count]

    cohort['Comorbidities_proportion'] = cohort['Number'] / cohort['Number'].sum()
    cohort = cohort[cohort['Number'] > min_cohort_count / len(known_cuis)]

    # Inner join: keep only problems that survive both filters.
    merged = pd.merge(cohort, background, on=['CUI', 'Potential Problem'])
    merged['OddsRatio'] = merged['Comorbidities_proportion'] / merged['Gen_pop_proportion']
    merged = merged.sort_values(by='OddsRatio', ascending=False)

    return merged.loc[:, result_columns].head(10)

In [4]:
# Test the function
start_time = time.time()

cui_prob_list = ['C1565489', 'C0085762']
# Bind the result to its own name: assigning it to `PotentialComorbidities`
# would replace the function with a DataFrame and break any re-run of this cell.
potential_comorbidities = PotentialComorbidities(cui_prob_list)

print("Total runtime:", time.time() - start_time, "seconds")
potential_comorbidities

Total runtime: 3.0094053745269775 seconds


Unnamed: 0,CUI,Potential Problem,OddsRatio
5,C0080179,Spinal Fractures,33.967289
58,C0001430,Adenoma,16.91168
72,C0085119,Foot Ulcer,15.174249
68,C0036690,Septicemia,14.704117
18,C0162297,Respiratory arrest,14.635917
10,C0023891,"Liver Cirrhosis, Alcoholic",14.437897
6,C0236663,Alcohol withdrawal syndrome,10.507838
46,C0031117,Peripheral Neuropathy,10.254955
2,C0521614,Gallstone pancreatitis,10.007094
13,C0019187,"Hepatitis, Alcoholic",9.941156


### Input a list of problem CUIs, output labs likely to be abnormal and prescriptions likely to be ordered

In [13]:
def LikelyOrders(cui_prob_list):
    """Return the prescriptions and labs that co-occur with the given problems.

    Runs one Cypher query through the module-level ``session`` that follows
    ``OCCURS_WITH`` relationships into the concepts whose CUI is in
    ``cui_prob_list``. It keeps the orders whose score (co-occurrence
    probability rounded to 5 places, times 1000) exceeds 20.

    Rows with no ``AlternateDescription`` are reported as prescriptions.
    Rows that have one are reported as labs, with that description used as
    the order name.

    Parameters
    ----------
    cui_prob_list : list of str, or a single str
        CUIs of the problems of interest. A single CUI string is treated as a
        one-element list. The CUIs are passed to Neo4j as a query parameter,
        never interpolated into the query text.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(orders_likely_rx, orders_likely_lab)``, each with the columns
        ``Order`` and ``Score``, in the order the query returned the rows.
        Both frames are empty (same columns) when the query returns nothing.
    """
    if cui_prob_list is None:
        known_cuis = []
    elif isinstance(cui_prob_list, str):
        known_cuis = [cui_prob_list]
    else:
        known_cuis = list(cui_prob_list)

    query = '''
    MATCH p=(ord:Concept)-[r:OCCURS_WITH]->(c:Concept) 
    WHERE c.cui IN $cui_prob_list
    WITH round(r.co_occurrance_probability, 5)*1000 AS Score, ord, r
    WHERE Score > 20
    RETURN ord.term AS `Order`, ord.description AS AlternateDescription, Score
    ORDER BY r.co_occurrance_probability DESC
    '''
    records = session.run(query, cui_prob_list=known_cuis)
    # Explicit columns keep the frame well-formed when no rows are returned.
    orders = pd.DataFrame(
        [dict(record) for record in records],
        columns=['Order', 'AlternateDescription', 'Score'],
    )

    has_description = orders['AlternateDescription'].notnull()

    # Prescriptions: rows without an alternate description
    orders_likely_rx = orders.loc[~has_description, ['Order', 'Score']]

    # Labs likely to be abnormal: rows with an alternate description, which
    # replaces the term as the displayed order name
    orders_likely_lab = (
        orders.loc[has_description, ['AlternateDescription', 'Score']]
        .rename(columns={'AlternateDescription': 'Order'})
    )

    return orders_likely_rx, orders_likely_lab

In [14]:
# Time one LikelyOrders call for a single known-problem CUI
cui_prob_list = ['C0022661']

start_time = time.time()
orders_likely_rx, orders_likely_lab = LikelyOrders(cui_prob_list=cui_prob_list)
print("Total runtime:", time.time() - start_time, "seconds")

Total runtime: 0.030483007431030273 seconds


In [17]:
import json
# Serialise the prescriptions frame to a JSON string with one object per row
# (orient="records"), then parse it back so the cell displays the structure.
result_rx = orders_likely_rx.to_json(orient="records")
json.loads(result_rx)

[{'Order': 'calcitriol 0.00025 MG Oral Capsule', 'Score': 109.59},
 {'Order': 'sevelamer carbonate 800 MG Oral Tablet [Renvela]',
  'Score': 106.88},
 {'Order': 'sodium polystyrene sulfonate 250 MG/ML Oral Suspension [Kionex]',
  'Score': 103.32},
 {'Order': '1 ML epoetin alfa 4000 UNT/ML Injection [Procrit]',
  'Score': 79.31},
 {'Order': '150 ML Glucose 50 MG/ML Injection', 'Score': 76.26},
 {'Order': 'purified protein derivative of tuberculin 50 UNT/ML Injectable Solution [Tubersol]',
  'Score': 70.99},
 {'Order': 'sodium bicarbonate 650 MG Oral Tablet', 'Score': 66.74},
 {'Order': 'midodrine hydrochloride 5 MG Oral Tablet', 'Score': 64.02},
 {'Order': 'piperacillin 2000 MG / tazobactam 250 MG Injection [Zosyn]',
  'Score': 62.86},
 {'Order': 'cefepime 1000 MG Injection', 'Score': 60.51},
 {'Order': 'omeprazole 20 MG Delayed Release Oral Capsule', 'Score': 59.91},
 {'Order': 'heparin sodium, porcine 1000 UNT/ML Injectable Solution',
  'Score': 51.76},
 {'Order': 'acetaminophen 500 M

In [12]:
# Show the labs frame returned by the LikelyOrders call above.
orders_likely_lab

Unnamed: 0,Order,Score
11,Macrophage in Ascites,57.9
12,Lymphocytes in Other Body Fluid,57.7
13,Polys in Other Body Fluid,53.8
14,Lymphocytes in Ascites,52.8
16,"RBC, Ascites in Ascites",51.5
17,"WBC, Ascites in Ascites",50.7
18,Monocytes in Ascites,49.7
19,Polys in Ascites,48.8
20,Protein/Creatinine Ratio in Urine,48.0
22,pH in Urine,44.5
