# Mockup Version 1

- Mock-up the workflow from MIS global summit in an environment like EvidenceCare's Epic App. Due January 1.
    - ☑️ List content for our app (Due by Nov 23) TJM
    - ☑️ Define the workflow between user, website, and service (Due by Nov 23) TJM & NS workshop session
    - Define interface between UI and service (Due by Nov 30) TJM & NS workshop session
        - tell Nikesh when the spreadsheet is done so they can freeze their design specs
        - tell Nikesh when functions are available to call
    - Make a webpage that looks like an Epic screen (Due by Dec 7) TJM
        - Nikesh will create a static page layout 
    - ☑️ Flask configuration (due by Dec 1) TJM (Nikesh's static IP: 122:169:102:165) - in router, 
    - ☑️ Port configuration (due by Dec 1) TJM & NS
    - Write & test the code for webpage (Due by Dec 23) TJM & NS
    - Provide instructions, documentation, supporting information to make it (Due by Dec 23) BED

Define and test functions in jupyter notebook
- ☑️ delete UMLS concepts from database
- ☑️ re-import UMLS concepts into database using the improved data model
- ☑️ get a CSV of common problems (at least 400 instances each), with the corresponding CUI  
- re-import 'INSTANCE_OF' relationships between concepts, labs, and prescriptions
- modify existing functions for the new data model
    - ☑️ comorbidities_of(prob_list)
    - LikelyAbnormalLabs(cui_prob_list)
    - LikelyPrescriptions(cui_prob_list)
- Save functions to separate python scripts
- Write a dummy function that returns assessment and plan in JSON format

In [123]:
import pandas as pd
import time

In [124]:
# import getpass
# password = getpass.getpass("\nPlease enter the Neo4j database password to continue \n")

from neo4j import GraphDatabase
driver=GraphDatabase.driver(uri="bolt://localhost:7687", auth=('neo4j','NikeshIsCool'))
session=driver.session()

In [9]:
# get a CSV of all problems in the database that have enough data to potentially return robust results
query = '''
MATCH (p:Problem)-[:INSTANCE_OF]->(c:Concept)
WITH c.cui AS CUIs, count(p) as instances
WHERE instances > 400
MATCH (c2:Concept)
WHERE c2.cui_pref_term IS NOT NULL AND c2.cui IN CUIs
RETURN c2.term as problem, c2.cui as CUI'''
data = session.run(query)

# Convert the neo4j object into a dataframe
common_problems_df = pd.DataFrame([dict(record) for record in data])

In [55]:
common_problems_df.sort_values(by='problem', inplace=True)
common_problems_df

NameError: name 'common_problems_df' is not defined

In [14]:
# Write out to CSV
common_problems_df.to_csv('common_problems.csv', index=False, encoding='utf-8')

## Modify existing functions for the new data model
### Input a list of CUIs for known problems, output possible additional problems with their CUIs and odds ratios in a dataframe

In [150]:
def PotentialComorbidities(cui_prob_list):
    
    query = '''
    MATCH (prob1:Problem)<-[:HAD_PROBLEM]-(pt:Patients)
    WHERE prob1.cui in {cui_prob_list}
    WITH distinct(pt) AS patients
    MATCH (patients)-[:HAD_PROBLEM]->(prob2:Problem)
    WITH prob2.cui AS CUIs, count(prob2) AS Number
    MATCH (c:Concept)
    WHERE c.cui IN CUIs AND c.cui_pref_term IS NOT NULL 
    RETURN c.term as `Potential Problem`, c.cui AS CUI, Number
    ORDER BY Number DESC
    '''.format(cui_prob_list=cui_prob_list)
    comorbidities = session.run(query)
    comorbidities = pd.DataFrame([dict(record) for record in comorbidities])
    
    query = '''
    MATCH (excluded:Problem)
    WHERE excluded.cui in {cui_prob_list}
    WITH collect(excluded) as excluded
    MATCH (pt:Patients)-[:HAD_PROBLEM]->(prob:Problem)
    WITH excluded, pt, collect(prob) as problems
    WHERE NONE (prob in problems where prob in excluded)
    MATCH (pt)-[:HAD_PROBLEM]-(prob2:Problem)
    WITH prob2.cui AS CUIs, count(prob2) AS Number
    MATCH (c:Concept)
    WHERE c.cui IN CUIs AND c.cui_pref_term IS NOT NULL 
    RETURN c.term as `Potential Problem`, c.cui AS CUI, Number
    ORDER BY Number DESC
    '''.format(cui_prob_list=cui_prob_list)
    gen_problems = session.run(query)
    gen_problems = pd.DataFrame([dict(record) for record in gen_problems])
    
    gen_pop_total = sum(gen_problems['Number'])
    gen_problems['Gen_pop_proportion'] = gen_problems['Number']/gen_pop_total
    
    gen_problems = gen_problems[gen_problems['Number'] > 25]
    
    comorb_total = sum(comorbidities['Number'])
    comorbidities['Comorbidities_proportion'] = comorbidities['Number']/comorb_total
    
    comorbidities = comorbidities[comorbidities['Number'] > 75/len(cui_prob_list)]
    
    # Merge the "Gen_pop_proportion" column from gen_problems into comorbidities
    comorbidities = pd.merge(comorbidities, gen_problems, on=['CUI', 'Potential Problem'])
    
    comorbidities.head()
    
    comorbidities['Odds Ratio'] = (comorbidities['Comorbidities_proportion']/comorbidities['Gen_pop_proportion'])
    comorbidities.sort_values(by='Odds Ratio', ascending=False, inplace=True)
    
    return comorbidities.loc[:,['CUI','Potential Problem', 'Odds Ratio']].head(10)
#     return comorbidities.head(10)

In [151]:
# Test the function
start_time = time.time()

cui_prob_list = ['C1565489', 'C0085762']
result_df = PotentialComorbidities(cui_prob_list)

print("Total runtime:", time.time() - start_time, "seconds")
result_df

Total runtime: 0.5168774127960205 seconds


Unnamed: 0,CUI,Potential Problem,Odds Ratio
5,C0080179,Spinal Fractures,33.967289
58,C0001430,Adenoma,16.91168
72,C0085119,Foot Ulcer,15.174249
68,C0036690,Septicemia,14.704117
18,C0162297,Respiratory arrest,14.635917
10,C0023891,"Liver Cirrhosis, Alcoholic",14.437897
6,C0236663,Alcohol withdrawal syndrome,10.507838
46,C0031117,Peripheral Neuropathy,10.254955
2,C0521614,Gallstone pancreatitis,10.007094
13,C0019187,"Hepatitis, Alcoholic",9.941156


### Input 1 problem CUI in a list, output labs likely to be abnormal with their odds ratios in a dataframe

In [140]:
def LikelyAbnormalLabs(cui_prob_list):
    
    query = '''
    MATCH (prob1:Problem)<-[:HAD_PROBLEM]-(pt:Patients)
    WHERE prob1.cui in {cui_prob_list}
    WITH distinct(pt) AS patients
    MATCH (d:D_Labitems)-[:DESCRIBES]->(n:Labevents)<-[:HAD]-(patients)
    RETURN d.itemid AS ITEMID, d.label as `Abnormal Lab`, d.fluid as `Source`, COUNT(n.flag = 'abnormal') AS abnormal, COUNT(n) as total
    ORDER BY total DESC
    '''.format(cui_prob_list=cui_prob_list)
    with_prob_labs = session.run(query)
    with_prob_labs = pd.DataFrame([dict(record) for record in with_prob_labs])
    
    query = '''
    MATCH (excluded:Problem)
    WHERE excluded.cui in {cui_prob_list}
    WITH collect(excluded) as excluded
    MATCH (pt:Patients)-[:HAD_PROBLEM]->(prob:Problem)
    WITH excluded, pt, collect(prob) as problems
    WHERE NONE (prob in problems where prob in excluded)
    MATCH (d:D_Labitems)-[:DESCRIBES]->(n:Labevents)<-[:HAD]-(pt)
    RETURN d.itemid AS ITEMID, COUNT(n.flag = 'abnormal') AS abnormal, COUNT(n) as total
    ORDER BY total DESC
    '''.format(cui_prob_list=cui_prob_list)
    without_prob_labs = session.run(query)
    without_prob_labs = pd.DataFrame([dict(record) for record in without_prob_labs])
    
    without_prob_labs = without_prob_labs[without_prob_labs['abnormal'] > 10]
    without_prob_labs['without_prob_proportion_abnl'] = without_prob_labs['abnormal']/without_prob_labs['total']
    
    with_prob_labs = with_prob_labs[with_prob_labs['abnormal'] > 10]
    with_prob_labs['with_prob_proportion_abnl'] = with_prob_labs['abnormal']/with_prob_labs['total']
        
    # Merge the "Gen_pop_proportion" column from gen_problems into comorbidities
    with_prob_labs = pd.merge(with_prob_labs, without_prob_labs, on=['ITEMID'])
    
    with_prob_labs['Odds Ratio'] = (with_prob_labs['with_prob_proportion_abnl']/with_prob_labs['without_prob_proportion_abnl'])
    with_prob_labs.sort_values(by='Odds Ratio', ascending=False, inplace=True)
    
    return with_prob_labs.loc[:,['Abnormal Lab', 'Source', 'Odds Ratio']].head(10)

In [141]:
start_time = time.time()

cui_prob_list = ['C0036690']
likely_abnormal_labs = LikelyAbnormalLabs(cui_prob_list)

print("Total runtime:", time.time() - start_time, "seconds")
likely_abnormal_labs

Total runtime: 18.243038415908813 seconds


Unnamed: 0,Abnormal Lab,Source,Odds Ratio
51,Phenytoin,Blood,1.669151
18,"Calcium, Total",Blood,1.595422
47,Thyroid Stimulating Hormone,Blood,1.54317
38,WBC,Urine,1.46511
50,Iron,Blood,1.455163
40,Albumin,Blood,1.334631
7,Anion Gap,Blood,1.316288
10,Magnesium,Blood,1.315481
48,Metamyelocytes,Blood,1.299398
39,Troponin T,Blood,1.27924


### Input 1 problem CUI in a list, output prescriptions likely to be ordered with their odds ratios in a dataframe

In [146]:
def LikelyPrescriptions(cui_prob_list):
        
    query = '''
    MATCH (prob1:Problem)<-[:HAD_PROBLEM]-(pt:Patients)
    WHERE prob1.cui in {cui_prob_list}
    WITH distinct(pt) AS patients
    MATCH (patients)-[:HAD]->(rx:Prescriptions)
    RETURN rx.drug AS Drug, count(rx.drug) as Number
    ORDER BY Number DESC
    '''.format(cui_prob_list=cui_prob_list)
    with_prob_Rx = session.run(query)
    with_prob_Rx = pd.DataFrame([dict(record) for record in with_prob_Rx])
    
    query = '''
    MATCH (prob1:Problem)<-[:HAD_PROBLEM]-(pt:Patients)
    WHERE NOT prob1.cui in {cui_prob_list}
    WITH distinct(pt) AS patients
    MATCH (patients)-[:HAD]->(rx:Prescriptions)
    RETURN rx.drug AS Drug, count(rx.drug) as Number
    ORDER BY Number DESC
    '''.format(cui_prob_list=cui_prob_list)
    without_prob_Rx = session.run(query)
    without_prob_Rx = pd.DataFrame([dict(record) for record in without_prob_Rx])
       
    without_prob_total = sum(without_prob_Rx['Number'])
    without_prob_Rx['without_prob_proportion'] = without_prob_Rx['Number']/without_prob_total
    
    without_prob_Rx = without_prob_Rx[without_prob_Rx['Number'] > 30]
        
    with_prob_total = sum(with_prob_Rx['Number'])
    with_prob_Rx['with_prob_proportion'] = with_prob_Rx['Number']/with_prob_total
        
    with_prob_Rx = with_prob_Rx[with_prob_Rx['Number'] > 20]
    
    # Merge the "Gen_pop_proportion" column from gen_problems into comorbidities
    with_prob_Rx = pd.merge(with_prob_Rx, without_prob_Rx, on=['Drug'])
    
    with_prob_Rx['Odds Ratio'] = (with_prob_Rx['with_prob_proportion']/with_prob_Rx['without_prob_proportion'])
    with_prob_Rx.sort_values(by='Odds Ratio', ascending=False, inplace=True)
    
    return with_prob_Rx.loc[:,['Drug','Odds Ratio']].head(10)

In [147]:
start_time = time.time()

cui_prob_list = ['C0348801']
likely_Rx = LikelyPrescriptions(cui_prob_list)

print("Total runtime:", time.time() - start_time, "seconds")
likely_Rx

Total runtime: 12.675717830657959 seconds


Unnamed: 0,Drug,Odds Ratio
35,Ursodiol,18.197974
44,Octreotide Acetate,7.280452
13,Tacrolimus,4.033896
50,Hydrocortisone Na Succ.,3.59986
15,Lactulose,3.564031
21,MethylPREDNISolone Sodium Succ,3.321484
41,Phenytoin Sodium,3.183578
18,1/2 NS,2.808017
42,LeVETiracetam,2.27248
49,Albumin 25% (12.5g / 50mL),2.204126
