### Cooking with ClarityNLP



In [1]:
import json, csv
import urllib, requests
import pandas as pd

# Root address of the ClarityNLP server; every endpoint below is this root
# with a path segment appended.
url = 'http://18.220.133.76:5000/'
nlpql_url, expander_url, tester_url = (
    url + endpoint
    for endpoint in ('nlpql', 'nlpql_expander', 'nlpql_tester')
)

#### ValueExtractor

Here we use ClarityNLP's `ValueExtraction` task to extract temperature values from Nursing notes.

In [2]:
# Sample NLPQL: a phenotype that pulls temperature readings out of Nursing notes.
#   - documentset NursingNotes: documents whose report type is "Nursing"
#   - termset TemperatureTerms: the terms "temp", "temperature" and "t"
#   - Temperature: ValueExtraction over that termset/documentset with
#     minimum_value "96" and maximum_value "106"
#   - hasFever (final): Temperature results whose value is >= 100.4
# NOTE(review): `limit 100` presumably caps how many documents are processed --
# confirm against the NLPQL documentation.
ve_nlpql ='''
limit 100;

phenotype "Patient Temperatures" version "2";

include ClarityCore version "1.0" called Clarity;

documentset NursingNotes:
     Clarity.createDocumentSet({
         "report_types":["Nursing"]
         });

termset TemperatureTerms:
   ["temp","temperature","t"];

 define Temperature:
   Clarity.ValueExtraction({
     termset:[TemperatureTerms],
     documentset: [NursingNotes],
     minimum_value: "96",
     maximum_value: "106"
     });

 define final hasFever:
     where Temperature.value >= 100.4;
'''

In [3]:
# Submit the NLPQL to the server as plain text.
# (The variable name `re` is kept so that any later cell reading it still works.)
re = requests.post(nlpql_url, data=ve_nlpql, headers={'content-type':'text/plain'})

# Fail loudly if the submission was rejected. Silently skipping the assignments
# below would leave `main_csv` / `intermediate_csv` / `luigi` undefined (or stale
# from an earlier run), and the failure would only surface in a later cell.
if not re.ok:
    print('NLPQL submission failed ({}): {}'.format(re.status_code, re.text))
re.raise_for_status()

# The response is a JSON object describing the job that was started.
run_result = re.json()

# Keep the URLs needed by the following cells.
main_csv = run_result['main_results_endpoint']
intermediate_csv = run_result['intermediate_results_endpoint']
luigi = run_result['luigi_task_monitoring']
print(json.dumps(run_result, indent=4, sort_keys=True))

{
    "intermediate_results_endpoint": "http://18.220.133.76:5000/job_results/264/phenotype_intermediate",
    "job_id": "264",
    "luigi_task_monitoring": "http://18.220.133.76:8082/static/visualiser/index.html#search__search=job=264",
    "main_results_endpoint": "http://18.220.133.76:5000/job_results/264/phenotype",
    "phenotype_config": "http://18.220.133.76:5000/phenotype_id/264",
    "phenotype_id": "264",
    "pipeline_configs": [
        "http://18.220.133.76:5000/pipeline_id/426"
    ],
    "pipeline_ids": [
        426
    ],
    "status_endpoint": "http://18.220.133.76:5000/status/264"
}


### Viewing your results
You can view your job's progress using the [Luigi Status Monitor](http://18.220.133.76:8082/static/visualiser/index.html). The `luigi_task_monitoring` link returned above opens the monitor already filtered to your job.

In [5]:
# Read the intermediate results CSV from its endpoint URL, then preview the first five rows.
inter_csv_df = pd.read_csv(filepath_or_buffer=intermediate_csv)
inter_csv_df.head(n=5)

Unnamed: 0,_id,batch,concept_code,condition,dimension_X,dimension_Y,dimension_Z,end,inserted_date,job_id,...,source,start,subject,temporality,term,text,units,value,value1,value2
0,5b858087f134570d1438483f,100,-1,EQUAL,100.8,-1.0,,20,2018-08-28 17:04:07.830000,264,...,MIMIC,0,91333,,,Temp,,100.8,,
1,5b858087f134570d14384840,100,-1,EQUAL,102.3,-1.0,,19,2018-08-28 17:04:07.915000,264,...,MIMIC,0,671,,,Temp,,102.3,,
2,5b858087f134570d14384841,100,-1,EQUAL,102.3,-1.0,,19,2018-08-28 17:04:07.990000,264,...,MIMIC,0,671,,,Temp,,102.3,,
3,5b858088f134570d14384842,100,-1,EQUAL,101.2,-1.0,,152,2018-08-28 17:04:08.085000,264,...,MIMIC,115,10144,,,temp,,101.2,,
4,5b858088f134570d14384843,100,-1,EQUAL,100.4,-1.0,,83,2018-08-28 17:04:08.405000,264,...,MIMIC,73,69811,,,Temp,,100.4,,


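Make sure your job is done before downloading the final results. You can check this at the `status_endpoint` URL returned when the job was submitted, or in the Luigi Status Monitor.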

In [6]:
# Read the final (main) results CSV from its endpoint URL, then preview the first five rows.
final_csv_df = pd.read_csv(filepath_or_buffer=main_csv)
final_csv_df.head(n=5)

Unnamed: 0,_id,batch,concept_code,condition,context_type,dimension_X,dimension_Y,dimension_Z,end,inserted_date,...,source,start,subject,temporality,term,text,units,value,value1,value2
0,5b858094f134570d4d38483f,100,-1,EQUAL,subject,100.8,-1.0,,20,2018-08-28 17:04:07.830000,...,MIMIC,0,91333,,,Temp,,100.8,,
1,5b858094f134570d4d384840,100,-1,EQUAL,subject,102.3,-1.0,,19,2018-08-28 17:04:07.915000,...,MIMIC,0,671,,,Temp,,102.3,,
2,5b858094f134570d4d384841,100,-1,EQUAL,subject,102.3,-1.0,,19,2018-08-28 17:04:07.990000,...,MIMIC,0,671,,,Temp,,102.3,,
3,5b858094f134570d4d384842,100,-1,EQUAL,subject,101.2,-1.0,,152,2018-08-28 17:04:08.085000,...,MIMIC,115,10144,,,temp,,101.2,,
4,5b858094f134570d4d384843,100,-1,EQUAL,subject,100.4,-1.0,,83,2018-08-28 17:04:08.405000,...,MIMIC,73,69811,,,Temp,,100.4,,
