Skip to content
This repository has been archived by the owner on Apr 5, 2024. It is now read-only.

Commit

Permalink
add enum_list as a valid option for the value extractor in nlpql, #46 (#47)
Browse files: browse the repository at this point in the history
  • Loading branch information
charhart committed Jul 30, 2018
1 parent d2d0d3c commit d547a9f
Show file tree
Hide file tree
Showing 14 changed files with 814 additions and 798 deletions.
5 changes: 4 additions & 1 deletion nlp/algorithms/value_extraction/value_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,10 @@ def run(term_string, sentence, str_minval=None, str_maxval=None,
terms = [term.strip() for term in terms]

if enumlist:
filter_terms = enumlist.split(',')
if type(enumlist) == 'str':
filter_terms = enumlist.split(',')
else:
filter_terms = enumlist
filter_terms = [term.strip() for term in filter_terms]

# save a copy of the original terms
Expand Down
44 changes: 16 additions & 28 deletions nlp/algorithms/value_extraction_wrappers/value_extractor_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,17 @@
from itertools import product

import regex as re

import json
from data_access import Measurement
from algorithms.segmentation import *
from algorithms.value_extraction.value_extractor import extract_value
from algorithms.value_extraction import run_value_extractor

print('Initializing models for value extractor...')
segmentor = Segmentation()
print('Done initializing models for value extractor...')


def run_value_extractor_full(term_list, text, minimum_value, maximum_value, is_case_sensitive_text=False, denom_only=False):
# convert terms to lowercase unless doing a case-sensitive match
if not is_case_sensitive_text:
term_list = [term.lower() for term in term_list]
text = text.lower()

# do range check on numerator values for fractions
if isinstance(minimum_value, str):
if -1 != minimum_value.find('/'):
minimum_value = minimum_value.split('/')[0]

if isinstance(maximum_value, str):
if -1 != maximum_value.find('/'):
maximum_value = maximum_value.split('/')[0]

minval = float(minimum_value)
maxval = float(maximum_value)
def run_value_extractor_full(term_list, text, minimum_value, maximum_value, enumlist=list(), is_case_sensitive_text=False, denom_only=False):

sentence_list = segmentor.parse_sentences(text)
process_results = []
Expand All @@ -39,19 +23,23 @@ def run_value_extractor_full(term_list, text, minimum_value, maximum_value, is_c
match = matcher.search(sentence)
if match:
term = match.group(0)
value_results = extract_value(term, sentence, minval, maxval, denom_only=denom_only)
if len(value_results) > 0:
for x in value_results:
process_results.append(
Measurement(sentence=sentence, text=x.matching_term, start=x.start, end=x.end,
condition=x.cond, X=x.num1, Y=x.num2))
value_str = run_value_extractor(term, sentence, str_minval=minimum_value, str_maxval=maximum_value, enumlist=enumlist, is_case_sensitive=is_case_sensitive_text, is_denom_only=denom_only)
if len(value_str) > 0:
value_results = json.loads(value_str)
if 'measurementList' in value_results:
measurement_results = value_results['measurementList']
for x in measurement_results:
process_results.append(
Measurement(sentence=sentence, text=x['matchingTerm'], start=x['start'], end=x['end'],
condition=x['condition'], X=x['x'], Y=x['y']))

return process_results


if __name__ == '__main__':
res = run_value_extractor_full(["temperature", "temp", "T"],
"Temp was 99-101",
96, 106, False)
res = run_value_extractor_full(["temperature", "temp", "T", "BP", "HR", "Sp02"],
'Prior to transfer, his vitals were BP 119/53 and BP 105/43 sleeping, '
'HR 103, RR 15, and SpO2 97% on NRB.',
10, 500, is_case_sensitive_text=False)

[print(str(t.to_json())) for t in res]
3 changes: 2 additions & 1 deletion nlp/apis/algorithm_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def value_extractor():
if request.method == 'POST' and request.data:
init()
obj = NLPModel.from_dict(request.get_json())
results = run_value_extractor_full(obj.terms, obj.text, obj.min_value, obj.max_value, obj.case_sensitive)
results = run_value_extractor_full(obj.terms, obj.text, obj.min_value, obj.max_value, is_case_sensitive_text=obj
.case_sensitive)

return json.dumps([r.__dict__ for r in results], indent=4)
return "Please POST a valid JSON object with terms and text"
Expand Down
5 changes: 3 additions & 2 deletions nlp/data_access/pipeline_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def __init__(self, pipeline_id, owner, name, description, config_string, pipelin

class PipelineConfig(BaseModel):

def __init__(self, config_type, name, terms=list(), description = '', limit=0, concept_code=-1, owner='system',
def __init__(self, config_type, name, terms=list(), description='', limit=0, concept_code=-1, owner='system',
include_synonyms=False, include_descendants=False, include_ancestors=False, report_tags=list(),
vocabulary='SNOMED', sections=list(), report_type_query='', minimum_value=0, maximum_value=10000,
case_sensitive=False, cohort=list(), is_phenotype=False, report_types=list(), custom_query='', filter_query='',
custom_arguments: dict=dict()):
custom_arguments: dict=dict(), enum_list: list=list()):
self.config_type = config_type
self.name = name
self.description = description
Expand All @@ -53,6 +53,7 @@ def __init__(self, config_type, name, terms=list(), description = '', limit=0, c
self.cohort = cohort
self.is_phenotype = is_phenotype
self.custom_arguments = custom_arguments
self.enum_list = enum_list


def insert_pipeline_config(pipeline: PipelineConfig, connection_string: str):
Expand Down
2 changes: 1 addition & 1 deletion nlp/luigi_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def complete(self):

if __name__ == "__main__":
owner = "tester"
p_id = "10097"
p_id = "10120"
the_job_id = data_access.create_new_job(
data_access.NlpJob(job_id=-1, name="Test Phenotype", description="Test Phenotype",
owner=owner, status=data_access.STARTED, date_ended=None,
Expand Down
1 change: 1 addition & 0 deletions nlp/nlpql/nlpql_lexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ DEFINE: 'define';
CONTEXT: 'context';
MIN_VALUE: 'minimum_value';
MAX_VALUE: 'maximum_value';
ENUM_LIST: 'enum_list';
LIMIT: 'limit';


Expand Down
5 changes: 4 additions & 1 deletion nlp/nlpql/nlpql_lexer.interp

Large diffs are not rendered by default.

0 comments on commit d547a9f

Please sign in to comment.