Skip to content

Commit

Permalink
implemented a basic api used by the nlp rest service
Browse files Browse the repository at this point in the history
  • Loading branch information
lrog committed Jun 5, 2019
1 parent b6f58de commit c933bef
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
@@ -0,0 +1,5 @@
*.DS_Store
.git
.idea
.dat
venv
6 changes: 6 additions & 0 deletions .gitignore
Expand Up @@ -11,6 +11,8 @@ tmp/
*_tmp/
medcat.egg-info/
build/
.idea
venv

#tmp and similar files
.nfs*
Expand All @@ -29,3 +31,7 @@ tmp_*
*hidden*
nohup.out
tmp.py
.DS_Store

# models files
*.dat
132 changes: 132 additions & 0 deletions api/cogstack_api.py
@@ -0,0 +1,132 @@
from flask import Flask
from flask import Response
import json
from medcat.cdb import CDB
from medcat.utils.vocab import Vocab
from medcat.cat import CAT
from flask import request
import os


vocab = Vocab()
cdb = CDB()

cdb.load_dict(os.getenv("UMLS_MODEL", '/cat/models/med_ann_norm.dat'))
vocab.load_dict(path=os.getenv("VOCAB_MODEL", '/cat/models/med_ann_norm_dict.dat'))
cat = CAT(cdb, vocab=vocab)

cat.spacy_cat.train = False

app = Flask(__name__)


app_name = 'MEDCAT'
app_lang = 'en'
app_version = os.getenv("CAT_VERSION", '0.1.0')


@app.route('/api/info', methods=['GET'])
def info():
app_info = {'name': app_name,
'language': app_lang,
'version': app_version}
return Response(response=json.dumps(app_info), status=200, mimetype="application/json")


@app.route('/api/process', methods=['POST'])
def process():
payload = request.get_json()
if payload is None or not is_request_payload_valid(payload):
return Response(response="Input Payload should be JSON", status=400)

try:
result = process_request_payload(payload)
response = {'result': result}
return Response(response=json.dumps(response), status=200)

except Exception as e:
Response(response="Internal processing error %d" % e, status=500)


@app.route('/api/process_bulk', methods=['POST'])
def process_bulk():
payload = request.get_json()
if payload is None or not is_request_payload_valid_bulk(payload):
return Response(response="Input Payload should be JSON", status=400)

try:
result = process_request_payload_bulk(payload)
response = {'result': result}
return Response(response=json.dumps(response), status=200)

except Exception as e:
Response(response="Internal processing error %d" % e, status=500)


def is_request_payload_valid(payload):
if 'content' not in payload or 'text' not in payload['content']:
return False
return True


def process_request_payload(payload):
text = payload['content']['text']
entities = cat.get_entities(text)

# parse the result
nlp_result = {'text': text,
'annotations': entities,
'success': True
}

# append footer
if 'footer' in payload['content']:
nlp_result['footer'] = payload['content']['footer']

return nlp_result


def is_request_payload_valid_bulk(payload):
if 'content' not in payload:
return False
return True


def process_request_payload_bulk(payload):

# prepare the payload
content = payload['content']
documents = [(i, content[i]['text']) for i in range(len(content))]

nproc = int(os.getenv('NPROC', 8))
batch_size = min(300, int(len(documents) / (2 * nproc)))
res_raw = cat.multi_processing(documents, nproc=nproc, batch_size=batch_size)

assert len(res_raw) == len(payload['content'])

# parse the result -- need to sort by IDs as there's no guarantee
# on the order of the docs processed
res_sorted = sorted(res_raw, key=lambda x: x[0])
nlp_result = []

# parse the results and add footer if needed
for i in range(len(res_sorted)):
res = res_sorted[i]
in_ct = content[i]

# parse the result
out_res = {'text': res[1]["text"],
'annotations': res[1]["entities"],
'success': True
}
# append the footer
if 'footer' in in_ct:
out_res['footer'] = in_ct['footer']

nlp_result.append(out_res)

return nlp_result


if __name__ == '__main__':
app.run(debug=True, host='0.0.0.0', port=5000)

0 comments on commit c933bef

Please sign in to comment.