In [3]:
import spacy
import json
from pre import preprocesing as pre
from post import Struct

# Path of the spaCy model; used here for spacy.info and later for spacy.load.
__MODEL_PATH = "models/mama-ents"
# NER label names as reported by spacy.info for that model; handed to
# Struct.Struct when structuring the detected entities.
__LABELS = spacy.info(__MODEL_PATH)["labels"]["ner"]


In [6]:
# Loaded spaCy pipelines keyed by model path, so a model is read from disk
# only once per kernel session instead of on every call to process_note.
_PIPELINE_CACHE = {}


def _get_pipeline(model_path):
    """Return the spaCy pipeline for ``model_path``, loading it on first use."""
    nlp = _PIPELINE_CACHE.get(model_path)
    if nlp is None:
        nlp = spacy.load(model_path)
        _PIPELINE_CACHE[model_path] = nlp
    return nlp


def process_note(text, ehr):
    """Run the NER pipeline on one note and return the result as a JSON string.

    The text is preprocessed, passed through the spaCy model at
    ``__MODEL_PATH``, and the detected entities are handed to
    ``Struct.Struct(__LABELS).struct`` to build the diagnoses.

    Args:
        text: Raw note text (str).
        ehr: Value stored unchanged under the ``"ehr"`` key of the result.

    Returns:
        A JSON string (``indent=4``). When ``struct`` returns a non-empty
        result it contains ``"ehr"`` and ``"diags"``; otherwise it contains
        ``"ehr"``, ``"ents"`` (``str`` of the entity tuple), ``"labels"`` and
        the preprocessed ``"text"``.

    Side effects:
        Prints a debug trace (raw text, preprocessed text, entities found).
    """
    debug_parts = ["texto sin procesar: " + text + "\n\n"]

    # Preprocess the text.
    # NOTE(review): Preprocesing is rebuilt on each call; it could be cached
    # like the pipeline if it is stateless -- confirm before changing.
    p = pre.Preprocesing("pre/acronimos.json")
    text = p.fix(text)
    debug_parts.append("texto preprocesado: " + text + "\n\n")

    # Run the (cached) model on the preprocessed text and collect its entities.
    nlp = _get_pipeline(__MODEL_PATH)
    doc = nlp(text)
    ents = doc.ents

    debug_parts.append("entidades encontradas: " + "\n")
    debug_parts.extend(
        "- " + ent.label_ + ": " + ent.text + "\n" for ent in ents
    )
    debug_parts.append("\n\n")

    # Extract the diagnoses from the detected entities.
    strc = Struct.Struct(__LABELS)
    diags = strc.struct(text=text, ents=ents)

    if len(diags) > 0:
        ehr_info = {"ehr": ehr, "diags": diags}
    else:
        # No diagnosis could be built: return the raw entities and text instead.
        labels = [ent.label_ for ent in ents]
        ehr_info = {"ehr": ehr, "ents": str(ents), "labels": labels, "text": text}

    print("".join(debug_parts))

    return json.dumps(ehr_info, indent=4)

In [10]:

# Sample note (two lines) used to exercise process_note.
text = """anemia a estudio.
 carcinoma ductal infiltrante t4 n3 m1 ( afectacion supraclavicular bilateral)."""

# Placeholder identifier; process_note copies it into the "ehr" field of the result.
ehr = "xxx"

# process_note prints its debug trace and returns the result as a JSON string.
result = process_note(text, ehr)



texto sin procesar: anemia a estudio.
 carcinoma ductal infiltrante t4 n3 m1 ( afectacion supraclavicular bilateral).

texto preprocesado: anemia a estudio.
 carcinoma ductal infiltrante t4 n3 m1 ( afectacion supraclavicular bilateral).

entidades encontradas: 
- CANCER_CONCEPT: carcinoma
- CANCER_TYPE: ductal
- CANCER_EXP: infiltrante
- TNM: t4 n3 m1
- CANCER_MET: afectacion
- CANCER_LOC: supraclavicular
- CANCER_LOC: bilateral





In [9]:
# Show the JSON string returned by process_note in the previous cell.
print(result)

{
    "ehr": "xxx",
    "diags": [
        {
            "CANCER_CONCEPT": [
                "carcinoma"
            ],
            "CANCER_TYPE": [
                "ductal"
            ],
            "CANCER_EXP": [
                "infiltrante"
            ],
            "TNM": [
                "t4 n3 m1"
            ],
            "CANCER_MET": [
                "afectacion"
            ],
            "CANCER_LOC": [
                "supraclavicular",
                "bilateral"
            ],
            "text": "anemia a estudio.\n carcinoma ductal infiltrante t4 n3 m1 ( afectacion supraclavicular bilateral"
        }
    ]
}
