In [1]:
import sys

import spacy
import medspacy
import nltk

from medspacy.util import DEFAULT_PIPENAMES

# Overview
This notebook shows how to enable QuickUMLS in a medspaCy pipeline to extract UMLS concepts (CUIs) from text.

In [2]:
# Report the platform identifier the notebook is executing on.
platform_name = sys.platform
print('Running on platform: {}'.format(platform_name))

Running on platform: darwin


In [3]:
# Only one tokenizer can be active, so keep the medspacy tokenizer, which
# handles infixes (e.g. 'h/o', 'chf+cp', etc).
#
# Start from a copy of medspacy's default component names (so that
# DEFAULT_PIPENAMES itself is left untouched) and make sure 'quickumls'
# is among them.
medspacy_pipes = DEFAULT_PIPENAMES.copy()
medspacy_pipes.add('quickumls')  # adding is a no-op when the name is already present

print(medspacy_pipes)

# Build the pipeline with exactly the components named above.
nlp = medspacy.load(enable=medspacy_pipes)

{'quickumls', 'target_matcher', 'context', 'sentencizer', 'tokenizer'}
Loading QuickUMLS resources from a default SAMPLE of UMLS data from here: /Users/user925228/opt/anaconda3/lib/python3.8/site-packages/medspacy-0.0.1.4-py3.8.egg/resources/quickumls/QuickUMLS_SAMPLE_lowercase_POSIX_unqlite


In [4]:
# Display the names of the components in the pipeline loaded above.
nlp.pipe_names

['sentencizer', 'target_matcher', 'context', 'QuickUMLS matcher']

# Process a document containing concepts found in the sample's default MRCONSO.RRF file:
https://www.nlm.nih.gov/research/umls/new_users/online_learning/data-files/MRCONSO.RRF

In [5]:
# Run the pipeline on a single sentence.
sample_text = 'Decreased dipalmitoyllecithin content found in lung specimens'
doc = nlp(sample_text)

# For each entity on the document, print its text, its label (shown as the
# UMLS CUI), and the `similarity` and `semtypes` extension attributes.
for ent in doc.ents:
    print('Entity text : {}'.format(ent.text))
    print('Label (UMLS CUI) : {}'.format(ent.label_))
    custom_attrs = ent._
    print('Similarity : {}'.format(custom_attrs.similarity))
    print('Semtypes : {}'.format(custom_attrs.semtypes))

Entity text : dipalmitoyllecithin
Label (UMLS CUI) : C0000039
Similarity : 0.8888888888888888
Semtypes : {'T121', 'T119'}
