In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Put the parent directory first on the module search path.
# NOTE(review): presumably so the local medspacy checkout is imported instead
# of an installed copy — confirm.
import sys
sys.path.insert(0, "..")

In [3]:
import medspacy

In [4]:
# Read the whole sample note into memory. The encoding is pinned so the text
# decodes the same way regardless of the platform's default locale.
# NOTE(review): assumes the file is UTF-8 (or plain ASCII) — confirm.
with open("./discharge_summary.txt", encoding="utf-8") as f:
    text = f.read()

In [5]:
# Names of the pipeline components requested from medspacy.load().
# NOTE(review): "doc_consumer" is requested here, yet the recorded
# nlp.pipe_names output does not list it and a DocConsumer is added by hand
# further down — confirm whether this entry is still needed.
enable = [
    "sentencizer",
    "tagger",
    "parser",
    "ner",
    "target_matcher",
    "context",
    "sectionizer",
    "doc_consumer",
]

In [90]:
# Build the pipeline from the named model, passing the requested component names.
nlp = medspacy.load("en_info_3700_i2b2_2012", enable=enable)



In [91]:
# Display the names of the components present in the pipeline.
nlp.pipe_names

['sentencizer',
 'tagger',
 'parser',
 'ner',
 'target_matcher',
 'context',
 'sectionizer']

In [92]:
from medspacy.io import DocConsumer

In [93]:
# Configure a DocConsumer for entity-, section- and document-level data,
# with the context and sectionizer options switched on.
doc_consumer = DocConsumer(
    nlp,
    data_types=("ent", "section", "doc"),
    context=True,
    sectionizer=True,
)

In [94]:
# Inspect the attribute names configured for each data type.
doc_consumer.attrs

{'ent': ['text',
  'start_char',
  'end_char',
  'label_',
  'is_negated',
  'is_uncertain',
  'is_historical',
  'is_hypothetical',
  'is_family',
  'section_category',
  'section_parent'],
 'section': ['section_category',
  'section_title_text',
  'section_title_start_char',
  'section_title_end_char',
  'section_text',
  'section_text_start_char',
  'section_text_end_char',
  'section_parent'],
 'doc': ['text']}

In [95]:
# Register the consumer as a component of the pipeline.
nlp.add_pipe(doc_consumer)

In [96]:
# Run the pipeline over the note text.
doc = nlp(text)

In [97]:
# List the data types stored on the processed doc.
doc._.data.keys()

dict_keys(['ent', 'section', 'doc'])

### Ents

In [98]:
# Grab the entity-level data stored on the doc.
ent_data = doc._.ent_data

In [99]:
# Show the keys of the entity data (the entity attribute names).
ent_data.keys()

odict_keys(['text', 'start_char', 'end_char', 'label_', 'is_negated', 'is_uncertain', 'is_historical', 'is_hypothetical', 'is_family', 'section_category', 'section_parent'])

In [100]:
# Request the entity data again, this time with as_rows=True.
ent_data_rows = doc._.get_data("ent", as_rows=True)

In [101]:
# Look at the first entity row.
ent_data_rows[0]

('Hydrochlorothiazide',
 163,
 182,
 'TREATMENT',
 False,
 False,
 False,
 False,
 False,
 'allergies',
 None)

In [102]:
# Convert the entity data to a DataFrame.
ents_df = doc._.to_dataframe("ent")

In [103]:
# Preview the first few entity rows.
ents_df.head()

Unnamed: 0,text,start_char,end_char,label_,is_negated,is_uncertain,is_historical,is_hypothetical,is_family,section_category,section_parent
0,Hydrochlorothiazide,163,182,TREATMENT,False,False,False,False,False,allergies,
1,Abdominal pain,239,253,PROBLEM,False,False,False,False,False,chief_complaint,
2,Invasive Procedure,273,291,TREATMENT,False,False,False,False,False,chief_complaint,
3,PICC line,293,302,TREATMENT,False,False,False,False,False,chief_complaint,
4,ERCP,314,318,TEST,False,False,False,False,False,chief_complaint,


In [104]:
# Keep only the negated entities. The boolean column is used directly as the
# row mask instead of being compared against True.
ents_df[ents_df["is_negated"]]

Unnamed: 0,text,start_char,end_char,label_,is_negated,is_uncertain,is_historical,is_hypothetical,is_family,section_category,section_parent
9,metastasis,519,529,PROBLEM,True,False,False,False,False,history_of_present_illness,
19,alcohol or drug use,788,807,PROBLEM,True,False,False,False,False,social_history,


### Sections

In [105]:
# Grab the section-level data stored on the doc.
section_data = doc._.section_data

In [106]:
# Show the keys of the section data (the section attribute names).
section_data.keys()

odict_keys(['section_category', 'section_title_text', 'section_title_start_char', 'section_title_end_char', 'section_text', 'section_text_start_char', 'section_text_end_char', 'section_parent'])

In [107]:
# Request the section data with as_rows=True.
section_data_rows = doc._.get_data("section", as_rows=True)

In [108]:
# Look at the first section row.
section_data_rows[0]

(None,
 None,
 0,
 0,
 'Admission Date:  [**2573-5-30**]              Discharge Date:   [**2573-7-1**]\n\nDate of Birth:  [**2498-8-19**]             Sex:   F\n\n',
 0,
 134,
 None)

In [109]:
# Convert the section data to a DataFrame (not displayed in this cell).
section_df = doc._.to_dataframe("section")

### Doc

In [110]:
# Grab the document-level data stored on the doc.
doc_data = doc._.doc_data

In [111]:
# Show the keys of the document-level data.
doc_data.keys()

odict_keys(['text'])

In [112]:
# Convert the document-level data to a DataFrame.
doc_df = doc._.to_dataframe("doc")

In [113]:
# Display the document-level DataFrame.
doc_df

Unnamed: 0,text
0,Admission Date: [**2573-5-30**] ...


# Customizing attributes

In [126]:
# Load a second, separate pipeline to pair with a customized consumer.
nlp2 = medspacy.load("en_info_3700_i2b2_2012", enable=enable)



In [127]:
# Collect only a custom subset of entity attributes. The attribute names are
# given as a list rather than a set: a set has no guaranteed iteration order,
# so a list is what keeps the order of the collected columns deterministic.
doc_consumer2 = DocConsumer(
    nlp2,
    data_types=("ent",),
    attrs={
        "ent": [
            "lower_",
            "label_",
            "is_negated",
            "section_category",
        ],
    },
)

In [128]:
# Register the customized consumer as a component of the second pipeline.
nlp2.add_pipe(doc_consumer2)

In [129]:
# Run the second pipeline over the same note text.
# NOTE: this rebinds `doc`, replacing the Doc produced by the first pipeline.
doc = nlp2(text)

In [130]:
# Display the entity DataFrame restricted to the customized attributes.
doc._.to_dataframe("ent")

Unnamed: 0,lower_,label_,is_negated,section_category
0,hydrochlorothiazide,TREATMENT,False,allergies
1,abdominal pain,PROBLEM,False,chief_complaint
2,invasive procedure,TREATMENT,False,chief_complaint
3,picc line,TREATMENT,False,chief_complaint
4,ercp,TEST,False,chief_complaint
5,sphincterotomy,TREATMENT,False,chief_complaint
6,a recent stroke,PROBLEM,False,history_of_present_illness
7,abdominal pain,PROBLEM,False,history_of_present_illness
8,imaging,TEST,False,history_of_present_illness
9,metastasis,PROBLEM,True,history_of_present_illness
