In [1]:
#run this cell when online
!pip install xlrd

[33mYou are using pip version 10.0.1, however version 19.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## install dependencies and set data paths

In [94]:
import pandas as pd
import numpy as np

import os
import re
from collections import Counter

In [95]:
#use this cell when working online
path = '/floyd/home/ed-triage'
data_path = '/floyd/home/data'

In [96]:
#use this cell when working from home
path = '/Users/jjaskolkambp/Desktop/machine learning/my_projects/ed-triage'
data_path = '/Users/jjaskolkambp/Desktop/machine learning/my_projects/data/ED triage project/egh'

In [97]:
#loading in the entire spreadsheet as a dataframe
clin = pd.read_excel(data_path + '/EGH ED visits FY1819_with additions.xlsx')

In [98]:
len(clin)

85154

In [99]:
clin.columns

Index(['ID', 'ChartNumber', 'EncounterNumber', 'TriageLevel', 'AgeNumber',
       'AgeInYrs', 'GenderDesc', 'Triage Date & Time', 'Reg Date & Time',
       'PIA Date & Time', 'Disposition Date & Time', 'DischargeDisposition',
       'DischargeDispositionDesc', 'Left ED Date & Time',
       'PresentingComplaint', 'PresentingComplaintDesc', 'MainDiagnosisCode',
       'MainDiagnosisCodeDesc', 'AdmitLocation', 'PatientService',
       'SubjectiveNotes', 'InfectionControlScreening', 'MedicalHistory',
       'BloodPressure_LastEDReading', 'O2Saturation_LastEDReading',
       'Pulse_LastEDReading', 'Temperature_LastEDReading'],
      dtype='object')

In [100]:
clin.dropna(subset = ['SubjectiveNotes', 'InfectionControlScreening', 'MedicalHistory'], inplace = True)
clin.shape

(63474, 27)

## The next few cells are for getting a feel of the data in the different columns and how they might be useful

In [8]:
discharge_count = Counter(clin['DischargeDisposition']); discharge_count

Counter({17: 70703,
         62: 762,
         7: 11199,
         6: 374,
         16: 493,
         40: 226,
         8: 242,
         72: 113,
         63: 250,
         30: 346,
         64: 171,
         12: 242,
         71: 3,
         14: 3,
         61: 12,
         90: 10,
         13: 3,
         9: 2})

In [9]:
discharge_count2 = Counter(clin['DischargeDispositionDesc']); discharge_count2

Counter({'Discharge to private home, condo, apt without support service/referral': 70703,
         'Left at his/her own risk post-initial treatment': 762,
         'Admit to reporting facility as inpatient to another unit from amb care': 11199,
         'Admit to reporting facility as inpatient to SCU or OR from amb care': 374,
         'Discharge to private home, condo, apt with support service/referral': 493,
         'Transfer to Group/supportive living': 226,
         'Transfer to another acute care facility directly from amb care': 242,
         'Died in Facility': 113,
         'Left After Triage': 250,
         'Transfer to Residential care': 346,
         'Left After Initial Assessment': 171,
         'Intra-facility transfer to day surgery': 242,
         'Dead on arrival': 3,
         'Intra-facility transfer to clinic': 3,
         'Left at his/her own risk following registration': 12,
         'Transfer to correctional facility': 10,
         'Intra-facility transfer to ED'

In [10]:
comp_count = Counter(clin['PatientService']); comp_count

Counter({nan: 73660,
         'Cardiology': 1550,
         'ALC General Medicine': 201,
         'General Medicine': 4512,
         'INTENSIVE CARE UNIT': 253,
         'Respirology': 852,
         'General Surgery': 650,
         'Paediatrics': 882,
         'Mental Health': 1198,
         'Orthopaedics': 430,
         'ALC Neurology': 64,
         'Genitourinary': 182,
         'Neurology': 456,
         'Palliative': 65,
         'ALC Respirology': 28,
         'Oncology': 38,
         'ALC Orthopaedics': 23,
         'ALC Cardiology': 38,
         'AD PALLIATIVE': 45,
         'AD General Medicine': 2,
         'Obstetrics': 17,
         'AD Respirology': 1,
         'AD Oncology': 1,
         'ALC General Surgery': 2,
         'Neonatal Retro Transfers': 1,
         'ALC Oncology': 1,
         'AD Neurology': 1,
         'Newborn': 1})

## refactoring some of the cells so they are clean and useful

In [101]:
o2sat = [item if item[-2:] != 'ra' else item[:-2] for item in clin['O2Saturation_LastEDReading'].astype('str')]
o2sat = [np.nan if (len(item) <2 or len(item) >3) else float(item) for item in o2sat] 
o2sat = [np.nan if item == 'nan' else float(item) for item in o2sat] 
clin['o2sat'] = o2sat

In [102]:
pulse = clin['Pulse_LastEDReading']
pulse = [re.sub("[^0-9]", "", str(item)) for item in pulse]
pulse = [np.nan if item == '' else float(item) for item in pulse] 
#set(pulse)
clin['pulse'] = pulse

In [103]:
temp = clin['Temperature_LastEDReading']
temp = [re.sub("[^0-9]", "", str(item)) for item in temp]
temp = [np.nan if item == '' else float(item) for item in temp] 
temp = [item/10 if item > 100 else item for item in temp]
temp = [np.nan if item > 40  else item for item in temp]
#set(temp)
clin['temp'] = temp

In [104]:
def bptrans(bp):
    if pd.isnull(bp) or len(bp.split('/')) !=2:
        return [np.nan, np.nan]
    res = []
    for x in bp.split('/'):
        try:
            float(x)
            res =  [float(x) for x in bp.split('/')]
        except: 
            res =  [np.nan, np.nan]
        return res


In [218]:
#this is to clean the text in the subjective notes column (a little bit)
def fixencode(s):
    if pd.isnull(s):
        s = "None"
    s = s[14:]
    s =s.replace('<LT><LF>','')
    s = s.replace('<LT>LF>','')
    s =s.replace('Pt.', 'patient')
    s =s.replace('Pt', 'patient')
    s = s.lower()
    s = s.replace('pt.', 'patient')
    s = s.replace('y/o', 'year old')
    s = s.replace('c/o', 'complains of')
    s = s.replace('sob', 'shortness of breath')
    s = s.replace('c/p', 'chest pain')
    s = s.replace('hrs', 'hours')
    s = s.replace('hx', 'history')
    s = s.replace('n/v', 'nausea and/or vomiting')
    s = s.replace('a/e', 'air entry')
    s = s.replace('a/o', 'alert and oriented')
    s = s.replace('a&o', 'alert and oriented')
    s = s.replace('d/c', 'discharge')
    s = s.replace('u/s', 'ultrasound')
    s = s.replace('yrs', 'years')
    s = s.replace('lmp', 'last menstrual period')
    s = s.replace('w/', 'with')
    s = s.replace('@', 'at ')
    s = s.replace('b/c', 'because')
    s = re.sub(r'\bpt.','patient ',s)
    s = re.sub(r'\blt.','left ',s)
    s = re.sub(r'\brt.','right ',s)
    s = re.sub(r'\bmd.','doctor ',s)
    s = re.sub(r'\bra\b','room air ',s)
    s = re.sub(r'\bbp.','blood pressure ',s)
    s = re.sub(r'\bcp.','chest pain ',s)
    s = re.sub(r'\bfd.','family doctor ',s)
    s = s.replace('abx', 'antibiotics')
    s = s.replace(' htn ', ' hypertension ')
    s = re.sub(r'\btyl\b', 'tylenol', s)
    s = re.sub(r'\bprn\b', 'as needed ',s)
    s = re.sub(r'\bvag\b', 'vaginal ',s)
    s = re.sub(r'\bpo\b', 'by mouth ',s)
    s = re.sub(r'\bllq\b', 'left lower quadrant',s)
    s = re.sub(r'\brlq\b', 'right lower quadrant  ',s)
    s = re.sub(r'\bluq\b', 'left upper quadrant',s)
    s = re.sub(r'\bruq\b', 'right upper quadrant  ',s)
    s = re.sub(r'\buti\b', 'urinary tract infection ',s)
    s = re.sub(r'\brd', 'right digit number ',s)
    s = re.sub(r'\bld', 'left digit number ',s)
    s = re.sub(r'\br\b', 'right ',s)
    s = re.sub(r'\bl\b', 'left ',s)
    s = re.sub(r'\bed\b', 'emergency department ',s)
    s = re.sub(r'\ber\b', 'emergency room ',s)
    s = re.sub(r'\bbm\b', 'bowel movement ',s)
    s = s.replace(' dx ', ' diagnosis')
    s = s.replace(' sts ', ' states')
    s = re.sub(r'#\s', 'fracture ', s)
    
    #remove extra white spaces
    s = re.sub(r'\s+', ' ', s)

    #note that what's missing here is removal of punctuation
    return s

In [219]:
clin['CleanSubjectiveNotes'] = clin.SubjectiveNotes.map(fixencode)

In [220]:
#just using this field to look at some examples of how the text cleaning is going
for item in clin['CleanSubjectiveNotes'].iloc[10000:10010]:
    print (item)
    print ()

patient reported that he needs help for alcohol withdrawal. having palpitations, anxiety attacks and tremors.

patient reported that she was going down the stairs today and missed 1 step. twisted her left foot.

patient reported was walking faster to catch up with the traffic light and heard a crack, unable to walk normally. nwb left leg.

patient reffered to emergency department by fmd for ongoing left lower quadrant pain starting 4 days ago. patient was prescribed flagyl and cipro 500mg twice a day with no improvement since saturday. patient states he did start taking the antibiotics on sat. patient states that pain is tender and ongoing. denies radiating. patient states he has a weird taste in his mouth. pain increases with movement and activity. patient states that he has been voiding more frequently, but also states he has increased his fluid intake. denies all other urinary symptoms. states no bowel concerns.

16 days old baby, born at trillium. born at 36 weeks, natural. no comp

In [108]:
clin['BP'] = clin.BloodPressure_LastEDReading.map(bptrans)

clin[['systolic', 'diastolic']] = pd.DataFrame(clin.BP.tolist(), index= clin.index)
clin["Gender"] = clin.GenderDesc.map(lambda x: 1 if x == "Male" else 0)

## split the screening questions
- result is a df called: screening_df

In [109]:
qlist = ['Are you feeling feverish or have had shakes or chills in the last 24 hours?',
         'Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting?',
        'Do you have a new Rash?',
        'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours?',
         'Have you travelled outside of Canada/USA in the last 3 weeks?',
         'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks?',
         'Have you received Health Care in another country in the last 2 years?',
        'Do you have a new/worse cough or shortness of breath?',
         'If so, select all countries that apply',
        'If so, select all infectious diseases that apply']

In [110]:
#making a dict so that the indect of an item on the list, matches the question later
q_dict = {i:q for i,q in enumerate(qlist)}

In [111]:
q_dict

{0: 'Are you feeling feverish or have had shakes or chills in the last 24 hours?',
 1: 'Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting?',
 2: 'Do you have a new Rash?',
 3: 'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours?',
 4: 'Have you travelled outside of Canada/USA in the last 3 weeks?',
 5: 'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks?',
 6: 'Have you received Health Care in another country in the last 2 years?',
 7: 'Do you have a new/worse cough or shortness of breath?',
 8: 'If so, select all countries that apply',
 9: 'If so, select all infectious diseases that apply'}

In [112]:
#this is going to be a list made of up all the entries in the infection control screening column
#casting the results as string to manipulate later
qans = list(clin['InfectionControlScreening'].str[14:].astype('str'))

In [113]:
len(qans)

63474

In [114]:
#this is going to give me a way to split the questions by finding where they occur in the field
split_points = []
for item in qans:
    temp_points = []
    for word in qlist:
        temp_points.append(item.find(word))
    split_points.append(temp_points)
    

In [115]:
#this is going to give me list of places to split each entry (using only the questions that are actually present)
real_points = []
for item in split_points:
    real_points.append([idx for idx in sorted(item) if idx > 0])

In [116]:
split_points[10000:10010]

[[241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1],
 [241, 432, 400, 325, 0, 70, 558, 179, -1, -1]]

In [117]:
#anything from the list above which would have been empty in the original data
#gives a list of -1's in split_points and nothing in real_points
real_points[10000:10010]

[[70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558],
 [70, 179, 241, 325, 400, 432, 558]]

In [118]:
print(qans[1000])
print()
print (split_points[1000])
print ()
print(real_points[1000])

Have you travelled outside of Canada/USA in the last 3 weeks? N<LT>LF>Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks? N<LT>LF>Do you have a new/worse cough or shortness of breath? Y<LT>LF>Are you feeling feverish or have had shakes or chills in the last 24 hours? N<LT>LF>Do you have a new onset of Vomiting/Diarrhea in the last 24 hours? N<LT>LF>Do you have a new Rash? N

[241, -1, 400, 325, 0, 70, -1, 179, -1, -1]

[70, 179, 241, 325, 400]


In [119]:
#this should mutate qans so each item is now a list of q and a's
for idx in range(len(qans)):
    qans[idx] = [qans[idx][i : j] for i, j in zip([0] + real_points[idx], real_points[idx] + [None])]
    

In [120]:
qans[1000]

['Have you travelled outside of Canada/USA in the last 3 weeks? N<LT>LF>',
 'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks? N<LT>LF>',
 'Do you have a new/worse cough or shortness of breath? Y<LT>LF>',
 'Are you feeling feverish or have had shakes or chills in the last 24 hours? N<LT>LF>',
 'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours? N<LT>LF>',
 'Do you have a new Rash? N']

In [121]:
qans[10000]

['Have you travelled outside of Canada/USA in the last 3 weeks? N<LT>LF>',
 'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks? N<LT>LF>',
 'Do you have a new/worse cough or shortness of breath? N<LT>LF>',
 'Are you feeling feverish or have had shakes or chills in the last 24 hours? N<LT>LF>',
 'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours? N<LT>LF>',
 'Do you have a new Rash? N<LT>LF>',
 'Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting? N<LT>LF>',
 'Have you received Health Care in another country in the last 2 years? N']

In [122]:
#this will mutate each item of each list in qans and drop the <LT>LF> if present
for q in qans:
    for idx in range(len(q)):
        if q[idx].find('<LT>LF>') != -1:
            q[idx] = q[idx][:-7]
        

In [123]:
qans[1000]

['Have you travelled outside of Canada/USA in the last 3 weeks? N',
 'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks? N',
 'Do you have a new/worse cough or shortness of breath? Y',
 'Are you feeling feverish or have had shakes or chills in the last 24 hours? N',
 'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours? N',
 'Do you have a new Rash? N']

In [124]:
qans[10000]

['Have you travelled outside of Canada/USA in the last 3 weeks? N',
 'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks? N',
 'Do you have a new/worse cough or shortness of breath? N',
 'Are you feeling feverish or have had shakes or chills in the last 24 hours? N',
 'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours? N',
 'Do you have a new Rash? N',
 'Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting? N',
 'Have you received Health Care in another country in the last 2 years? N']

In [125]:
qans[1000]

['Have you travelled outside of Canada/USA in the last 3 weeks? N',
 'Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks? N',
 'Do you have a new/worse cough or shortness of breath? Y',
 'Are you feeling feverish or have had shakes or chills in the last 24 hours? N',
 'Do you have a new onset of Vomiting/Diarrhea in the last 24 hours? N',
 'Do you have a new Rash? N']

In [126]:
seg_list = []
for q in qans:
    #print ('initial:', q)
    test_question = q
    res = []
    for item in test_question:
        #print (item)
        for qq in qlist:
            if item.find(qq) != -1:
                #print ('question:',item[:len(qq)])
                #print ('answer:',item[len(qq)+1:])
                res.append([item[:len(qq)],item[len(qq)+1:] ])
    seg_list.append(res)
    #print ('\nresult:',res, '\n\n')

In [127]:
len(seg_list)

63474

In [128]:
seg_list2 = []
for q in qans:
    #print ('initial:', q)
    test_question = q
    res = {}
    for item in test_question:
        #print (item)
        for qq in qlist:
            if item.find(qq) != -1:
                question = item[:len(qq)]
                answer = item[len(qq)+1:]
                res[question] = answer
    seg_list2.append(res)

In [129]:
final_list = []
for q in range(len(split_points)):
    temp = []
    for i in range(len(split_points[q])):
        if split_points[q][i] == -1:
            #print('nan')
            temp.append('nan')
        else:
            #print (brief_list2[q][q_dict[i]])  # q_dict[i], i, brief_splits[q][i],
            temp.append(seg_list2[q][q_dict[i]])
    final_list.append(temp)
    #print ('\n next item \n')
            

In [130]:
len(final_list)

63474

In [131]:
final_list[100:110]

[['Y', 'N', 'N', 'Y', 'Y', 'N', 'nan', 'Y', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['nan', 'N', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['nan', 'N', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan']]

In [132]:
final_list[1000:1020]

[['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'Y', 'nan', 'nan'],
 ['N', 'nan', 'N', 'Y', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['Y', 'nan', 'N', 'N', 'N', 'N', 'nan', 'Y', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'IND', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['Y', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan', 'N', 'nan', 'nan'],
 ['N', 'nan', 'N', 'N', 'N', 'N', 'nan',

In [133]:
screening_df = pd.DataFrame(final_list, columns = qlist)
screening_df.head()

Unnamed: 0,Are you feeling feverish or have had shakes or chills in the last 24 hours?,Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting?,Do you have a new Rash?,Do you have a new onset of Vomiting/Diarrhea in the last 24 hours?,Have you travelled outside of Canada/USA in the last 3 weeks?,Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks?,Have you received Health Care in another country in the last 2 years?,Do you have a new/worse cough or shortness of breath?,"If so, select all countries that apply","If so, select all infectious diseases that apply"
0,N,N,N,N,N,N,N,N,,
1,N,,N,,N,N,,N,,
2,N,,N,N,Y,N,,N,,
3,N,,N,N,N,N,,N,,
4,N,,N,N,Y,N,,N,IND,


In [134]:
screening_df.shape

(63474, 10)

In [135]:
screening_df.iloc[30000:30010]

Unnamed: 0,Are you feeling feverish or have had shakes or chills in the last 24 hours?,Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting?,Do you have a new Rash?,Do you have a new onset of Vomiting/Diarrhea in the last 24 hours?,Have you travelled outside of Canada/USA in the last 3 weeks?,Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks?,Have you received Health Care in another country in the last 2 years?,Do you have a new/worse cough or shortness of breath?,"If so, select all countries that apply","If so, select all infectious diseases that apply"
30000,N,,N,N,N,N,,N,,
30001,,,,N,N,N,,Y,,
30002,Y,,,N,N,,,Y,,
30003,Y,,N,N,N,N,,N,,
30004,N,,Y,N,N,N,,N,,
30005,N,,N,N,N,N,,N,,
30006,N,,N,N,Y,N,,N,USA,
30007,N,,N,N,N,N,,N,,
30008,N,,N,N,N,N,,N,,
30009,Y,,N,N,N,N,,Y,,


In [136]:
screening_df.replace('nan',np.NaN, inplace = True)

In [137]:
screening_df.iloc[30000:30010]

Unnamed: 0,Are you feeling feverish or have had shakes or chills in the last 24 hours?,Have you ever been isolated/required isolation for an infectious disease when receiving care in a healthcare setting?,Do you have a new Rash?,Do you have a new onset of Vomiting/Diarrhea in the last 24 hours?,Have you travelled outside of Canada/USA in the last 3 weeks?,Have you had contact with a sick person who has travelled outside of Canada/USA in the last 3 weeks?,Have you received Health Care in another country in the last 2 years?,Do you have a new/worse cough or shortness of breath?,"If so, select all countries that apply","If so, select all infectious diseases that apply"
30000,N,,N,N,N,N,,N,,
30001,,,,N,N,N,,Y,,
30002,Y,,,N,N,,,Y,,
30003,Y,,N,N,N,N,,N,,
30004,N,,Y,N,N,N,,N,,
30005,N,,N,N,N,N,,N,,
30006,N,,N,N,Y,N,,N,USA,
30007,N,,N,N,N,N,,N,,
30008,N,,N,N,N,N,,N,,
30009,Y,,N,N,N,N,,Y,,


## making the new target cells

In [138]:
admit_count= Counter(clin['AdmitLocation']); admit_count

Counter({nan: 56890,
         'E.5TH': 614,
         'E.7TH East': 842,
         'E.7TH West': 385,
         'E.10.RESP': 590,
         'E.MAU': 1043,
         'E.OVE': 646,
         'E.8SURGERY': 696,
         'E.9th MED': 230,
         'E.6TH': 554,
         'E.CCU': 202,
         'E.ICU': 96,
         'E.OVEMH': 156,
         'E.9MEDICAL': 143,
         'E.4TH': 10,
         'E.8MEDFLEX': 67,
         'E.SCN': 2,
         'E.9TH EAST': 127,
         'E.9TH WEST': 171,
         'E.7MEDFLEX': 10})

In [139]:
clin['outcome'] = clin['AdmitLocation'].astype('str').map(
{'nan': 'discharge',
         'E.OVE': 'madmit',
         'E.9MEDICAL': 'madmit',
         'E.CCU': 'madmit',
         'E.10.RESP': 'madmit',
         'E.MAU': 'madmit',
         'E.ICU': 'ICU',
         'E.8SURGERY': 'sadmit',
         'E.6TH': 'madmit',
         'E.5TH': 'madmit',
         'E.OVEMH': 'madmit',
         'E.7TH West': 'madmit',
         'E.9th MED': 'madmit',
         'E.7TH East': 'madmit',
         'E.4TH': 'madmit',
         'E.8MEDFLEX': 'madmit',
         'E.SCN': 'madmit',
         'E.9TH EAST': 'madmit',
         'E.9TH WEST': 'madmit',
         'E.7MEDFLEX': 'madmit'})

In [140]:
#first target columns is based on the admitting location
clin['target'] = clin['outcome'].map({'discharge': 1, 'madmit': 2, 'sadmit': 3, 'ICU': 4})

In [141]:
serv_count = Counter(clin['PatientService']); serv_count

Counter({nan: 56890,
         'Mental Health': 770,
         'Cardiology': 911,
         'General Medicine': 2508,
         'Palliative': 24,
         'Oncology': 27,
         'Paediatrics': 638,
         'Orthopaedics': 183,
         'General Surgery': 443,
         'ALC General Medicine': 80,
         'ALC Neurology': 32,
         'Neurology': 233,
         'Respirology': 413,
         'Genitourinary': 149,
         'ALC Respirology': 11,
         'INTENSIVE CARE UNIT': 100,
         'ALC Cardiology': 21,
         'ALC Orthopaedics': 9,
         'AD PALLIATIVE': 15,
         'AD General Medicine': 2,
         'Obstetrics': 11,
         'AD Oncology': 1,
         'ALC General Surgery': 1,
         'Neonatal Retro Transfers': 1,
         'Newborn': 1})

In [142]:
clin['service'] = clin['PatientService'].astype('str').map(
    {'nan': 'discharge',
         'Cardiology': 'madmit',
         'ALC General Medicine': 'madmit',
         'General Medicine': 'madmit',
         'INTENSIVE CARE UNIT': 'ICU',
         'Respirology': 'madmit',
         'General Surgery': 'sadmit',
         'Paediatrics': 'madmit',
         'Mental Health': 'madmit',
         'Orthopaedics': 'sadmit',
         'ALC Neurology': 'madmit',
         'Genitourinary': 'sadmit',
         'Neurology': 'madmit',
         'Palliative': 'madmit',
         'ALC Respirology': 'madmit',
         'Oncology': 'madmit',
         'ALC Orthopaedics': 'sadmit',
         'ALC Cardiology': 'madmit',
         'AD PALLIATIVE': 'madmit',
         'AD General Medicine': 'madmit',
         'Obstetrics': 'madmit',
         'AD Respirology': 'madmit',
         'AD Oncology': 'madmit',
         'ALC General Surgery': 'sadmit',
         'Neonatal Retro Transfers': 'madmit',
         'ALC Oncology': 'madmit',
         'AD Neurology': 'madmit',
         'Newborn': 'madmit'})

In [143]:
service_count = Counter(clin['service']); service_count

Counter({'discharge': 56890, 'madmit': 5699, 'sadmit': 785, 'ICU': 100})

In [144]:
#second target is based on the admitting service
clin['target2'] = clin['service'].map({'discharge': 1, 'madmit': 2, 'sadmit': 3, 'ICU': 4})

In [145]:
Counter(clin['target2'])

Counter({1: 56890, 2: 5699, 3: 785, 4: 100})

In [146]:
#creating this for later use to map the numbers in the dataframe column to their original meaning
out_dict = {'discharge': 1, 'madmit': 2, 'sadmit': 3, 'ICU': 4}
out_dict = {v:k for k,v in out_dict.items()}
out_dict

{1: 'discharge', 2: 'madmit', 3: 'sadmit', 4: 'ICU'}

In [147]:
#discharge vs. not target
clin['discharge'] = clin['outcome'].map({'discharge': 'discharge', 'madmit': 'admit', 'sadmit': 'admit', 'ICU': 'admit'})

In [148]:
clin['target3'] = clin['outcome'].map({'discharge': 1, 'madmit': 0, 'sadmit': 0, 'ICU': 0})

In [149]:
Counter(clin['discharge']), Counter(clin['target3'])

(Counter({'discharge': 56890, 'admit': 6584}), Counter({1: 56890, 0: 6584}))

In [150]:
#a target for trying to tell apart the inpatient disposition, only in admitted patients
clin['dispo']= clin['outcome'].map({'discharge': np.nan, 'madmit': 'madmit', 'sadmit': 'sadmit', 'ICU': 'ICU'})

In [151]:
clin['target4'] = clin['dispo'].map({np.nan:np.nan, 'madmit': 1, 'sadmit': 2, 'ICU': 3})
#for some reason a can't make a counter out of this without it crashing so I've stopped trying

In [152]:
Counter(clin['dispo']), Counter(clin['target4'].dropna())

(Counter({nan: 56890, 'madmit': 5792, 'sadmit': 696, 'ICU': 96}),
 Counter({1.0: 5792, 2.0: 696, 3.0: 96}))

In [153]:
#gonna make a target that is ICU vs other
clin['ICUvsother'] = clin['PatientService'][clin['PatientService'] == 'INTENSIVE CARE UNIT']
#clin['PatientService']

In [154]:
clin['target5'] = clin['ICUvsother'].map({'INTENSIVE CARE UNIT':1, np.nan:0})

In [155]:
sorted(clin.columns)

['AdmitLocation',
 'AgeInYrs',
 'AgeNumber',
 'BP',
 'BloodPressure_LastEDReading',
 'ChartNumber',
 'CleanSubjectiveNotes',
 'DischargeDisposition',
 'DischargeDispositionDesc',
 'Disposition Date & Time',
 'EncounterNumber',
 'Gender',
 'GenderDesc',
 'ICUvsother',
 'ID',
 'InfectionControlScreening',
 'Left ED Date & Time',
 'MainDiagnosisCode',
 'MainDiagnosisCodeDesc',
 'MedicalHistory',
 'O2Saturation_LastEDReading',
 'PIA Date & Time',
 'PatientService',
 'PresentingComplaint',
 'PresentingComplaintDesc',
 'Pulse_LastEDReading',
 'Reg Date & Time',
 'SubjectiveNotes',
 'Temperature_LastEDReading',
 'Triage Date & Time',
 'TriageLevel',
 'diastolic',
 'discharge',
 'dispo',
 'o2sat',
 'outcome',
 'pulse',
 'service',
 'systolic',
 'target',
 'target2',
 'target3',
 'target4',
 'target5',
 'temp']

## splitting into a bunch of sub dataframes
- loading the excel file is very slow, but loading csv files is faster so I'm making a bunch of dataframes of subsets of data for easy saving/loading/combining

In [156]:
tab_df = clin[['ID', 'TriageLevel',
       'AgeInYrs', 'GenderDesc', 'DischargeDisposition',
       'PresentingComplaint', 'PresentingComplaintDesc', 'AdmitLocation', 'PatientService',
       'BloodPressure_LastEDReading','systolic', 'diastolic','temp','pulse','o2sat']]

In [157]:
target_df = clin[['outcome','target', 'service','target2', 'discharge', 'target3','dispo','target4', 'ICUvsother', 'target5']]

In [158]:
date_df = clin[['Triage Date & Time', 'Reg Date & Time',
       'PIA Date & Time', 'Disposition Date & Time', 'Left ED Date & Time']]

## parsing of the medical history into categories

In [159]:
histories = Counter(clin['MedicalHistory']); len(histories)

18803

In [160]:
history = list(clin['MedicalHistory'].str[16:].astype('str'))

In [161]:
#this is to clean the text in the medical history column (a little bit)
def fixhx(s):
    s =s.replace('<LT><LF>','')
    s = s.replace('<LT>LF>','')
    s =s.replace('Pt.', 'patient')
    s =s.replace('Pt', 'patient')
    s = s.lower()
    s = s.replace('pt.', 'patient')
    s = s.replace('y/o', 'year old')
    s = s.replace('c/o', 'complains of')
    s = s.replace('sob', 'shortness of breath')
    s = s.replace('c/p', 'chest pain')
    s = s.replace('hrs', 'hours')
    s = s.replace('hx', 'history')
    s = s.replace('n/v', 'nausea and/or vomiting')
    s = s.replace('a/e', 'air entry')
    s = s.replace('a/o', 'alert and oriented')
    s = s.replace('a&o', 'alert and oriented')
    s = s.replace('d/c', 'discharge')
    s = s.replace('u/s', 'ultrasound')
    s = s.replace('yrs', 'years')
    s = s.replace('lmp', 'last menstrual period')
    s = s.replace('w/', 'with')
    s = s.replace('@', 'at ')
    s = s.replace('b/c', 'because')
    s = re.sub(r'\bpt.','patient ',s)
    s = re.sub(r'\blt.','left ',s)
    s = re.sub(r'\brt.','right ',s)
    s = re.sub(r'\bmd.','doctor ',s)
    s = re.sub(r'\bra\b','room air ',s)
    s = re.sub(r'\bbp.','blood pressure ',s)
    s = re.sub(r'\bcp.','chest pain ',s)
    s = re.sub(r'\bfd.','family doctor ',s)
    s = s.replace('abx', 'antibiotics')
    s = s.replace(' htn ', ' hypertension ')
    s = re.sub(r'\btyl\b', 'tylenol', s)
    s = re.sub(r'\bprn\b', 'as needed ',s)
    s = re.sub(r'\bvag\b', 'vaginal ',s)
    s = re.sub(r'\bpo\b', 'by mouth ',s)
    s = re.sub(r'\bllq\b', 'left lower quadrant',s)
    s = re.sub(r'\brlq\b', 'right lower quadrant  ',s)
    s = re.sub(r'\bluq\b', 'left upper quadrant',s)
    s = re.sub(r'\bruq\b', 'right upper quadrant  ',s)
    s = re.sub(r'\buti\b', 'urinary tract infection ',s)
    s = re.sub(r'\brd', 'right digit number',s)
    s = re.sub(r'\bld', 'left digit number',s)
    s = re.sub(r'\br\b', 'right ',s)
    s = re.sub(r'\bl\b', 'left ',s)
    s = re.sub(r'\bed\b', 'emergency department ',s)
    s = re.sub(r'\ber\b', 'emergency room ',s)
    s = re.sub(r'\bbm\b', 'bowel movement ',s)
    s = re.sub(r'#\s', 'fracture ', s)
    
    #remove extra white spaces
    s = re.sub(r'\s+', ' ', s)

    #note that what's missing here is removal of punctuation
    return s

In [162]:
replacements = (',','<LT>LF>', '.')
drop_list = ('"', '?')
split_hist = []
for item in history:
    for r in replacements:
        item = item.replace(r, '::')
    
    item = item.split('::')
    
    item = [re.sub(r'\(.*?\)\ *', '', s) for s in item]
    
    item = [fixhx(s) for s in item]
    for d in drop_list:
        item = [x.replace(d,'') for x in item]
    
    item = [x.strip().lower() for x in item if x != 'nan']
    item = [x for x in item if len(x) > 2]
    
    item = list(filter(None, item))
    
    split_hist.append(item)
    

In [163]:
#rejoining them as a string with diagnoses separated by commas
join_hist = [', '.join(item) for item in split_hist]

In [164]:
#replacing empty string with NaN
join_hist = [np.nan if item == 'nan' else item for item in join_hist]

In [165]:
split_hist[2000:2010]

[['mitral valve regurgitation', 'hypertension'],
 ['no significant medical history'],
 ['no significant medical history'],
 ['no significant medical history'],
 ['asthma'],
 ['no significant medical history'],
 ['no significant medical history'],
 ['tia', 'high cholesterol', 'hypertension', 'hypothyroid'],
 ['no significant medical history'],
 ['asthma']]

In [166]:
join_hist[2000:2010]

['mitral valve regurgitation, hypertension',
 'no significant medical history',
 'no significant medical history',
 'no significant medical history',
 'asthma',
 'no significant medical history',
 'no significant medical history',
 'tia, high cholesterol, hypertension, hypothyroid',
 'no significant medical history',
 'asthma']

In [167]:
clin['pmhx'] = join_hist

In [168]:
subj_df = clin[['CleanSubjectiveNotes', 'pmhx']]

### exploring different diagnoses

In [169]:
diagnoses = Counter()
for hx in split_hist:
    for item in hx:
        diagnoses[item] += 1

In [170]:
print ('# of unique diagnoses:', len(diagnoses))

# of unique diagnoses: 9637


In [171]:
#sorted diagnoses
diagnoses.most_common()

[('no significant medical history', 29730),
 ('hypertension', 12552),
 ('high cholesterol', 6509),
 ('niddm', 5284),
 ('asthma', 2688),
 ('depression', 1856),
 ('thyroid', 1683),
 ('high bp', 1662),
 ('hypothyroid', 1523),
 ('anxiety', 1393),
 ('iddm', 1381),
 ('gerd', 1251),
 ('high chol', 790),
 ('arthritis', 775),
 ('acid reflux', 707),
 ('anemia', 636),
 ('copd', 499),
 ('cholesterol', 488),
 ('chol', 486),
 ('gout', 477),
 ('chf', 461),
 ('dementia', 419),
 ('diabetes', 406),
 ('cva', 390),
 ('seizure', 367),
 ('afib', 358),
 ('enlarged prostate', 343),
 ('osteoporosis', 331),
 ('schizophrenia', 324),
 ('dmii', 312),
 ('migraines', 289),
 ('atrial fibrillation', 279),
 ('cholestrol', 271),
 ('blood pressure', 265),
 ('chronic back pain', 260),
 ('smoker', 249),
 ('bipolar', 248),
 ('cad', 244),
 ('kidney stones', 243),
 ('urinary tract infection', 229),
 ('high cholestrol', 210),
 ('cardiac', 203),
 ('pacemaker', 200),
 ('thyroid problem', 199),
 ('low iron', 198),
 ('adhd', 184),

In [172]:
#this is a dictionary mapping the rank of the diagnosis to its name
dx_rank_dict = {item[0]:i+1 for i,item in enumerate(diagnoses.most_common())}

#this is a dictionary mapping the frequency of occurence to its name
dx_freq_dict = {item[0]:item[1]/len(diagnoses) for item in diagnoses.most_common()}

In [173]:
#trying to get a feel for how many diagoses are provided for each patient
split_hist_len = [len(item) for item in split_hist]
hxdf = pd.DataFrame(split_hist_len)

In [174]:
hxdf.describe()

Unnamed: 0,0
count,63474.0
mean,1.660302
std,1.24824
min,0.0
25%,1.0
50%,1.0
75%,2.0
max,21.0


In [175]:
lengths = Counter(split_hist_len); lengths

Counter({2: 8749,
         1: 43316,
         3: 5599,
         4: 3106,
         9: 67,
         6: 629,
         7: 272,
         5: 1423,
         0: 97,
         8: 147,
         11: 19,
         10: 32,
         12: 7,
         19: 1,
         21: 1,
         13: 5,
         15: 2,
         14: 1,
         17: 1})

In [176]:
#most patients have only a single diagnosis, 80% have 2 or less, 96% have 6 or less
for k,v in sorted(lengths.items()):
    print (k, '{0:2f}%'.format(v*100/len(split_hist_len)))

0 0.152818%
1 68.242115%
2 13.783596%
3 8.820935%
4 4.893342%
5 2.241863%
6 0.990957%
7 0.428522%
8 0.231591%
9 0.105555%
10 0.050414%
11 0.029934%
12 0.011028%
13 0.007877%
14 0.001575%
15 0.003151%
17 0.001575%
19 0.001575%
21 0.001575%


In [177]:
#this will create a list for each item where the top 6 diagnoses are listed in order
ordered_hist = []
for item in split_hist:
    #print (item)
    #print (len(item))
    top_6 = []
    for hx in item:
        #print (hx)
        #print (dx_rank_dict[hx])
        #print (dx_rank_dict[hx], hx)
        if item == 'nan':
            top_6.append(np.nan)
        else:
            top_6.append((dx_rank_dict[hx], hx))
    #print (top_6)
    sort = sorted(top_6)
    #print (sort)
    sort2 = [ii for i,ii in sort]
    #print(sort2)
    if len(sort2) > 6:
        sort2 = sort2[:5]
    if len(sort2) < 6:
        pad_req = 6-len(sort2)
        i = 0
        while i < pad_req:
            sort2.append(np.nan) #('n/a')
            i = i+1
        
    #print (top_6)
    #print (sorted(top_6))
    ordered_hist.append(sort2)
    #print ('next item')
    #print ()

In [178]:
medhx_df = pd.DataFrame(ordered_hist, columns = ['medhx' + str(i+1) for i in range(6)])
medhx_df.shape

(63474, 6)

In [179]:
ordered_hist[2000:2020]

[['hypertension', 'mitral valve regurgitation', nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['asthma', nan, nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['hypertension', 'high cholesterol', 'hypothyroid', 'tia', nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['asthma', nan, nan, nan, nan, nan],
 ['niddm', 'tia', nan, nan, nan, nan],
 ['pancreatitis', nan, nan, nan, nan, nan],
 ['hypertension', 'anemia', 'copd', 'dementia', 'cva', 'osteoarthritis'],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['no significant medical history', nan, nan, nan, nan, nan],
 ['no significant medical history',
  'no significant medical history

In [180]:
comorbids = []

for item in ordered_hist:
    res = [x for x in item if str(x) != 'nan']
    if len(res) == 0: comorbids.append(np.nan)
    else:
        res = [x for x in res if x != 'no significant medical history']
        #print (res, len(res))
        comorbids.append(len(res))

In [181]:
#so now I want to add this to the tabular data frame
tab_df['num_comorbids'] = comorbids

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [182]:
clin['num_comorbids'] = comorbids

In [185]:
clin.shape

(63474, 47)

In [189]:
#gonna merge the screening questions into the main dataframe
screening_df.index = clin.index

clin = pd.concat([clin,screening_df], axis = 1)

In [201]:
clin.shape

(63474, 57)

### now I'm going to make a couple of balanced datasets

In [202]:
Counter(clin.target), Counter(clin.target3)

(Counter({1: 56890, 2: 5792, 3: 696, 4: 96}), Counter({1: 56890, 0: 6584}))

In [203]:
admit_df = clin[clin.target3 == 0]

dc_df = clin[clin.target3==1]
dc_df = dc_df.sample(n= len(admit_df))  #this is the number of admissions from target 3


In [204]:
balanced_df1 = pd.concat([admit_df, dc_df], axis = 0)

In [205]:
balanced_df1.columns

Index(['ID', 'ChartNumber', 'EncounterNumber', 'TriageLevel', 'AgeNumber',
       'AgeInYrs', 'GenderDesc', 'Triage Date & Time', 'Reg Date & Time',
       'PIA Date & Time', 'Disposition Date & Time', 'DischargeDisposition',
       'DischargeDispositionDesc', 'Left ED Date & Time',
       'PresentingComplaint', 'PresentingComplaintDesc', 'MainDiagnosisCode',
       'MainDiagnosisCodeDesc', 'AdmitLocation', 'PatientService',
       'SubjectiveNotes', 'InfectionControlScreening', 'MedicalHistory',
       'BloodPressure_LastEDReading', 'O2Saturation_LastEDReading',
       'Pulse_LastEDReading', 'Temperature_LastEDReading', 'o2sat', 'pulse',
       'temp', 'CleanSubjectiveNotes', 'BP', 'systolic', 'diastolic', 'Gender',
       'outcome', 'target', 'service', 'target2', 'discharge', 'target3',
       'dispo', 'target4', 'ICUvsother', 'target5', 'pmhx', 'num_comorbids',
       'Are you feeling feverish or have had shakes or chills in the last 24 hours?',
       'Have you ever been isolated/

In [206]:
icu_df = clin[clin.target2 == 4]
madmit_df = clin[clin.target2 ==3].sample(n = len(icu_df))
sadmit_df = clin[clin.target2 ==2].sample(n = len(icu_df))
disc_df = clin[clin.target2 == 1].sample(n = len(icu_df))

In [207]:
#this is a dataframe with equal numbers of all 4 classes
balanced_df2 = pd.concat([icu_df, madmit_df, sadmit_df, disc_df], axis = 0)

In [208]:
non_icu_df = clin[clin.target2 !=4].sample(n = len(icu_df))

In [209]:
balanced_df3 = pd.concat([icu_df, non_icu_df], axis = 0)

In [210]:
balanced_df3.columns

Index(['ID', 'ChartNumber', 'EncounterNumber', 'TriageLevel', 'AgeNumber',
       'AgeInYrs', 'GenderDesc', 'Triage Date & Time', 'Reg Date & Time',
       'PIA Date & Time', 'Disposition Date & Time', 'DischargeDisposition',
       'DischargeDispositionDesc', 'Left ED Date & Time',
       'PresentingComplaint', 'PresentingComplaintDesc', 'MainDiagnosisCode',
       'MainDiagnosisCodeDesc', 'AdmitLocation', 'PatientService',
       'SubjectiveNotes', 'InfectionControlScreening', 'MedicalHistory',
       'BloodPressure_LastEDReading', 'O2Saturation_LastEDReading',
       'Pulse_LastEDReading', 'Temperature_LastEDReading', 'o2sat', 'pulse',
       'temp', 'CleanSubjectiveNotes', 'BP', 'systolic', 'diastolic', 'Gender',
       'outcome', 'target', 'service', 'target2', 'discharge', 'target3',
       'dispo', 'target4', 'ICUvsother', 'target5', 'pmhx', 'num_comorbids',
       'Are you feeling feverish or have had shakes or chills in the last 24 hours?',
       'Have you ever been isolated/

## saving dataframes to csv files

In [211]:
balanced_df1.to_csv(data_path + '/balanced_admit_dc_nlp_data.csv')

balanced_df2.to_csv(data_path +'/balanced_4cls_nlp_data.csv')

balanced_df3.to_csv(data_path +'/balanced_icuvsother_nlp_data.csv')

target_df.to_csv(data_path  + '/targets.csv')

screening_df.to_csv(data_path + '/inf_control_data.csv')

tab_df.to_csv(data_path + '/tabular_data.csv')

date_df.to_csv(data_path + '/data_data.csv')

subj_df.to_csv(data_path + '/subj_data.csv')

medhx_df.to_csv(data_path + '/med_hx.csv')

In [212]:
subj_df.head()

Unnamed: 0,CleanSubjectiveNotes,pmhx
511,complains of central chest tighypertensioness ...,"childhood heart murmur, drug abuse"
754,patient says noted blood in stool yesterday an...,seizure
755,"right flank pain since morning,on her periods ...","no significant medical history, ovarian cyst"
757,"abdo pain onset monday,not seen by family doctor",no significant medical history
758,"fell last monday landed on ground, "" dizzy"" at...","high cholesterol, 2 stents 2014, low bp"
