This notebook combines Rachel Gold (@rachgoldaz, RG) and Rosie Cima (@cimar, RC) distinct methodologies for flagging potentially pregnant patients among the EMTALA deficiencies reported by CMS.

In [203]:
import pandas

# Stop phrases for @rachgoldaz methodology for detecting pregnant patients. @cimar note: This is @rachgoldaz's attempt to remove boilerplate that contains her keywords. I minimally edited this, but took out "vaginal pain, abdominal bleeding" from this list because it didn't have a corresponding keyword
IGNORE_PHRASES_PASS_RG = ['pregnancy test', 'medical condition, and /or pregnancy within its capabilities', 'medical condition, and/or pregnancy within its capabilities','pregnancy/active labor','hospital does not do ultrasounds except for pregnancy','the hospital does not do ultrasounds except for pregnancy','treatment based on how far along they were in their pregnancy','relates to pregnancy','during her pregnancy could be assessed for labor by a labor and delivery nurse','A preterm or premature baby is delivered before 37 weeks of the pregnancy','medical screening examinations for patients with pregnancy-related conditions under standardized procedures','urine test for pregnancy','urine pregnancy test if potential for pregnancy','except for pregnancy','citizenship, religion, pregnancy','without pregnancy','pregnancy and childbirth','no intrauterine pregnancy','urine pregnancy','Drug screen, Urine, pregnancy','complaints were not related to pregnancy','presenting to ED with pregnancy greater than 20 weeks','complaint is non-pregnancy related','An evaluation sufficient to determine if an emergency medical condition or pregnancy with contractions exists','possible EMCs related to pregnancy','If an emergency medical condition or pregnancy with contractions is present, the hospital must provide such additional medical examination and treatment','A minor who understands the nature and consequences of treatment is capable of consenting if the minor is 18 years of age or older, graduated from high school, has married, has been pregnant, needs diagnosis or treatment of pregnancy or venereal disease, or is 14 years of age or older and requests psychiatric treatment','someone in need of emergency care for a psychiatric or pregnancy-relations condition','discussion with prophylaxis against pregnancy','In pregnancy at-term, stabilization includes delivery of the child and the placenta','Using screen for pregnancy','Abdominal pain - any female of childbearing age requiring diagnostic testing to determine pregnancy','policy when presenting unscheduled for pregnancy related emergency care']

# Keywords for @rachgoldaz methodology for detecting pregnant patients
KEYWORDS_RG = ["weeks pregnant","miscarried","stillborn","water breaking","water broke","weeks gestation","weeks' gestation","weeks with labor","week pregnant","she was pregnant","was pregnant","water had broken","was in labor","was in active labor","was born","was noticeably pregnant","year old pregnant","months pregnant","months gestation","wks (weeks) preg (pregnant)","currently pregnant","weeks of pregnancy","gestational age","leaking amniotic fluid","wks (weeks)","pregnancy"]

# Stop phrases @cimar methodology for detecting pregnant patients
IGNORE_PHRASES_PASS_RC = ["pregnancy test","test for pregnancy","active labor act","active labor (sic) act"]

# Keywords for @cimar methodology for detecting pregnant patients
KEYWORDS_RC = ['gravid','pregnan','eclampsia','caeserian',' c-section',' csection',' c section',' para ','gestation','water break','water broke','active labor','obstetr']

# EMTALA deficiency codes: (2400, 2401, 2402, 2403, 2404, 2405, 2406, 2407, 2408, 2409, 2410, 2411)
EMTALA_RANGE = range(2400,2412)

q3pt1 = pandas.read_excel("data/source/Hospital_2567s_2022Q3/Hospital 2567s - 2022Q3 Part 1.xlsx")
q3pt2 = pandas.read_excel("data/source/Hospital_2567s_2022Q3/Hospital 2567s - 2022Q3 Part 2.xlsx")

q3 = pandas.concat([q3pt1,q3pt2])

# This function standardizes the text field by lowercasing it and removing the phrases in the ignore_words list
def std_text(insp_text, ignore_words):
    std_text = insp_text.lower()
    for p in ignore_words:
        std_text = std_text.replace(p,"")
    return std_text

# @rachgoldaz's methodology looks for keywords in the standardized text field (std_text_rg)
def search_kw(std_text,kw):
    for k in kw:
        if k in std_text:
            return True
    return False

def make_key_identifier(row):
    return str(row["deficiency_tag"]).split(".")[0] + " " + str(row["EVENT_ID"])

q3["key_identifier"] = q3.apply(make_key_identifier, axis=1)
q3["std_text_rg"] = q3["inspection_text"].apply(std_text, ignore_words=IGNORE_PHRASES_PASS_RG)
q3["std_text_rc"] = q3["inspection_text"].apply(std_text, ignore_words=IGNORE_PHRASES_PASS_RC)
q3["may_be_pregnant_rg"] = q3["std_text_rg"].apply(search_kw, kw=KEYWORDS_RG)

In [204]:
import re

# The inspection text often has phrases that identify single, anonymous patients. These are the substrings and regex that indicate a patient identifier.
PATIENT_ID_STRS = ["patient #","patient id #", "pt #", "pi #", "(pi) #"]
PATIENT_ID_REGEX = ["patient \\d","patient id \\d","pt \\d", "pi \\d", "(pi) \\d"]

WINDOW_AFTER = 200
WINDOW_BEFORE = 100

# @cimar's methodology is two-part. This function looks for keywords in the vicinity of patient identifiers (defined as substrings or regex). It returns the first keyword found -- if it finds one. Otherwise it returns None.
def may_be_preg_rc_near_text(text):
    for p in PATIENT_ID_STRS:
        inc = 0
        while True:
            ind = text.find(p,inc)
            if ind < 0:
                break
            start = ind - WINDOW_BEFORE
            end = ind + WINDOW_AFTER
            substr = text[start:end]
            k = search_kw(substr,KEYWORDS_RC)
            for k in KEYWORDS_RC:
                if k in substr:
                    return k
            inc = ind+len(p)
    for r in PATIENT_ID_REGEX:
        inc = 0
        while True:
            contains_regex = re.search(r,text[inc:])
            if not contains_regex:
                break
            ind = contains_regex.span()[0]
            start = ind - WINDOW_BEFORE
            end = ind + WINDOW_AFTER
            substr = text[start:end]
            for k in KEYWORDS_RC:
                if k in substr:
                    return k
            inc += contains_regex.span()[1]
    return None

# @cimar's methodology is two-part. This function looks for keywords in the same paragraphs as patient identifiers (defined as substrings or regex). It returns the first keyword found -- if it finds one. Otherwise it returns None.
def may_be_preg_rc_graf(text):
    grafs = text.split("\n")
    for g in grafs:
        for p in PATIENT_ID_STRS:
            if p in g:
                for k in KEYWORDS_RC:
                    if k in g:
                        return k
        for r in PATIENT_ID_REGEX:
            if re.search(r,g):
                for k in KEYWORDS_RC:
                    if k in g:
                        return k
    return None

def is_emtala_deficiency(code):
    if code in EMTALA_RANGE:
        return True
    return False

q3["may_be_pregnant_rc_near_text"] = q3["std_text_rc"].apply(may_be_preg_rc_near_text)
q3["may_be_pregnant_rc_graf"] = q3["std_text_rc"].apply(may_be_preg_rc_graf)

In [205]:
q3[q3["key_identifier"]=="2405 Z9WC11"]

Unnamed: 0,facility_name,hospital_type,facility_id,address,city,state,deficiency_tag,missing_survey_tag_count,dfcncy_desc,defpref,inspection_date,EVENT_ID,inspection_text,key_identifier,std_text_rg,std_text_rc,may_be_pregnant_rg,may_be_pregnant_rc_near_text,may_be_pregnant_rc_graf
1164,EASTERN PLUMAS HOSPITAL - PORTOLA CAMPUS,Critical Access Hospitals,51300,500 FIRST AVENUE,PORTOLA,CA,2405.0,,EMERGENCY ROOM LOG,C,2020-01-30,Z9WC11,_x000d_\n_x000d_\nBased on interview and reco...,2405 Z9WC11,_x000d_\n_x000d_\nbased on interview and reco...,_x000d_\n_x000d_\nbased on interview and reco...,False,,


In [206]:
# Passing @rachgoldaz's methodology or either part of @cimar's methodology AND having an EMTALA deficiency code flags the case for manual review.
emtala_may_be_preg = q3[((~q3["may_be_pregnant_rc_graf"].isnull())|(~q3["may_be_pregnant_rc_near_text"].isnull())|(q3["may_be_pregnant_rg"]))&(q3["deficiency_tag"].apply(is_emtala_deficiency))]

# Export for manual review -- there are csv encoding issues with the inspection text field, so I'm exporting to Excel.
emtala_may_be_preg.to_excel("data/processed/emtala_may_be_preg.xlsx",index=False)

emtala_may_be_preg.shape

(789, 19)

In [207]:
# Importing the manually reviewed spreadsheet

manual_review = pandas.read_excel("data/manual/EMTALA_PREGNANT_2011-2022.xlsx")
manual_review["key_id_2"] = manual_review.apply(make_key_identifier, axis=1)
manual_review_keys = pandas.Series(manual_review["key_id_2"].unique())

emtala_may_be_preg[~(emtala_may_be_preg["key_identifier"].isin(manual_review_keys))]

Unnamed: 0,facility_name,hospital_type,facility_id,address,city,state,deficiency_tag,missing_survey_tag_count,dfcncy_desc,defpref,inspection_date,EVENT_ID,inspection_text,key_identifier,std_text_rg,std_text_rc,may_be_pregnant_rg,may_be_pregnant_rc_near_text,may_be_pregnant_rc_graf
252,MANIILAQ HEALTH CENTER,Critical Access Hospitals,21310,PO BOX 43,KOTZEBUE,AK,2406.0,,MEDICAL SCREENING EXAM,C,2012-12-07,DNZO11,**NOTE- TERMS IN BRACKETS HAVE BEEN EDITED TO ...,2406 DNZO11,**note- terms in brackets have been edited to ...,**note- terms in brackets have been edited to ...,False,,obstetr
1501,SURPRISE VALLEY COMMUNITY HOSPITAL,Critical Access Hospitals,51308,741 NORTH MAIN STREET,CEDARVILLE,CA,2406.0,,MEDICAL SCREENING EXAM,C,2016-04-27,2QI511,**NOTE- TERMS IN BRACKETS HAVE BEEN EDITED TO ...,2406 2QI511,**note- terms in brackets have been edited to ...,**note- terms in brackets have been edited to ...,False,pregnan,
2708,SOUTHWEST MEMORIAL HOSPITAL,Critical Access Hospitals,61327,1311 N MILDRED RD,CORTEZ,CO,2402.0,,POSTING OF SIGNS,C,2018-10-25,C4HB11,_x000d_\n_x000d_\nBased on observation and in...,2402 C4HB11,_x000d_\n_x000d_\nbased on observation and in...,_x000d_\n_x000d_\nbased on observation and in...,True,,
6186,PALO ALTO COUNTY HOSPITAL,Critical Access Hospitals,161357,3201 1ST STREET,EMMETSBURG,IA,2400.0,,COMPLIANCE WITH 489.24,C,2015-02-17,3P9F11,_x000d_\n_x000d_\n\nBased on a review of poli...,2400 3P9F11,_x000d_\n_x000d_\n\nbased on a review of poli...,_x000d_\n_x000d_\n\nbased on a review of poli...,False,obstetr,
8268,TRIGG COUNTY HOSPITAL,Critical Access Hospitals,181304,254 MAIN STREET,CADIZ,KY,2405.0,,EMERGENCY ROOM LOG,C,2015-05-01,46Q811,**NOTE- TERMS IN BRACKETS HAVE BEEN EDITED TO ...,2405 46Q811,**note- terms in brackets have been edited to ...,**note- terms in brackets have been edited to ...,False,pregnan,pregnan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22356,CASCADE VALLEY HOSPITAL,Short Term,500060,330 S STILLAGUAMISH AVE,ARLINGTON,WA,2406.0,,MEDICAL SCREENING EXAM,A,2021-12-07,CGF311,"_x000d_\n_x000d_\n. \nBased on interview, doc...",2406 CGF311,"_x000d_\n_x000d_\n. \nbased on interview, doc...","_x000d_\n_x000d_\n. \nbased on interview, doc...",True,,
23605,ASCENSION NE WISCONSIN MERCY CAMPUS,Short Term,520048,500 S OAKWOOD RD,OSHKOSH,WI,2400.0,,COMPLIANCE WITH 489.24,A,2016-05-19,ZSG311,"_x000d_\n_x000d_\nBased on observation, inter...",2400 ZSG311,"_x000d_\n_x000d_\nbased on observation, inter...","_x000d_\n_x000d_\nbased on observation, inter...",True,obstetr,
23606,ASCENSION NE WISCONSIN MERCY CAMPUS,Short Term,520048,500 S OAKWOOD RD,OSHKOSH,WI,2402.0,,POSTING OF SIGNS,A,2016-05-19,ZSG311,"_x000d_\n_x000d_\nBased on observation, inter...",2402 ZSG311,"_x000d_\n_x000d_\nbased on observation, inter...","_x000d_\n_x000d_\nbased on observation, inter...",True,,
23653,ASCENSION COLUMBIA ST MARY'S HOSPITAL MILWAUKEE,Short Term,520051,2323 N LAKE DR,MILWAUKEE,WI,2400.0,,COMPLIANCE WITH 489.24,A,2019-12-19,TX7M11,"_x000d_\n_x000d_\n\nBased on record review, o...",2400 TX7M11,"_x000d_\n_x000d_\n\nbased on record review, o...","_x000d_\n_x000d_\n\nbased on record review, o...",False,obstetr,obstetr


In [208]:
is_pregnant = emtala_may_be_preg[(emtala_may_be_preg["key_identifier"].isin(manual_review_keys))]

is_pregnant.to_excel("data/processed/manual_recombo.xlsx",index=False)
is_pregnant.shape

(694, 19)

In [214]:
count_preg_emtala = is_pregnant["EVENT_ID"].nunique()

count_all_emtala = q3[q3["deficiency_tag"].apply(is_emtala_deficiency)]["EVENT_ID"].nunique()
print(count_preg_emtala)
print(count_all_emtala)
count_preg_emtala/count_all_emtala

431
2694


0.15998515219005197

In [218]:
# Same as the above cell but limited to cases where the inspection_date is 2013 or later.
count_preg_emtala = is_pregnant[is_pregnant["inspection_date"]>="2013-01-01"]["EVENT_ID"].nunique()
count_all_emtala = q3[(q3["deficiency_tag"].apply(is_emtala_deficiency))&(q3["inspection_date"]>="2013-01-01")]["EVENT_ID"].nunique()
print(count_preg_emtala)
print(count_all_emtala)
count_preg_emtala/count_all_emtala

359
2250


0.15955555555555556

In [210]:
old_additions = pandas.read_csv("data/processed/in_rc_not_rg--just_emtala.csv")
old_additions["key_identifier"] = old_additions["key_thing"].apply(lambda x: x.split("-")[1] + " " + x.split("-")[0])
old_add_keys = old_additions["key_identifier"].unique()

just_rc_method = emtala_may_be_preg[(~emtala_may_be_preg["may_be_pregnant_rc_graf"].isnull() | ~emtala_may_be_preg["may_be_pregnant_rc_near_text"].isnull()) & (emtala_may_be_preg["may_be_pregnant_rg"]==False)]

a_few_more = just_rc_method[~(just_rc_method["key_identifier"].isin(old_add_keys))]
a_few_more.to_excel("data/processed/emtala_new_to_check.xlsx",index=False)
a_few_more.shape


(0, 19)

In [169]:
manual_review_keys[~manual_review_keys.isin(emtala_may_be_preg["key_identifier"])]

123                                              nan nan
330     Hospital #1's Staffing Schedule review  patie...
331                            Nephrology  Pulmonologist
332     was diagnosed    with a ruptured appendix and...
333     oxygen  bleeding controlled and a dressing wa...
334                                             #18  #24
336     was diagnosed    with a ruptured appendix and...
536                                              581 nan
551                                             2018 nan
553                                              596 nan
dtype: object

In [170]:
manual_review[(manual_review["key_identifier"].duplicated())&(manual_review["key_identifier"].notna())][["deficiency_tag","EVENT_ID","key_identifier"]]["key_identifier"].tolist()

[' Patient #1',
 '2400 XI4411',
 '2406 XI4411',
 '2407 XI4411',
 '2411 0I8X11',
 '2407 HNOX11']

In [171]:
manual_review[(manual_review["Unnamed: 0"].notna()) & (manual_review["key_identifier"].isna())]

Unnamed: 0.1,Unnamed: 0,facility_name,hospital_type,facility_id,address,city,state,deficiency_tag,dfcncy_desc,inspection_date,...,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,key_id_2
122,123,WIREGRASS MEDICAL CENTER,Short Term,10062,1200 W MAPLE AVENUE,GENEVA,AL,2406,MEDICAL SCREENING EXAM,2017-06-29 00:00:00,...,,,,,,,,,,2406 8GFE11
124,ng evaluation or treatment for a medical condi...,if such condition exists either (a) stabilize...,,,,,,,,,...,,,,,,,,,,nan nan
126,"An """"emergency medical condition"""" is one with...",**NOTE- TERMS IN BRACKETS HAVE BEEN EDITED TO ...,,,,,,,,,...,,,,,,,,,,nan nan
128,ng evaluation or treatment for a medical condi...,if such condition exists either (a) stabilize...,,,,,,,,,...,,,,,,,,,,nan nan
130,"An """"emergency medical condition"""" is one with...",123,2406 8GFE11,2017,134,,,,,,...,,,,,,,,,,nan nan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1040,included: a HPI,a medical and psychiatric history,a ROS,and a physical exam was initiated. Review of ...,"""""4-year""",**NOTE- TERMS IN BRACKETS HAVE BEEN EDITED TO ...,,,,,...,,,,,,,,,,nan nan
1042,included: a HPI,a medical and psychiatric history,a ROS,and a physical exam was initiated. Review of ...,"""""4-year""",543,2407 2KF511,2018,595,,...,,,,,,,,,,2018 nan
1043,544,TRISTAR CENTENNIAL MEDICAL CENTER,Short Term,440161,2300 PATTERSON STREET,NASHVILLE,TN,2409,APPROPRIATE TRANSFER,2018-01-08 00:00:00,...,,,,,,,,,,2409 2KF511
1045,ded a HPI,a medical and psychiatric history,a ROS and physical exam was initiated. ED PA...,""""" ...C""",**NOTE- TERMS IN BRACKETS HAVE BEEN EDITED TO ...,,,,,,...,,,,,,,,,,nan nan


In [172]:
manual_review["facility_id"].nunique()

426