In [1]:
from pipeline import *
from process_pdf import *

## Load the Data

#### Necessary Input from Discharge
1. Zipcode
2. bool_services: whether or not the patient needs each of nursing, physical therapy, occupational therapy, speech pathology, social services, and home health aide

In [2]:
# Patient inputs taken from the discharge information.
zipcode = 94044
# One flag per service. The order presumably follows the markdown list above
# (nursing, PT, OT, speech pathology, social services, home health aide) --
# TODO confirm against FilterByService.
bool_services = [True, True, True, True, True, True]

In [3]:
# Load the provider table and keep only the California rows.
df = pd.read_csv('data/HH_Provider_Oct2020.csv')
# Chain reset_index so a fresh frame is produced, rather than resetting the
# index in place on a boolean-filtered slice of df.
df_cal = df[df['State'] == 'CA'].reset_index(drop=True)
# Drop every column whose name starts with 'Footnote'.
df_cal = df_cal.loc[:, ~df_cal.columns.str.startswith('Footnote')]

# Look up the CCNs listed for the patient's ZIP code.
df_zip = pd.read_csv('data/HH_Zip_Oct2020.csv')
# NOTE(review): the column key ' ZIP Code' has a leading space -- presumably it
# mirrors the CSV header exactly; confirm before normalizing it.
cms_nums = df_zip[df_zip[' ZIP Code'] == zipcode]['CMS Certification Number (CCN)']
# Keep only the California providers whose CCN appears in that lookup.
df_cal = df_cal[df_cal['CMS Certification Number (CCN)'].isin(cms_nums)]

### Remove Stop Words & Get List of Words

In [4]:
# NOTE(review): `text` is not assigned in any visible cell -- presumably it is
# provided by one of the star imports or by a cell that is not shown. Confirm
# this cell runs on a fresh kernel (Restart & Run All).
word_list = text_process(text)

In [5]:
# Rebind df_cal to the result of renamed_qcols (imported helper, not shown here).
# NOTE(review): re-running this cell passes the already-processed frame back
# in -- confirm the helper tolerates that.
df_cal = renamed_qcols(df_cal)

In [6]:
# Keyword lists, one per quality question (Q3-Q12 and Q16).
# Each list is later intersected with the words extracted from the text.
q3_keys = ["falling", "fall", "fell", "tripped", "trip", "tumble"]
q4_keys = ["depression", "mental", "bipolar", "sad", "upset", "trauma", "traumatic"]
q5_keys = ["flu"]
q6_keys = ["pneumonia", "pneumococcal"]
q7_keys = ["diabetes", "foot"]
q8_keys = ["walk", "walking", "moving"]
q9_keys = ["bed"]
q10_keys = ["bathing", "bathe", "bath"]
# NOTE(review): "shortness of breath" is a multi-word phrase; it can only match
# if word_list contains whole phrases -- confirm against text_process.
q11_keys = ["breath", "shortness of breath", "shortness", "breathing", "breathe", "oxygen"]
# Repeated "puncture" and "slash" entries removed; brackets already allow the
# list to span lines, so no backslash continuations are needed.
q12_keys = [
    "wound", "wounds", "cut", "slice", "lesion",
    "gash", "puncture", "slash", "laceration", "tear",
    "rent", "sore", "graze", "scratch",
    "scrape", "abrasion", "bruise", "contusion", "trauma",
]
q16_keys = ["ulcer", "pressure", "skin", "swelling", "post-acute"]

In [7]:
# All keyword lists gathered in question order (Q3 through Q12, then Q16).
q_keys = [
    q3_keys,
    q4_keys,
    q5_keys,
    q6_keys,
    q7_keys,
    q8_keys,
    q9_keys,
    q10_keys,
    q11_keys,
    q12_keys,
    q16_keys,
]

In [8]:
# Question labels, listed in the same order as the keyword lists in q_keys.
keys = [f'Q{i}' for i in [3,4,5,6,7,8,9,10,11,12,16]]

# zip() silently drops unmatched entries, so fail loudly if the two parallel
# lists ever drift apart.
assert len(keys) == len(q_keys), "keys and q_keys must have the same length"

# Build the word set once instead of once per question.
words = set(word_list)

# A question is flagged when at least one of its keywords occurs in the text.
# isdisjoint tests for a non-empty overlap directly, rather than relying on the
# truthiness of the overlapping elements.
flags = [not words.isdisjoint(q_words) for q_words in q_keys]

# Map each question label to its flag.
flagged_qtopic = dict(zip(keys, flags))

In [9]:
# Show which question topics were flagged by the keyword match.
flagged_qtopic

{'Q3': False,
 'Q4': False,
 'Q5': False,
 'Q6': False,
 'Q7': False,
 'Q8': False,
 'Q9': False,
 'Q10': False,
 'Q11': True,
 'Q12': False,
 'Q16': True}

## Pipeline Inputs

In [10]:
# Questions that were flagged by NLP, in the insertion order of flagged_qtopic.
Q_flagged = [
    question
    for question, is_flagged in flagged_qtopic.items()
    if is_flagged
]

In [11]:
# Human-readable topic name for each question label.
Q_dict = {
    'Q3': 'Falling',
    'Q4': 'Depression',
    'Q5': 'Flu',
    'Q6': 'Pneumonia',
    'Q7': 'Diabetes',
    'Q8': 'Moving Around',
    'Q9': 'Getting in Bed',
    'Q10': 'Bathing',
    'Q11': 'Breathing',
    'Q12': 'Wounds',
    'Q16': 'Skin Integrity',
}

### Recommend() 
1. Currently, this naively sorts by the order of the questions that were flagged
2. We should weight the questions somehow
3. We could also take into account PPR and DTC (Potentially Preventable Post-Discharge Readmission, Discharge to Community)

In [12]:
# Pipeline stages, applied in order. The step classes come from the star
# imports at the top of the notebook.
pipe_steps = [
    ('Drop Unnecessary Columns', Drop()),
    ('Rename Columns', Rename()),
    ('Filter Offered Services', FilterByService(bool_services)),
    # NOTE(review): the second argument (10) is presumably the number of
    # providers to return -- confirm against Recommend.
    ('Recommend', Recommend(Q_flagged, 10)),
]
pipe_prep = Pipeline(pipe_steps)

# Run on a copy so df_cal itself is not handed to the pipeline.
df = pipe_prep.fit_transform(df_cal.copy())

Providers that do not offer the services the patient needs are filtered out. The service flags correspond to these columns:
1. **nurse**: 'Offers Nursing Care Services'
2. **pt**: 'Offers Physical Therapy Services'
3. **ot**: 'Offers Occupational Therapy Services',
4. **speech**: 'Offers Speech Pathology Services',
5. **social**: 'Offers Medical Social Services',
6. **aide**: 'Offers Home Health Aide Services',

In [13]:
# Display the frame returned by the pipeline.
df

Unnamed: 0,ccn,name,address,city,zip,phone,nursing,pt,ot,speech,...,Q12,Q13,Q14,Q15,Q16,Q17,dtc,dtc_cat,ppr,ppr_cat
13,57071,KAISER FOUNDATION HOSP HOME HEALTH - SAN FRANC...,4131 GEARY BLVD,SAN FRANCISCO,94118,4158332770,Yes,Yes,Yes,Yes,...,89.1,73.0,,,0.13,91.0,,Not Available,,Not Available
21,57190,PATHWAYS HOME HEALTH AND HOSPICE,585 NORTH MARY AVENUE,SUNNYVALE,94085,4087305900,Yes,Yes,Yes,Yes,...,94.3,81.1,14.9,13.3,0.12,92.2,85.14,Better Than National Rate,2.82,Same As National Rate
163,557116,AMEDISYS HOME HEALTH CARE,"1350 BAYSHORE HWY, SUITE 777",BURLINGAME,94010,6503444020,Yes,Yes,Yes,Yes,...,99.6,86.2,13.1,12.1,0.0,93.9,79.72,Better Than National Rate,2.49,Same As National Rate
181,557290,PROFESSIONAL HOME CARE ASSOCIATES,2296 COUNTRY DRIVE,FREMONT,94536,5107979299,Yes,Yes,Yes,Yes,...,95.1,71.5,13.5,12.7,0.15,99.0,92.21,Better Than National Rate,1.51,Same As National Rate
253,557754,ASIAN AMERICAN HOME HEALTH,"1301 MARINA VILLAGE PARKWAY, SUITE 103",ALAMEDA,94501,5104331068,Yes,Yes,Yes,Yes,...,89.8,82.4,13.1,11.5,0.0,61.2,86.17,Better Than National Rate,3.45,Same As National Rate
267,557796,CROSSROADS HOME HEALTH & HOSPICE,"1109 VICENTE STREET, SUITE 103",SAN FRANCISCO,94116,4156822111,Yes,Yes,Yes,Yes,...,93.0,72.4,16.1,12.6,0.0,98.0,81.28,Better Than National Rate,2.62,Same As National Rate
299,58110,"NEW HAVEN HOME HEALTH SERVICES, INC.","333 GELLERT BOULEVARD, SUITE 249",DALY CITY,94015,6503011660,Yes,Yes,Yes,Yes,...,,67.3,14.4,12.5,0.14,98.7,80.86,Better Than National Rate,2.66,Same As National Rate
308,58123,SUTTER VISITING NURSE ASSOCIATION & HOSPICE,"1700 S AMPHLETT BLVD, SUITE 300",SAN MATEO,94402,6506852800,Yes,Yes,Yes,Yes,...,96.3,71.0,13.5,13.5,0.08,95.7,84.9,Better Than National Rate,2.25,Same As National Rate
438,58315,AMERICAN CAREQUEST,"819 COWAN ROAD, SUITE C-1",BURLINGAME,94010,4158859100,Yes,Yes,Yes,Yes,...,,70.5,12.5,15.0,0.03,99.9,81.44,Better Than National Rate,12.61,Same As National Rate
516,58425,"WARM SPRINGS HOME HEALTH, INC",54 WHITNEY PLACE,FREMONT,94539,5104906988,Yes,Yes,Yes,Yes,...,87.2,83.6,13.1,14.3,0.0,100.0,84.81,Better Than National Rate,3.5,Same As National Rate


In [14]:
# Translate the flagged question labels into topic names and report them.
prioritized_topics = [Q_dict[flag] for flag in Q_flagged]
print(f'The recommendation prioritized {prioritized_topics} in that order')

The recommendation prioritized ['Breathing', 'Skin Integrity'] in that order
