In [1]:
# NOTE(review): the star imports hide where the names used in later cells come from
# (load_df, text_process, text, renamed_qcols, Pipeline, Drop, Rename,
# FilterByService, Recommend) - none of them is defined in a visible cell.
# Consider explicit imports once the owning module of each name is confirmed.
from pipeline import *
from process_pdf import *
# Runs process_pdf.py as a separate script through the shell.
# NOTE(review): this executes after process_pdf has already been imported above -
# confirm that this is the intended order.
! python process_pdf.py

## Load the Data

#### Necessary Input from Discharge
1. Zipcode
2. bool_services: whether or not a patient needs nursing, physical therapy, occupational therapy, speech pathology, social services, or a home health aide

In [2]:
# Patient inputs taken from the discharge information.
zipcode = 94044

# One flag per service the patient may need.
# NOTE(review): the per-entry labels follow the order given in the notes of this
# notebook - confirm that FilterByService expects the same order.
bool_services = [
    True,  # nursing
    True,  # physical therapy
    True,  # occupational therapy
    True,  # speech pathology
    True,  # social services
    True,  # home health aide
]

In [3]:
# Build df_cal from the zipcode entered above.
# NOTE(review): load_df is not defined in a visible cell - presumably it comes from
# one of the star imports; confirm what it loads and how the zipcode is used.
df_cal = load_df(zipcode)

### Remove Stop Words & Get List of Words

In [4]:
# Produce the word list that is later compared against the keyword lists.
# NOTE(review): `text` is not assigned in any visible cell - presumably it is
# exported by `from process_pdf import *`; confirm where it comes from.
word_list = text_process(text)

In [5]:
# Presumably renames the question columns of df_cal - confirm against renamed_qcols.
# NOTE(review): df_cal is rebound to its own transformed value, so re-running this
# cell applies renamed_qcols a second time; confirm that this is harmless.
df_cal = renamed_qcols(df_cal)

In [6]:
# Keyword lists, one per question (Q3-Q12 and Q16). A question topic is flagged
# when at least one of its keywords appears in the words extracted from the text.
# Each keyword is listed once per list; brackets already allow the lists to span
# several lines, so no backslash continuations are needed.
q3_keys = ["falling", "fall", "fell", "tripped", "trip", "tumble"]  # Falling
q4_keys = ["depression", "mental", "bipolar", "sad", "upset", "trauma", "traumatic"]  # Depression
q5_keys = ["flu"]  # Flu
q6_keys = ["pneumonia", "pneumococcal"]  # Pneumonia
q7_keys = ["diabetes", "foot"]  # Diabetes
q8_keys = ["walk", "walking", "moving"]  # Moving Around
q9_keys = ["bed"]  # Getting in Bed
q10_keys = ["bathing", "bathe", "bath"]  # Bathing
# NOTE(review): "shortness of breath" is a multi-word phrase; it can only match if
# the word list contains that exact phrase as a single entry - confirm against
# text_process.
q11_keys = ["breath", "shortness of breath", "shortness", "breathing", "breathe", "oxygen"]  # Breathing
q12_keys = [  # Wounds
    "wound", "wounds", "cut", "slice", "lesion",
    "gash", "puncture", "slash", "laceration", "tear",
    "rent", "sore", "graze", "scratch",
    "scrape", "abrasion", "bruise", "contusion", "trauma",
]
q16_keys = ["ulcer", "pressure", "skin", "swelling", "post-acute"]  # Skin Integrity

In [7]:
# Keyword lists gathered in question order. The position of each list matters:
# the lists are later paired positionally with the question labels.
q_keys = [
    q3_keys,   # Falling
    q4_keys,   # Depression
    q5_keys,   # Flu
    q6_keys,   # Pneumonia
    q7_keys,   # Diabetes
    q8_keys,   # Moving Around
    q9_keys,   # Getting in Bed
    q10_keys,  # Bathing
    q11_keys,  # Breathing
    q12_keys,  # Wounds
    q16_keys,  # Skin Integrity
]

In [8]:
# Question labels, in the same order as the keyword lists in q_keys.
keys = [f'Q{i}' for i in [3,4,5,6,7,8,9,10,11,12,16]]

# zip() below silently truncates to the shorter input, so fail loudly if the
# labels and the keyword lists ever drift apart.
assert len(keys) == len(q_keys), "keys and q_keys must have the same length"

# Build the word set once instead of once per keyword list.
word_set = set(word_list)

# A topic is flagged when its keyword list shares at least one word with the text.
# isdisjoint() tests for a non-empty overlap directly, whereas any() over the
# intersection would test the truthiness of the overlapping words themselves.
flags = [not word_set.isdisjoint(keywords) for keywords in q_keys]

# Map each question label to whether it was flagged.
flagged_qtopic = dict(zip(keys, flags))

In [9]:
# Display which question labels were flagged (label -> True/False).
flagged_qtopic

{'Q3': False,
 'Q4': False,
 'Q5': False,
 'Q6': False,
 'Q7': False,
 'Q8': False,
 'Q9': False,
 'Q10': False,
 'Q11': True,
 'Q12': False,
 'Q16': True}

## Pipeline Inputs

In [10]:
# Labels of the questions flagged by the keyword matching, in dictionary order.
Q_flagged = [label for label in flagged_qtopic if flagged_qtopic[label]]

In [11]:
# Human-readable topic for each question label.
Q_dict = {
    'Q3': 'Falling',
    'Q4': 'Depression',
    'Q5': 'Flu',
    'Q6': 'Pneumonia',
    'Q7': 'Diabetes',
    'Q8': 'Moving Around',
    'Q9': 'Getting in Bed',
    'Q10': 'Bathing',
    'Q11': 'Breathing',
    'Q12': 'Wounds',
    'Q16': 'Skin Integrity',
}

### Recommend() 
1. Currently, this naively sorts by the flagged questions, in the order in which they were flagged
2. We should weight the flagged questions somehow instead of relying only on their order
3. We could also take into account PPR and DTC (Potentially Preventable Post-Discharge Readmission, Discharge to Community)

In [12]:
# Named pipeline steps, listed in the order they are passed to Pipeline.
# NOTE(review): the 10 passed to Recommend is presumably the number of agencies
# to return (the displayed result has 10 rows) - confirm against Recommend.
prep_steps = [
    ('Drop Unnecessary Columns', Drop()),
    ('Rename Columns', Rename()),
    ('Filter Offered Services', FilterByService(bool_services)),
    ('Recommend', Recommend(Q_flagged, 10)),
]
pipe_prep = Pipeline(prep_steps)

# The pipeline receives a copy of df_cal rather than df_cal itself.
df = pipe_prep.fit_transform(df_cal.copy())

Filtered out agencies that don't offer the services the patient needs:
1. **nurse**: 'Offers Nursing Care Services'
2. **pt**: 'Offers Physical Therapy Services'
3. **ot**: 'Offers Occupational Therapy Services'
4. **speech**: 'Offers Speech Pathology Services'
5. **social**: 'Offers Medical Social Services'
6. **aide**: 'Offers Home Health Aide Services'

In [14]:
# Display the frame returned by the pipeline.
df

Unnamed: 0,ccn,name,address,city,zip,phone,date,star,Q1,Q2,...,Q12,Q13,Q14,Q15,Q16,Q17,dtc,dtc_cat,ppr,ppr_cat
1185,59725,21ST CENTURY HOME HEALTH SERVICES INC.,2000 VAN NESS AVENUE SUITE 608,SAN FRANCISCO,94109,4158012651,07/20/2016,5.0,99.8,98.8,...,96.4,100.0,11.9,14.5,0.03,99.0,84.68,Better Than National Rate,2.7,Same As National Rate
1026,59551,HEALTHFLEX HOME HEALTH SERVICES,"303 HEGENBERGER ROAD, SUITE 388",OAKLAND,94621,5105531900,12/24/2013,5.0,99.8,98.8,...,95.6,97.0,14.3,14.4,0.01,50.8,75.97,Better Than National Rate,2.83,Same As National Rate
163,557116,AMEDISYS HOME HEALTH CARE,"1350 BAYSHORE HWY, SUITE 777",BURLINGAME,94010,6503444020,04/05/1991,5.0,99.3,99.9,...,99.6,86.2,13.1,12.1,0.0,93.9,79.72,Better Than National Rate,2.49,Same As National Rate
784,59273,ALERT HOME HEALTH CARE,"11501 DUBLIN BOULEVARD, SUITE 200",DUBLIN,94568,9255582798,02/16/2011,3.5,97.5,100.0,...,,71.1,11.9,11.2,0.0,99.3,96.05,Better Than National Rate,9.65,Same As National Rate
624,59071,ANX HOME HEALTHCARE,"455 HICKEY BLVD, SUITE 415",DALY CITY,94015,6509915177,10/08/2009,4.5,98.6,97.1,...,95.9,84.3,16.1,12.9,0.16,93.8,70.69,Same As National Rate,5.12,Same As National Rate
1089,59628,"HELPING HANDS HOME HEALTH CARE & HOSPICE, INC.","1710 S AMPHLETT BLVD, SUITE 112",SAN MATEO,94402,6502869000,11/01/2014,5.0,100.0,100.0,...,,80.3,12.3,12.1,0.0,99.6,80.06,Better Than National Rate,2.22,Same As National Rate
852,59354,BLIZE HEALTHCARE,"750 ALFRED NOBEL DR, SUITE 204",HERCULES,94547,5106478057,02/10/2012,4.5,97.3,97.8,...,91.5,83.3,12.9,14.4,0.0,91.8,65.3,Worse Than National Rate,5.88,Same As National Rate
1347,53193,HOME HEALTH BAY AREA INC.,901 CAMPUS DRIVE STE 301,DALY CITY,94015,6500000000,08/22/2018,4.5,99.0,98.1,...,,90.2,14.3,12.8,0.15,100.0,75.28,Same As National Rate,5.41,Same As National Rate
1028,59554,CARE IN TOUCH HOME HEALTH AGENCY,"41829 ALBRAE STREET, SUITE 218",FREMONT,94538,5106518500,10/04/2013,4.5,99.0,100.0,...,86.7,83.7,16.4,13.9,0.15,95.9,83.33,Better Than National Rate,1.1,Same As National Rate
1165,59682,"ADVANCED HEALTHCARE SERVICES, LLC","39899 BALENTINE DRIVE, SUITE 365",NEWARK,94560,8554212267,01/08/2016,3.5,94.9,99.8,...,93.2,77.8,13.5,12.5,0.11,95.9,83.62,Better Than National Rate,2.79,Same As National Rate


In [19]:
# Show only the agency name, the star rating and the flagged question columns.
columns_to_show = ['name', 'star'] + Q_flagged
df[columns_to_show]

Unnamed: 0,name,star,Q11,Q16
1185,21ST CENTURY HOME HEALTH SERVICES INC.,5.0,100.0,0.03
1026,HEALTHFLEX HOME HEALTH SERVICES,5.0,100.0,0.01
163,AMEDISYS HOME HEALTH CARE,5.0,98.7,0.0
784,ALERT HOME HEALTH CARE,3.5,97.8,0.0
624,ANX HOME HEALTHCARE,4.5,96.8,0.16
1089,"HELPING HANDS HOME HEALTH CARE & HOSPICE, INC.",5.0,95.5,0.0
852,BLIZE HEALTHCARE,4.5,95.5,0.0
1347,HOME HEALTH BAY AREA INC.,4.5,94.1,0.15
1028,CARE IN TOUCH HOME HEALTH AGENCY,4.5,92.8,0.15
1165,"ADVANCED HEALTHCARE SERVICES, LLC",3.5,92.3,0.11


In [28]:
# Topics of the flagged questions, followed by the star rating.
priority_labels = [Q_dict[flag] for flag in Q_flagged]
priority_labels.append("Star Rating")
print(f'The recommendation prioritized {priority_labels} in that order')

The recommendation prioritized ['Breathing', 'Skin Integrity', 'Star Rating'] in that order
