# Load IL LTC data from Web

## Doing
 - [ ] Add [CMS Federal Provider Number] to [df_facilities]

## To Do's
 - [ ] Check for new [State LTC Facility Name] values in the State feed (for use in identifying/looking for a matching name in CMS)
 - [ ] Add CMS info to [df_facilities]
 - [ ] Fix so that we map County and Facility Name to CMS ProvNUM
 - [ ] Use FuzzyWuzzy to match names; for any match with a score > 95, use that [CMS Federal Provider Number] (if available)
 - [ ] Refactor to use only one urllib package
 
## Done
 - [x] Load [State LTC Facility Name] to [CMS Federal Provider Number] 


In [1]:
import pandas as pd
import urllib3 as urllib
import urllib.request as urllib2
import json
import glob
import IPython.display

pd.options.display.max_columns = None

# 0 - Define Functions

In [2]:
# Connection pool shared by every getResponse() call.
http = urllib.PoolManager()


def getResponse(url):
    """Fetch `url` with an HTTP GET and return the decoded JSON payload.

    Args:
        url: Address of a resource that answers with a UTF-8 encoded JSON body.

    Returns:
        The parsed JSON document, as produced by `json.loads`.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
    """
    response = http.request('GET', url)
    if response.status != 200:
        # There is no payload to parse, so fail loudly with the status code
        # instead of falling through to a return of an unassigned name.
        raise RuntimeError(
            "Error receiving data from %s (HTTP status %s)" % (url, response.status)
        )
    return json.loads(response.data.decode('utf-8'))

In [3]:
def pull_IL_json_from_web():
    """Download the Illinois LTC feed and archive a copy of it on disk.

    The feed is fetched with getResponse(), re-serialised with `json.dumps`
    and written to ``Source_data/IL_<reporting date>_LTC_data_Source.json``.
    The reporting date is built from the feed's ``LastUpdateDate`` entry.

    Returns:
        Path of the JSON file that was written.
    """
    ltc_data = getResponse('https://idph.illinois.gov/DPHPublicInformation/api/covid/getltcdata')
    # Alternative endpoint kept for reference:
    #ltc_data = getResponse('http://www.dph.illinois.gov/sitefiles/COVIDLTC.json')

    # Extract the reporting date as YYYY-MM-DD
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    # Save a copy of the source data; the context manager closes the file
    # even when the write fails part-way.
    file = "Source_data/IL_" + reporting_date + "_LTC_data_Source.json"
    with open(file, "w") as f:
        f.write(json.dumps(ltc_data))
    return file

In [4]:
def outbreak_df_from_file(filename):
    """Build the outbreak-level DataFrame and summary from a saved feed file.

    Reads the JSON file, tags every ``FacilityValues`` row with the reporting
    date, adds ``CFR`` (deaths / confirmed_cases) and a constant ``outbreaks``
    column, and saves the result to
    ``Reporting_data/IL_<reporting date>_Outbreaks_LTC_data_v2.csv``.

    Args:
        filename: Path of a JSON file holding the ``LastUpdateDate``,
            ``FacilityValues`` and ``LTC_Reported_Cases`` entries.

    Returns:
        Tuple ``(df, summary, reporting_date)``: the augmented DataFrame, a
        dict with the keys ``Date``, ``Cases``, ``Deaths``, ``Outbreaks`` and
        ``Facilities``, and the reporting date formatted as YYYY-MM-DD.
    """
    with open(filename) as f:
        ltc_data = json.load(f)

    # Extract the reporting date as YYYY-MM-DD
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    df = pd.DataFrame(ltc_data['FacilityValues'])
    df.insert(0, 'reporting_date', reporting_date)
    df['CFR'] = df['deaths'] / df['confirmed_cases']
    df['outbreaks'] = 1  # to allow counting # of outbreaks by Facility

    # Save Outbreak data to a file
    outbreak_file = 'Reporting_data/IL_' + reporting_date + '_Outbreaks_LTC_data_v2.csv'
    df.to_csv(outbreak_file, index=False)

    # Get summary data from feed - Note this may not match totals - ST-TODO: Check if summary data and totals from raw data match
    reported = ltc_data['LTC_Reported_Cases']
    # One group per distinct (County, FacilityName) pair
    facility_cnt = len(df.groupby(['County', 'FacilityName']).size())
    summary = {
        'Date': reporting_date,
        'Cases': reported['confirmed_cases'],
        'Deaths': reported['deaths'],
        # Every row is one outbreak; len(df) avoids positional lookup on a
        # Series whose index holds date strings.
        'Outbreaks': len(df),
        'Facilities': facility_cnt,
    }

    return df, summary, reporting_date

In [5]:
def process_json_IL(filename, display_dfs=False, display_summary=True):
    """Turn a saved feed file into outbreak, facility and county tables.

    Steps:
       1) Build the outbreak DataFrame and summary via outbreak_df_from_file()
          (which also writes the outbreak CSV).
       2) Aggregate outbreaks per (County, FacilityName) and write
          ``Reporting_data/IL_<date>_Facilities_LTC_data_v2.csv``.
       3) Aggregate facilities per County and write
          ``Reporting_data/IL_<date>_County_LTC_stats_v2.csv``.

    Args:
        filename: Path of the JSON feed file to process.
        display_dfs: When true, show the top rows of the three DataFrames.
        display_summary: When true, print each summary entry as "key: value".

    Returns:
        Tuple ``(reporting_date, summary, outbreak_df, df_facilities, df_county)``.
    """
    outbreak_df, summary, reporting_date = outbreak_df_from_file(filename)

    # Print Summary Data
    if display_summary:
        for k, v in summary.items():
            print(k + ": " + str(v))

    # Facility level data.
    # numeric_only=True keeps text columns (e.g. 'reporting_date') out of the
    # sum; without it newer pandas versions concatenate the strings.
    df_facilities = outbreak_df.groupby(['County', 'FacilityName']).sum(numeric_only=True)
    # The summed per-outbreak CFR is meaningless, so recompute it from totals
    df_facilities['CFR'] = df_facilities['deaths'] / df_facilities['confirmed_cases']
    df_facilities['facilities'] = 1
    df_facilities.insert(0, 'ReportingDate', reporting_date)
    facilities_file = 'Reporting_data/IL_' + reporting_date + '_Facilities_LTC_data_v2.csv'
    df_facilities.sort_values(by='confirmed_cases', ascending=False).to_csv(facilities_file)

    # County level data.
    # numeric_only=True also drops the text 'ReportingDate' column, so it can
    # be inserted again below without a duplicate-column error.
    df_county = df_facilities.groupby(by=['County']).sum(numeric_only=True)
    df_county['CFR'] = df_county['deaths'] / df_county['confirmed_cases']
    df_county.insert(0, 'ReportingDate', reporting_date)
    county_file = 'Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv'
    df_county.sort_values('confirmed_cases', ascending=False).to_csv(county_file)

    if display_dfs:
        # `display` is only predefined inside IPython/Jupyter sessions
        from IPython.display import display

        print("\nOutbreak Data\n=============")
        display(outbreak_df.sort_values(by='deaths', ascending=False).head(5))
        print("\nFacility Data\n=============")
        display(df_facilities.sort_values('deaths', ascending=False).head(10))
        print("\nCounty Data\n===========")
        display(df_county.sort_values(by='confirmed_cases', ascending=False).head(10))

    return reporting_date, summary, outbreak_df, df_facilities, df_county

# 1 - Pull JSON File from Website

In [6]:
# Download the current feed, then read the archived copy back from disk
json_file = pull_IL_json_from_web()
#!chmod 444 $json_file
with open(json_file) as f:
    ltc_data = json.load(f)

# Extract the reporting date as YYYY-MM-DD and show it as the cell result
last_update = ltc_data['LastUpdateDate']
reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])
reporting_date

'2020-11-27'

# 2 - Load Supporting Files (CMS et al)

In [7]:
# Read the facility-name -> CMS id lookup table from its JSON file
fac2CMS_file = 'IL_FacilityName_to_CMS_ID.json'
with open(fac2CMS_file) as mapping_file:
    ltc_name2cms_id = json.load(mapping_file)

# 3 - Process JSON File to Create Files and DFs
Data is at the Outbreak level. A Facility can have one to many Outbreaks (not sure whether zero is possible).
This step creates a file and a DataFrame at each of three levels: Outbreaks, Facilities and Counties.

In [8]:
# Process the downloaded feed into outbreak, facility and county tables
print('Source File: ' + str(json_file))
reporting_date, summary, outbreak_df, df_facilities, df_county = process_json_IL(
    json_file,
    display_dfs=False,
)

Source File: Source_data/IL_2020-11-27_LTC_data_Source.json
Date: 2020-11-27
Cases: 45882
Deaths: 6047
Outbreaks: 1584
Facilities: 1319


# 4 - Connect to CMS Provider Number

In [9]:
def facility2CMSNum(facilityName):
    """Return the CMS provider number stored for `facilityName`.

    The key is looked up in the module-level ``ltc_name2cms_id`` dict; the
    string "No Match" is returned when the key is absent.
    """
    return ltc_name2cms_id.get(facilityName, "No Match")
# Turn the (County, FacilityName) index back into ordinary columns
df_facilities.reset_index(inplace=True)

# Build the upper-cased "COUNTY-FACILITY NAME" lookup key, then map each key
# to its CMS provider number ("No Match" when the key is unknown)
county_upper = df_facilities['County'].str.upper()
facility_upper = df_facilities['FacilityName'].str.upper()
df_facilities['county-facName'] = county_upper + '-' + facility_upper
df_facilities['CMS_ProvNum'] = df_facilities['county-facName'].apply(facility2CMSNum)

In [10]:
# Write the facility table without the helper lookup-key column.
# The date in the file name is read from the table itself, so this cell stays
# correct for any reporting date instead of only 2020-11-27.
cols = list(df_facilities.columns)
cols.remove('county-facName')
facility_report_date = df_facilities['ReportingDate'].iloc[0]
df_facilities[cols].to_csv('Reporting_data/IL_' + facility_report_date + '_Facilities_LTC_data_v3.csv', index=False)

In [11]:
# Show the facility table ordered by confirmed cases, highest first
df_facilities.sort_values(by='confirmed_cases', ascending=False)

Unnamed: 0,County,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,county-facName,CMS_ProvNum
718,Kankakee,Samuel H. Shapiro Developmental Center,2020-11-27,277,1,0.003610,2,1,KANKAKEE-SAMUEL H. SHAPIRO DEVELOPMENTAL CENTER,No Match
293,Cook,Ludeman Developmental Center,2020-11-27,263,9,0.034221,1,1,COOK-LUDEMAN DEVELOPMENTAL CENTER,No Match
206,Cook,City View Multi Care Center,2020-11-27,249,15,0.060241,1,1,COOK-CITY VIEW MULTI CARE CENTER,145850
760,Lake,Avantara Long Grove,2020-11-27,228,32,0.140351,1,1,LAKE-AVANTARA LONG GROVE,145868
346,Cook,Peterson Park Health Care,2020-11-27,221,35,0.158371,2,1,COOK-PETERSON PARK HEALTH CARE,145838
...,...,...,...,...,...,...,...,...,...,...
748,LaSalle,Streator Unlimited,2020-11-27,2,0,0.000000,1,1,LASALLE-STREATOR UNLIMITED,No Match
750,Lake,APERION CARE HIGHWOOD,2020-11-27,2,0,0.000000,1,1,LAKE-APERION CARE HIGHWOOD,145936
787,Lake,Lake Barrington Woods,2020-11-27,2,0,0.000000,1,1,LAKE-LAKE BARRINGTON WOODS,No Match
1214,Whiteside,Kreiders,2020-11-27,2,0,0.000000,1,1,WHITESIDE-KREIDERS,No Match


# 5 - Load data from CMS

In [12]:
# Load CMS Dataset from CMS website
url_csv = 'https://data.cms.gov/api/views/s2uc-8wxp/rows.csv?accessType=DOWNLOAD&api_foundry=true'

# The context manager closes the HTTP response once the CSV has been parsed.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids mixed-type columns and the
# DtypeWarning that comes with them.
with urllib2.urlopen(url_csv) as response:
    cms_data = pd.read_csv(
        response,
        parse_dates=['Week Ending'],
        dtype={'Provider Name': str},
        low_memory=False,
    )

max_date = cms_data['Week Ending'].max()
# NOTE: this rebinds `reporting_date` to the latest CMS "Week Ending" date,
# replacing the state feed's reporting date set by earlier cells.
reporting_date = str(max_date)[0:10]
print(reporting_date)
display(cms_data.head(5))

# Rows for the most recent reporting week, all states
cms_data_latest = cms_data[cms_data['Week Ending'] == max_date]

  interactivity=interactivity, compiler=compiler, result=result)


2020-11-15


Unnamed: 0,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,Provider Zip Code,Submitted Data,Passed Quality Assurance Check,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Resident Access to Testing in Facility,Laboratory Type Is State Health Dept,Laboratory Type Is Private Lab,Laboratory Type Is Other,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,Reason for Not Testing Residents - Lack of PPE for Personnel,Reason for Not Testing Residents - Lack of Supplies,Reason for Not Testing Residents - Lack of Access to Laboratory,Reason for Not Testing Residents - Lack of Access to Trained Personnel,Reason for Not Testing Residents - Uncertainty About Reimbursement,Reason for Not Testing Residents - Other,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,Reason for Not Testing Staff and/or Personnel - Lack of PPE for Personnel,Reason for Not Testing Staff and/or Personnel - Lack of Supplies,Reason for Not Testing Staff and/or Personnel - Lack of Access to Laboratory,Reason for Not Testing Staff and/or Personnel - Lack of Access to Trained Personnel,Reason for Not Testing Staff and/or Personnel - Uncertainty About 
Reimbursement,Reason for Not Testing Staff and/or Personnel - Other,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 
Cases,County,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation
0,2020-08-30,175404,BUHLER SUNSHINE HOME,400 S BUHLER ROAD,BUHLER,KS,67522,Y,Y,0.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,55.0,50.0,,,,,Y,N,N,N,N,N,N,,N,N,N,N,N,N,Y,N,N,N,N,N,N,3-7 DAYS,N,N,N,N,N,N,,,,,0.0,1.0,0.0,0.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,0.0,0.0,,Reno,N,N,POINT (-97.775844 38.13229)
1,2020-06-21,175301,WICHITA PRESBYTERIAN MANOR,4700 W 13TH STREET NORTH,WICHITA,KS,67212,Y,Y,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,50.0,36.0,Y,N,Y,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,1.0,6.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,0.0,0.0,,Sedgwick,N,N,POINT (-97.397488 37.709017)
2,2020-09-20,175315,"PRATT OPERATOR, LLC",1221 LARIMER STREET,PRATT,KS,67124,Y,Y,0.0,1.0,0.0,4.0,0.0,2.0,1.0,1.0,0.0,0.0,45.0,45.0,,,,,Y,N,N,N,N,N,N,<1 DAY,Y,Y,N,N,N,N,Y,N,N,N,N,N,N,<1 DAY,Y,N,N,N,Y,N,Y,1.0,85.0,Y,0.0,9.0,0.0,4.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,88.9,0.0,0.0,Pratt,N,N,POINT (-98.727443 37.656991)
3,2020-05-24,175263,GOOD SAMARITAN SOCIETY - OLATHE,20705 W 151ST STREET,OLATHE,KS,66061,Y,Y,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,140.0,136.0,Y,Y,Y,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,2.0,2.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,0.0,0.0,,Johnson,,,POINT (-94.825244 38.854533)
4,2020-09-06,185316,PRINCETON NURSING & REHABILITATION,1333 WEST MAIN STREET,PRINCETON,KY,42445,Y,Y,0.0,0.0,0.0,0.0,0.0,0.0,1.0,24.0,0.0,0.0,104.0,81.0,,,,,Y,N,N,N,N,N,N,,N,N,N,N,N,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,N,Y,N,N,,,,0.0,3.0,0.0,0.0,0.0,0.0,N,N,N,N,Y,Y,N,N,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,0.0,0.0,,Caldwell,N,N,POINT (-87.889741 37.113003)


In [13]:
# Keep one state's CMS rows, then narrow them to the most recent reporting week
myState = 'IL'
in_state = cms_data['Provider State'] == myState
state_cms_data = cms_data[in_state].copy()
is_latest_week = state_cms_data['Week Ending'] == max_date
state_cms_data_latest = state_cms_data[is_latest_week]

In [14]:
# CMS columns carried into the facility merge, in output order: provider
# identity and address, resident and staff counts, staffing shortages,
# per-1,000 rates, then testing, PPE and ventilator fields.
# Columns not listed here are left out of the merged table.
cols=['Week Ending', 'Federal Provider Number', 'Provider Name',
       'Provider Address', 'Provider City', 'Provider State', 'County', 
       'Provider Zip Code', 'Submitted Data',
       'Residents Weekly Admissions COVID-19',
       'Residents Total Admissions COVID-19',
       'Residents Weekly Confirmed COVID-19',
       'Residents Total Confirmed COVID-19',
       'Residents Weekly Suspected COVID-19',
       'Residents Total Suspected COVID-19', 
       'Residents Weekly All Deaths',
       'Residents Total All Deaths', 
       'Residents Weekly COVID-19 Deaths',
       'Residents Total COVID-19 Deaths', 'Number of All Beds',
       'Total Number of Occupied Beds',
       'Staff Weekly Confirmed COVID-19', 'Staff Total Confirmed COVID-19',
       'Staff Weekly Suspected COVID-19', 'Staff Total Suspected COVID-19',
       'Staff Weekly COVID-19 Deaths', 'Staff Total COVID-19 Deaths',
       'Shortage of Nursing Staff', 'Shortage of Clinical Staff',
       'Shortage of Aides', 'Shortage of Other Staff',
       'Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents',
       'Weekly Resident COVID-19 Deaths Per 1,000 Residents',
       'Total Resident Confirmed COVID-19 Cases Per 1,000 Residents',
       'Total Resident COVID-19 Deaths Per 1,000 Residents',
       'Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases',
       'Three or More Confirmed COVID-19 Cases This Week',
       'Initial Confirmed COVID-19 Case This Week', 'Geolocation',
      
       'Resident Access to Testing in Facility',
       'Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days',
       'During Past Two Weeks Average Time to Receive Resident Test Results',
       'Has Facility Performed Resident Tests Since Last Report',
       'Tested Residents with New Signs or Symptoms',
       'Tested Asymptomatic Residents in a Unit or Section After a New Case',
       'Tested Asymptomatic Residents Facility-Wide After a New Case',
       'Tested Asymptomatic Residents Without Known Exposure as Surveillance',
       'Tested Another Subgroup of Residents',
       'Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days',
       'During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results',
       'Has Facility Performed Staff and/or Personnel Tests Since Last Report',
       'Tested Staff and/or Personnel with New Signs or Symptoms',
       'Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case',
       'Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case',
       'Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance',
       'Tested Another Subgroup of Staff and/or Personnel',
       'In-House Point-of-Care Test Machine',
       'COVID-19 Point-of-Care Tests Performed on Residents Since Last Report',
       'COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report',
       'Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine',
       'Any Current Supply of N95 Masks', 'One-Week Supply of N95 Masks',
       'Any Current Supply of Surgical Masks',
       'One-Week Supply of Surgical Masks',
       'Any Current Supply of Eye Protection',
       'One-Week Supply of Eye Protection', 'Any Current Supply of Gowns',
       'One-Week Supply of Gowns', 'Any Current Supply of Gloves',
       'One-Week Supply of Gloves', 'Any Current Supply of Hand Sanitizer',
       'One-Week Supply of Hand Sanitizer', 'Ventilator Dependent Unit',
       'Number of Ventilators in Facility',
       'Number of Ventilators in Use for COVID-19',
       'Any Current Supply of Ventilator Supplies',
       'One-Week Supply of Ventilator Supplies',
       ]

# Left join: every state-reported facility is kept, and facilities without a
# matching CMS provider number get empty CMS columns.
# validate='many_to_one' makes pandas raise a MergeError when a Federal
# Provider Number repeats on the CMS side, which would otherwise silently
# duplicate facility rows.
df_facilities_w_cms = pd.merge(
    df_facilities,
    state_cms_data_latest[cols],
    left_on='CMS_ProvNum',
    right_on='Federal Provider Number',
    how='left',
    validate='many_to_one',
)

In [15]:
# Preview the first 10 rows of the merged facility + CMS table
df_facilities_w_cms.head(10)

Unnamed: 0,County_x,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,county-facName,CMS_ProvNum,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,County_y,Provider Zip Code,Submitted Data,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation,Resident Access to Testing in Facility,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,During Past Two Weeks Average Time to Receive Staff 
and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies
0,Adams,Adams Pointe Senior Living,2020-11-27,4,0,0.0,1,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Adams,Bradford Villa,2020-11-27,8,1,0.125,1,1,ADAMS-BRADFORD VILLA,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Adams,Cedarhurst,2020-11-27,24,1,0.041667,1,1,ADAMS-CEDARHURST,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Adams,Chaddock,2020-11-27,3,0,0.0,1,1,ADAMS-CHADDOCK,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Adams,Golden Good Shepperd Home,2020-11-27,52,8,0.153846,1,1,ADAMS-GOLDEN GOOD SHEPPERD HOME,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,Adams,Good Samaritan Home,2020-11-27,28,1,0.035714,1,1,ADAMS-GOOD SAMARITAN HOME,145773,2020-11-15,145773.0,GOOD SAMARITAN HOME,2130 HARRISON STREET,QUINCY,IL,Adams,62301.0,Y,0.0,0.0,0.0,3.0,0.0,9.0,0.0,21.0,0.0,0.0,213.0,166.0,6.0,35.0,0.0,30.0,0.0,0.0,Y,N,Y,N,0.0,0.0,18.1,0.0,0.0,N,N,POINT (-91.380324 39.913547),,Y,1-2 DAYS,N,N,N,N,N,N,Y,1-2 DAYS,Y,N,N,Y,N,N,Y,3.0,320.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,
6,Adams,Illinois Veterans Home Quincy,2020-11-27,87,2,0.022989,1,1,ADAMS-ILLINOIS VETERANS HOME QUINCY,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,Adams,Quincy Terrace,2020-11-27,4,0,0.0,1,1,ADAMS-QUINCY TERRACE,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,Adams,St Vincents Nursing Home,2020-11-27,96,9,0.09375,1,1,ADAMS-ST VINCENTS NURSING HOME,145457,2020-11-15,145457.0,ST VINCENT'S HOME,1440 NORTH 10TH STREET,QUINCY,IL,Adams,62301.0,Y,5.0,16.0,0.0,61.0,0.0,17.0,4.0,34.0,4.0,10.0,90.0,55.0,2.0,30.0,0.0,16.0,0.0,0.0,Y,N,Y,N,0.0,72.7,1109.1,181.8,16.4,N,N,POINT (-91.400139 39.948897),,Y,1-2 DAYS,N,N,N,N,N,N,Y,1-2 DAYS,Y,N,N,N,Y,N,Y,0.0,0.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,
9,Adams,Sunset Home,2020-11-27,11,0,0.0,1,1,ADAMS-SUNSET HOME,145800,2020-11-15,145800.0,SUNSET HOME,418 WASHINGTON STREET,QUINCY,IL,Adams,62301.0,Y,0.0,0.0,0.0,6.0,0.0,2.0,0.0,15.0,0.0,0.0,160.0,112.0,3.0,14.0,0.0,7.0,0.0,0.0,N,N,N,N,0.0,0.0,53.6,0.0,0.0,N,N,POINT (-91.409341 39.92265),,Y,1-2 DAYS,Y,N,N,Y,N,N,Y,<1 DAY,Y,N,N,Y,N,N,Y,2.0,302.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,


In [31]:
# Show the facility table as the cell result
df_facilities

Unnamed: 0,County,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,county-facName,CMS_ProvNum
0,Adams,Adams Pointe Senior Living,2020-11-27,4,0,0.000000,1,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match
1,Adams,Bradford Villa,2020-11-27,8,1,0.125000,1,1,ADAMS-BRADFORD VILLA,No Match
2,Adams,Cedarhurst,2020-11-27,24,1,0.041667,1,1,ADAMS-CEDARHURST,No Match
3,Adams,Chaddock,2020-11-27,3,0,0.000000,1,1,ADAMS-CHADDOCK,No Match
4,Adams,Golden Good Shepperd Home,2020-11-27,52,8,0.153846,1,1,ADAMS-GOLDEN GOOD SHEPPERD HOME,No Match
...,...,...,...,...,...,...,...,...,...,...
1314,Woodford,El Paso Health Care Center,2020-11-27,3,0,0.000000,1,1,WOODFORD-EL PASO HEALTH CARE CENTER,No Match
1315,Woodford,Heritage Health El Paso,2020-11-27,41,8,0.195122,1,1,WOODFORD-HERITAGE HEALTH EL PASO,145319
1316,Woodford,Snyder Village Assisted Living,2020-11-27,2,0,0.000000,1,1,WOODFORD-SNYDER VILLAGE ASSISTED LIVING,No Match
1317,Woodford,Snyder Village Health Center,2020-11-27,24,2,0.083333,1,1,WOODFORD-SNYDER VILLAGE HEALTH CENTER,No Match


# Play Area

In [29]:
# state_cms_data_latest.columns

In [17]:
# Count the facilities that answered 'Y' to each staffing-shortage question in
# the latest CMS week: first for the selected state, then for all states.
fields = ['Shortage of Nursing Staff', 'Shortage of Clinical Staff',
          'Shortage of Aides', 'Shortage of Other Staff']
for latest_frame in (state_cms_data_latest, cms_data_latest):
    # The loop variable keeps the name `field` because the next cell reads it
    # after this cell has run.
    for field in fields:
        # .get() reports 0 instead of raising KeyError when nobody answered 'Y'
        print(field + ": " + str(latest_frame[field].value_counts().get('Y', 0)))

Shortage of Nursing Staff: 140
Shortage of Clinical Staff: 33
Shortage of Aides: 151
Shortage of Other Staff: 92
Shortage of Nursing Staff: 2692
Shortage of Clinical Staff: 429
Shortage of Aides: 2999
Shortage of Other Staff: 1584


In [18]:
# `field` still holds the last value assigned by the loops in the previous cell
a = cms_data_latest[field].value_counts()
# Number of rows answering 'Y' (raises KeyError when there are none)
a['Y']

1584

In [33]:
# Write the merged facility + CMS table without the helper lookup-key column.
# `cols` is applied to the frame so the column removed below is really left out
# of the file; the date in the file name is read from the table itself rather
# than being hard-coded.
cols = list(df_facilities_w_cms.columns)
cols.remove('county-facName')
merged_report_date = df_facilities_w_cms['ReportingDate'].iloc[0]
df_facilities_w_cms[cols].to_csv('Reporting_data/IL_' + merged_report_date + '_Facilities_LTC_data_v3.csv', index=False)

In [30]:
# Preview the first 20 rows of the state's latest-week CMS data
state_cms_data_latest.head(20)

Unnamed: 0,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,Provider Zip Code,Submitted Data,Passed Quality Assurance Check,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Resident Access to Testing in Facility,Laboratory Type Is State Health Dept,Laboratory Type Is Private Lab,Laboratory Type Is Other,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,Reason for Not Testing Residents - Lack of PPE for Personnel,Reason for Not Testing Residents - Lack of Supplies,Reason for Not Testing Residents - Lack of Access to Laboratory,Reason for Not Testing Residents - Lack of Access to Trained Personnel,Reason for Not Testing Residents - Uncertainty About Reimbursement,Reason for Not Testing Residents - Other,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,Reason for Not Testing Staff and/or Personnel - Lack of PPE for Personnel,Reason for Not Testing Staff and/or Personnel - Lack of Supplies,Reason for Not Testing Staff and/or Personnel - Lack of Access to Laboratory,Reason for Not Testing Staff and/or Personnel - Lack of Access to Trained Personnel,Reason for Not Testing Staff and/or Personnel - Uncertainty About 
Reimbursement,Reason for Not Testing Staff and/or Personnel - Other,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 
Cases,County,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation
77025,2020-11-15,145431,LOFT REHABILITATION & NURSING,700 NORTH MAIN STREET,EUREKA,IL,61530,N,,0.0,1.0,0.0,0.0,0.0,17.0,0.0,8.0,0.0,0.0,104.0,79.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,3.0,0.0,8.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,,Woodford,,,POINT (-89.272977 40.729965)
295987,2020-11-15,145008,DUQUOIN NURSING & REHAB,514 EAST JACKSON ST,DU QUOIN,IL,62832,Y,Y,0.0,0.0,5.0,8.0,8.0,17.0,0.0,13.0,0.0,0.0,72.0,63.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,Y,N,Y,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,Y,N,Y,Y,N,Y,20.0,2.0,Y,4.0,6.0,1.0,11.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,79.4,0.0,127.0,0.0,0.0,Perry,Y,N,POINT (-89.229268 38.002019)
296150,2020-11-15,145102,MEMORIAL CARE CENTER,4315 MEMORIAL DRIVE,BELLEVILLE,IL,62226,Y,Y,5.0,67.0,0.0,27.0,0.0,0.0,0.0,6.0,0.0,6.0,82.0,50.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,N,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,N,Y,N,N,,,,1.0,39.0,0.0,0.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,540.0,120.0,22.2,St. Clair,N,N,POINT (-90.01982700000002 38.5489)
296168,2020-11-15,145021,HEARTLAND OF MACOMB,8 DOCTORS LANE,MACOMB,IL,61455,Y,Y,0.0,14.0,0.0,30.0,0.0,2.0,0.0,20.0,0.0,12.0,80.0,43.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,N,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,N,Y,N,Y,0.0,2.0,Y,0.0,15.0,0.0,20.0,0.0,1.0,N,N,N,N,Y,N,Y,N,Y,N,Y,N,Y,Y,Y,Y,N,,,,,0.0,0.0,697.7,279.1,40.0,McDonough,N,N,POINT (-90.662354 40.446774000000005)
296186,2020-11-15,145160,APERION CARE CAPITOL,555 WEST CARPENTER,SPRINGFIELD,IL,62702,Y,Y,0.0,7.0,0.0,1.0,0.0,2.0,0.0,2.0,0.0,0.0,251.0,88.0,,,,,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,N,Y,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,N,Y,N,Y,0.0,0.0,Y,2.0,9.0,0.0,70.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,11.4,0.0,0.0,Sangamon,N,N,POINT (-89.662793 39.807588)
296538,2020-11-15,145121,ALTON MEMORIAL REHAB & THERAPY,1251 COLLEGE AVENUE,ALTON,IL,62002,Y,Y,0.0,3.0,0.0,37.0,0.0,6.0,0.0,29.0,0.0,9.0,64.0,40.0,,,,,Y,N,N,N,N,N,N,<1 DAY,Y,N,N,Y,N,N,Y,N,N,N,N,N,N,<1 DAY,Y,N,N,Y,N,N,Y,0.0,98.0,Y,0.0,28.0,0.0,22.0,0.0,0.0,Y,N,Y,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,925.0,225.0,24.3,Madison,N,N,POINT (-90.162316 38.904106)
296672,2020-11-15,145000,WASHINGTON SENIOR LIVING,1201 NEWCASTLE,WASHINGTON,IL,61571,Y,N,0.0,0.0,21.0,21.0,2.0,5.0,0.0,23.0,0.0,0.0,122.0,94.0,,,,,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,Y,N,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,Y,Y,N,Y,30.0,18.0,Y,4.0,7.0,3.0,28.0,0.0,0.0,Y,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,223.4,0.0,223.4,0.0,0.0,Tazewell,Y,Y,POINT (-89.423219 40.709206)
297030,2020-11-15,145043,CITADEL CARE CENTER-KANKAKEE,900 WEST RIVER PLACE,KANKAKEE,IL,60901,Y,Y,0.0,10.0,3.0,49.0,0.0,4.0,0.0,40.0,0.0,0.0,107.0,85.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,Y,Y,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,Y,Y,Y,N,Y,8.0,5.0,Y,0.0,34.0,0.0,8.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,35.3,0.0,576.5,0.0,0.0,Kankakee,Y,N,POINT (-87.87645200000001 41.125639)
297276,2020-11-15,145142,ALDEN DEBES REHAB & HCC,550 SOUTH MULFORD AVENUE,ROCKFORD,IL,61108,Y,Y,0.0,7.0,5.0,14.0,0.0,4.0,1.0,32.0,0.0,6.0,268.0,160.0,,,,,Y,N,N,N,N,N,N,3-7 DAYS,Y,Y,N,Y,Y,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,Y,N,Y,Y,N,Y,5.0,76.0,Y,15.0,31.0,0.0,10.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,31.3,0.0,87.5,37.5,42.9,Winnebago,Y,N,POINT (-88.99967200000002 42.260641)
297492,2020-11-15,145016,HERITAGE HEALTH-BLOOMINGTON,700 EAST WALNUT,BLOOMINGTON,IL,61701,Y,Y,0.0,2.0,0.0,0.0,0.0,3.0,0.0,17.0,0.0,0.0,88.0,56.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,N,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,N,Y,N,Y,1.0,2.0,Y,0.0,8.0,0.0,17.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,0.0,0.0,,McLean,N,N,POINT (-88.984798 40.486935)


In [20]:
import numpy as np

# Two small frames with partly overlapping keys, used to try out merge behaviour
left_keys = ['A', 'B', 'C', 'D']
right_keys = ['B', 'D', 'D', 'E']
df1 = pd.DataFrame({'key': left_keys, 'value': np.random.randn(4)})
df2 = pd.DataFrame({'key': right_keys, 'value': np.random.randn(4)})

In [21]:
# Left join: every df1 row is kept, and a df1 key that occurs twice in df2
# appears twice in the result
df1.merge(df2, how='left', on='key')

Unnamed: 0,key,value_x,value_y
0,A,-0.433345,
1,B,-0.295338,-1.164575
2,C,-1.035385,
3,D,1.437523,-0.059558
4,D,1.437523,-1.248669


In [22]:
# Federal Provider Numbers should be unique in the state's latest-week data;
# a repeated number would add extra rows when it is used as the join key.
n_cms_rows = len(state_cms_data_latest)
n_cms_providers = len(state_cms_data_latest['Federal Provider Number'].unique())
print(n_cms_rows)
print(n_cms_providers)
if n_cms_rows != n_cms_providers:
    print("WARNING: %d repeated Federal Provider Number row(s) - merging on this key will add extra rows"
          % (n_cms_rows - n_cms_providers))

713
713


In [24]:
# Earliest reporting week present in the CMS data
cms_data['Week Ending'].min()

Timestamp('2020-05-24 00:00:00')