# Load IL LTC data from Web

## Doing
 - [ ] add CMS ProvNum from lookup automatically

## To Do's
 - [ ] Check for new [State LTC Facility Name] in State feed (for use in identifying/looking for matching Name in CMS
 - [ ] Fix so that we map County and Facility Name to CMS ProvNUM
 - [ ] Use FuzzyWuzzy to match names and any with score >95 use that [CMS Federal Provider Number] (if available)
 - [ ] Refactor to use only one urllib package
 - [ ] fix DF naming to be standard convention (look up to see if there is one)
 
## Done
 - [x] Add [CMS Federal Provider Number] to [df_facilities]
 - [x] Add CMS info to [df_facilities]
 - [x] Load [State LTC Facility Name] to [CMS Federal Provider Number]
 - [x] generalize add [CMS Federal Provider Number] and adding CMS info to a DataFrame



In [1]:
import pandas as pd
import urllib3 as urllib
import urllib.request as urllib2
import json
import glob
import IPython.display

pd.options.display.max_columns = None

# 0 - Define Functions

In [2]:
http = urllib.PoolManager()

def getResponse(url):
    operUrl = http.request('GET', url)
    if(operUrl.status==200):
        data = operUrl.data
        jsonData = json.loads(data.decode('utf-8'))
    else:
        print("Error receiving data", operUrl.getcode())
    return jsonData

In [3]:
def pull_IL_json_from_web():
    ltc_data = getResponse('https://idph.illinois.gov/DPHPublicInformation/api/covid/getltcdata')
    #ltc_data = getResponse('http://www.dph.illinois.gov/sitefiles/COVIDLTC.json')

    # Extract Reporting Data
    reporting_date = '%d-%02d-%02d' %(ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])

    #Saving a copy of source data 
    ltc_data_json = json.dumps(ltc_data)
    file = "Source_data/IL_" + reporting_date + "_LTC_data_Source.json"
    f = open(file, "w")
    f.write(ltc_data_json)
    f.close()
    return file

In [4]:
def outbreak_df_from_file(filename):
    """ From Json file:
        1) return DataFrame augmented and save to file
        2) return Summary data"""
    with open(filename) as f:
      ltc_data = json.load(f)

    # Extract Reporting Data
    reporting_date = '%d-%02d-%02d' %(ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])
    df = pd.DataFrame(ltc_data['FacilityValues'])
    df.insert(0, 'reporting_date', reporting_date)
    df['CFR'] = (df['deaths'] / df['confirmed_cases'])
    df['outbreaks'] = 1 # to allow counting # of outbreaks by Facility
    #Save Outbreak data to a file
    outbreak_file = 'Reporting_data/IL_' + reporting_date + '_Outbreaks_LTC_data_v2.csv'
    df.to_csv(outbreak_file, index = False)
    
    # Get summary data from feed - Note this may not match totals - ST-TODO: Check if summary data and totals from raw data match
    deaths = ltc_data['LTC_Reported_Cases']['deaths']
    confirmed_cases = ltc_data['LTC_Reported_Cases']['confirmed_cases']
    facility_cnt = len(df.groupby(['County', 'FacilityName']).size().reset_index().rename(columns={0:'count'}).sort_values(by='count', ascending=False))
    summary = {}
    summary['Date'] = reporting_date
    summary['Cases'] = confirmed_cases
    summary['Deaths'] = deaths
    summary['Outbreaks'] = df.reporting_date.value_counts()[0]
    summary['Facilities'] = facility_cnt
    
    return df, summary, reporting_date

In [5]:
def process_json_IL (filename, display_dfs=False, display_summary=True):
    """Process a JSON file to:
       1) Produce Summary Info
       2) Produce Outbreak file and dataframe
       3) Produce Facility file and dataframe
       4) Produce County file and dataframe
        
       TODO - make display dataframes optional
       TODO - make display summary info optional"""
    [outbreak_df, summary, reporting_date] = outbreak_df_from_file(filename)

    # Print Summary Data
    if display_summary:
        for k,v in summary.items():
            print(k + ": " + str(v))    

    # Save and Display Facility data
    df_facilities = outbreak_df.groupby(['County', 'FacilityName']).sum()
    df_facilities['CFR'] = df_facilities['deaths'] / df_facilities['confirmed_cases']
    df_facilities['facilities'] = 1
    df_facilities.insert(0, 'ReportingDate', reporting_date)
    df_facilities.sort_values(by='confirmed_cases', ascending=False).to_csv('Reporting_data/IL_' + reporting_date + '_Facilities_LTC_data_v2.csv')

    # Save and Display County Level Data
    df_county = df_facilities.groupby(by=['County']).sum()
    df_county['CFR'] = (df_county['deaths'] / df_county['confirmed_cases'])
    df_county.insert(0, 'ReportingDate', reporting_date)
    filename = 'Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv'
    df_county.sort_values('confirmed_cases', ascending=False).to_csv('Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv')
    
    
    if display_dfs:
        print("\nOutbreak Data\n=============")
        display(outbreak_df.sort_values(by='deaths', ascending=False).head(5))
        print("\nFacility Data\n=============")
        display(df_facilities.sort_values('deaths', ascending=False).head(10))
        print("\nCounty Data\n===========")
        display(df_county.sort_values(by='confirmed_cases', ascending=False).head(10))

    return reporting_date, summary, outbreak_df, df_facilities, df_county

In [6]:
# DUMMY TO BE REMOVED
def facility2CMSNum (facilityName):
    if facilityName in ltc_name2cms_id:
        return ltc_name2cms_id[facilityName]
    else:
        return "No Match"

def add_cms_data_to_df(df, cms_df, df_key, cms_key):
    ''' 
        Steo 1 - Add CMS Provider Number based on Facility Name field
        Step 2 - Merge with CMS data
    '''
    # Specify columns from CMS data feed to add
    cols=['Week Ending', 'Federal Provider Number', 'Provider Name',
           'Provider Address', 'Provider City', 'Provider State', 'County', 
           'Provider Zip Code', 'Submitted Data',
           'Residents Weekly Admissions COVID-19',
           'Residents Total Admissions COVID-19',
           'Residents Weekly Confirmed COVID-19',
           'Residents Total Confirmed COVID-19',
           'Residents Weekly Suspected COVID-19',
           'Residents Total Suspected COVID-19', 
           'Residents Weekly All Deaths',
           'Residents Total All Deaths', 
           'Residents Weekly COVID-19 Deaths',
           'Residents Total COVID-19 Deaths', 'Number of All Beds',
           'Total Number of Occupied Beds',
           'Staff Weekly Confirmed COVID-19', 'Staff Total Confirmed COVID-19',
           'Staff Weekly Suspected COVID-19', 'Staff Total Suspected COVID-19',
           'Staff Weekly COVID-19 Deaths', 'Staff Total COVID-19 Deaths',
           'Shortage of Nursing Staff', 'Shortage of Clinical Staff',
           'Shortage of Aides', 'Shortage of Other Staff',
           'Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents',
           'Weekly Resident COVID-19 Deaths Per 1,000 Residents',
           'Total Resident Confirmed COVID-19 Cases Per 1,000 Residents',
           'Total Resident COVID-19 Deaths Per 1,000 Residents',
           'Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases',
           'Three or More Confirmed COVID-19 Cases This Week',
           'Initial Confirmed COVID-19 Case This Week', 'Geolocation',
           'Resident Access to Testing in Facility',
           'Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days',
           'During Past Two Weeks Average Time to Receive Resident Test Results',
           'Has Facility Performed Resident Tests Since Last Report',
           'Tested Residents with New Signs or Symptoms',
           'Tested Asymptomatic Residents in a Unit or Section After a New Case',
           'Tested Asymptomatic Residents Facility-Wide After a New Case',
           'Tested Asymptomatic Residents Without Known Exposure as Surveillance',
           'Tested Another Subgroup of Residents',
           'Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days',
           'During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results',
           'Has Facility Performed Staff and/or Personnel Tests Since Last Report',
           'Tested Staff and/or Personnel with New Signs or Symptoms',
           'Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case',
           'Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case',
           'Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance',
           'Tested Another Subgroup of Staff and/or Personnel',
           'In-House Point-of-Care Test Machine',
           'COVID-19 Point-of-Care Tests Performed on Residents Since Last Report',
           'COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report',
           'Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine',
           'Any Current Supply of N95 Masks', 'One-Week Supply of N95 Masks',
           'Any Current Supply of Surgical Masks',
           'One-Week Supply of Surgical Masks',
           'Any Current Supply of Eye Protection',
           'One-Week Supply of Eye Protection', 'Any Current Supply of Gowns',
           'One-Week Supply of Gowns', 'Any Current Supply of Gloves',
           'One-Week Supply of Gloves', 'Any Current Supply of Hand Sanitizer',
           'One-Week Supply of Hand Sanitizer', 'Ventilator Dependent Unit',
           'Number of Ventilators in Facility',
           'Number of Ventilators in Use for COVID-19',
           'Any Current Supply of Ventilator Supplies',
           'One-Week Supply of Ventilator Supplies',
           'Geolocation'
           ]

    df_w_cms = pd.merge(df, cms_df[cols], left_on=df_key, right_on=cms_key, how='left')
    return df_w_cms

# 1 - Pull IL JSON File from Website

In [7]:
json_file = pull_IL_json_from_web()
#!chmod 444 $json_file
with open(json_file) as f:
  ltc_data = json.load(f)
          
# Extract Reporting Data
reporting_date = '%d-%02d-%02d' % (ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])
reporting_date

'2020-12-04'

# 2 - Load Supporting Files (CMS et al)

In [8]:
# Load Facility Name to CMS ID json file
fac2CMS_file = 'IL_FacilityName_to_CMS_ID.json'
with open(fac2CMS_file) as f:
  ltc_name2cms_id = json.load(f) 

# 3 - Process JSON File to Create Files and DFs
Data is at the Outbreak level. A Facility can have 1 to Many Outbreaks (not sure about 0).
Will create a file and data frame at the level of: Outbreaks, Facilities and Counties

In [9]:
print('Source File: ' + str(json_file))
[reporting_date, summary, outbreak_df, df_facilities, df_county] = process_json_IL(json_file, display_dfs=False)

Source File: Source_data/IL_2020-12-04_LTC_data_Source.json
Date: 2020-12-04
Cases: 50418
Deaths: 6527
Outbreaks: 1675
Facilities: 1379


# 4 - Connect to CMS Provider Number

In [10]:
def facility2CMSNum (facilityName):
    if facilityName in ltc_name2cms_id:
        return ltc_name2cms_id[facilityName]
    else:
        return "No Match"
    
df_facilities.reset_index(inplace=True) # Needed because used group by to get facility level data ToDo: COnsider moving this code up
df_facilities['county-facName']= df_facilities['County'].str.upper() + '-' + df_facilities['FacilityName'].str.upper()
df_facilities['CMS_ProvNum'] = df_facilities['county-facName'].apply(lambda x: facility2CMSNum(x))

In [11]:
outbreak_df['county-facName'] = outbreak_df['County'].str.upper() + '-' + outbreak_df['FacilityName'].str.upper()
outbreak_df['CMS_ProvNum'] = outbreak_df['county-facName'].apply(lambda x: facility2CMSNum(x))

outbreak_df

Unnamed: 0,reporting_date,County,FacilityName,confirmed_cases,deaths,ReportDate,status,CFR,outbreaks,county-facName,CMS_ProvNum
0,2020-12-04,Adams,Chaddock,3,0,2020-12-04T00:00:00,Closed,0.000000,1,ADAMS-CHADDOCK,No Match
1,2020-12-04,Adams,Illinois Veterans Home Quincy,88,3,2020-12-04T00:00:00,Open,0.034091,1,ADAMS-ILLINOIS VETERANS HOME QUINCY,No Match
2,2020-12-04,Adams,Good Samaritan Home,34,1,2020-12-04T00:00:00,Open,0.029412,1,ADAMS-GOOD SAMARITAN HOME,145773
3,2020-12-04,Adams,Adams Pointe Senior Living,4,0,2020-12-04T00:00:00,Closed,0.000000,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match
4,2020-12-04,Adams,Bradford Villa,8,1,2020-12-04T00:00:00,Closed,0.125000,1,ADAMS-BRADFORD VILLA,No Match
...,...,...,...,...,...,...,...,...,...,...,...
1670,2020-12-04,Woodford,Heritage Health El Paso,41,9,2020-12-04T00:00:00,Open,0.219512,1,WOODFORD-HERITAGE HEALTH EL PASO,145319
1671,2020-12-04,Woodford,Apostolic Christian Home of Roanoke,6,0,2020-12-04T00:00:00,Open,0.000000,1,WOODFORD-APOSTOLIC CHRISTIAN HOME OF ROANOKE,No Match
1672,2020-12-04,Woodford,Apostolic Christian Home Eureka,43,7,2020-12-04T00:00:00,Open,0.162791,1,WOODFORD-APOSTOLIC CHRISTIAN HOME EUREKA,No Match
1673,2020-12-04,Woodford,The Loft Rehabilitation & Nursing,2,1,2020-12-04T00:00:00,Open,0.500000,1,WOODFORD-THE LOFT REHABILITATION & NURSING,No Match


In [12]:
cols = list(df_facilities.columns)#.remove('county-facName')
cols.remove('county-facName')
df_facilities[cols].to_csv('Reporting_data/IL_2020-11-27_Facilities_LTC_data_v3.csv', index=False)

In [13]:
df_facilities.sort_values(by='confirmed_cases', ascending=False)

Unnamed: 0,County,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,county-facName,CMS_ProvNum
744,Kankakee,Samuel H. Shapiro Developmental Center,2020-12-04,312,1,0.003205,2,1,KANKAKEE-SAMUEL H. SHAPIRO DEVELOPMENTAL CENTER,No Match
300,Cook,Ludeman Developmental Center,2020-12-04,263,9,0.034221,1,1,COOK-LUDEMAN DEVELOPMENTAL CENTER,No Match
214,Cook,City View Multi Care Center,2020-12-04,249,15,0.060241,1,1,COOK-CITY VIEW MULTI CARE CENTER,145850
789,Lake,Avantara Long Grove,2020-12-04,228,32,0.140351,1,1,LAKE-AVANTARA LONG GROVE,145868
356,Cook,Peterson Park Health Care,2020-12-04,225,36,0.160000,2,1,COOK-PETERSON PARK HEALTH CARE,145838
...,...,...,...,...,...,...,...,...,...,...
1249,Washington,"Washington County Hospital, Long Term Care Unit",2020-12-04,2,1,0.500000,1,1,"WASHINGTON-WASHINGTON COUNTY HOSPITAL, LONG TE...",No Match
637,Jackson,The Landings at Reed Station Crossing,2020-12-04,2,0,0.000000,1,1,JACKSON-THE LANDINGS AT REED STATION CROSSING,No Match
647,Jefferson,White Oak Rehabilitation & Health Care Center,2020-12-04,2,0,0.000000,1,1,JEFFERSON-WHITE OAK REHABILITATION & HEALTH CA...,No Match
1252,Wayne,Brookstone Estates,2020-12-04,2,0,0.000000,1,1,WAYNE-BROOKSTONE ESTATES,No Match


# 5 - Load data from CMS

In [14]:
# Load CMS Dataset from CMS website
url_csv = 'https://data.cms.gov/api/views/s2uc-8wxp/rows.csv?accessType=DOWNLOAD&api_foundry=true'

response = urllib2.urlopen(url_csv)
cms_data = pd.read_csv(response, parse_dates=['Week Ending'], dtype={'Provider Name': str})
max_date = cms_data['Week Ending'].max()
reporting_date = str(max_date)[0:10]
print(reporting_date)
display(cms_data.head(5))

cms_data_latest = cms_data[cms_data['Week Ending'] == max_date]

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


2020-11-22


Unnamed: 0,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,Provider Zip Code,Submitted Data,Passed Quality Assurance Check,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Resident Access to Testing in Facility,Laboratory Type Is State Health Dept,Laboratory Type Is Private Lab,Laboratory Type Is Other,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,Reason for Not Testing Residents - Lack of PPE for Personnel,Reason for Not Testing Residents - Lack of Supplies,Reason for Not Testing Residents - Lack of Access to Laboratory,Reason for Not Testing Residents - Lack of Access to Trained Personnel,Reason for Not Testing Residents - Uncertainty About Reimbursement,Reason for Not Testing Residents - Other,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,Reason for Not Testing Staff and/or Personnel - Lack of PPE for Personnel,Reason for Not Testing Staff and/or Personnel - Lack of Supplies,Reason for Not Testing Staff and/or Personnel - Lack of Access to Laboratory,Reason for Not Testing Staff and/or Personnel - Lack of Access to Trained Personnel,Reason for Not Testing Staff and/or Personnel - Uncertainty About Reimbursement,Reason for Not Testing Staff and/or Personnel - Other,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases,County,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation,Reporting Interval
0,2020-05-24,105045,BRADEN RIVER REHABILITATION CENTER LLC,2010 MANATEE AVE E,BRADENTON,FL,34208,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Manatee,,,POINT (-82.539097 27.496201000000003),Week 1 - May 24
1,2020-05-24,105384,CALUSA HARBOUR,2525 FIRST ST,FORT MYERS,FL,33901,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Lee,,,POINT (-81.864238 26.647471),Week 1 - May 24
2,2020-05-24,105453,KENSINGTON GARDENS REHAB AND NURSING CENTER,2055 PALMETTO ST,CLEARWATER,FL,33758,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pinellas,,,POINT (-82.751998 27.975286),Week 1 - May 24
3,2020-05-24,105460,NORTH FLORIDA REHABILITATION AND SPECIALTY CARE,6700 NW 10TH PLACE,GAINESVILLE,FL,32605,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Alachua,,,POINT (-82.413587 29.661501),Week 1 - May 24
4,2020-05-31,105478,ADVANCED CARE CENTER,401 FAIRWOOD AVE,CLEARWATER,FL,33759,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pinellas,,,POINT (-82.724356 27.970223),Week 2 - May 31


In [15]:
# Get CMS data for a particular state
myState = 'IL'
state_cms_data = cms_data[cms_data['Provider State'] == myState].copy()

# Get latest week of CMS data 
state_cms_data_latest = state_cms_data[state_cms_data['Week Ending'] == max_date]

# 6 - Merge DFs with CMS data

In [16]:
# Combine State with CMS data
df_facilities_w_cms = add_cms_data_to_df(df_facilities, state_cms_data_latest, 'CMS_ProvNum', 'Federal Provider Number')

# Remove added column to ensure match
cols = list(df_facilities_w_cms.columns)#.remove('county-facName')
cols.remove('county-facName')

# Save to CSV
df_facilities_w_cms.to_csv('Reporting_data/IL_2020-11-27_Facilities_LTC_data_v3.csv', index=False)

# display check
df_facilities_w_cms

Unnamed: 0,County_x,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,county-facName,CMS_ProvNum,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,County_y,Provider Zip Code,Submitted Data,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation,Resident Access to Testing in Facility,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,Geolocation.1
0,Adams,Adams Pointe Senior Living,2020-12-04,4,0,0.000000,1,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Adams,Bradford Villa,2020-12-04,8,1,0.125000,1,1,ADAMS-BRADFORD VILLA,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Adams,Cedarhurst,2020-12-04,24,1,0.041667,1,1,ADAMS-CEDARHURST,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Adams,Chaddock,2020-12-04,3,0,0.000000,1,1,ADAMS-CHADDOCK,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Adams,Golden Good Shepperd Home,2020-12-04,54,8,0.148148,1,1,ADAMS-GOLDEN GOOD SHEPPERD HOME,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1374,Woodford,El Paso Health Care Center,2020-12-04,3,0,0.000000,1,1,WOODFORD-EL PASO HEALTH CARE CENTER,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1375,Woodford,Heritage Health El Paso,2020-12-04,41,9,0.219512,1,1,WOODFORD-HERITAGE HEALTH EL PASO,145319,2020-11-22,145319,HERITAGE HEALTH-EL PASO,555 EAST CLAY,EL PASO,IL,Woodford,61738.0,Y,0.0,8.0,0.0,43.0,0.0,9.0,0.0,26.0,0.0,8.0,65.0,43.0,0.0,20.0,0.0,27.0,0.0,0.0,N,N,N,N,0.0,0.0,1000.0,186.0,18.6,N,N,POINT (-89.007621 40.734169),,Y,1-2 DAYS,Y,N,N,Y,Y,N,Y,1-2 DAYS,Y,N,N,Y,Y,N,Y,0.0,5.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,POINT (-89.007621 40.734169)
1376,Woodford,Snyder Village Assisted Living,2020-12-04,2,0,0.000000,1,1,WOODFORD-SNYDER VILLAGE ASSISTED LIVING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1377,Woodford,Snyder Village Health Center,2020-12-04,25,2,0.080000,1,1,WOODFORD-SNYDER VILLAGE HEALTH CENTER,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [17]:
df_facilities_w_cms = add_cms_data_to_df(df_facilities, state_cms_data_latest, 'CMS_ProvNum', 'Federal Provider Number')

# Remove added column to ensure match
cols = list(df_facilities_w_cms.columns)#.remove('county-facName')
cols.remove('county-facName')

# Save to CSV
df_facilities_w_cms.to_csv('Reporting_data/IL_2020-11-27_Facilities_LTC_data_v3.csv', index=False)

# display check
df_facilities_w_cms

Unnamed: 0,County_x,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,county-facName,CMS_ProvNum,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,County_y,Provider Zip Code,Submitted Data,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation,Resident Access to Testing in Facility,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,Geolocation.1
0,Adams,Adams Pointe Senior Living,2020-12-04,4,0,0.000000,1,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Adams,Bradford Villa,2020-12-04,8,1,0.125000,1,1,ADAMS-BRADFORD VILLA,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Adams,Cedarhurst,2020-12-04,24,1,0.041667,1,1,ADAMS-CEDARHURST,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Adams,Chaddock,2020-12-04,3,0,0.000000,1,1,ADAMS-CHADDOCK,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Adams,Golden Good Shepperd Home,2020-12-04,54,8,0.148148,1,1,ADAMS-GOLDEN GOOD SHEPPERD HOME,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1374,Woodford,El Paso Health Care Center,2020-12-04,3,0,0.000000,1,1,WOODFORD-EL PASO HEALTH CARE CENTER,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1375,Woodford,Heritage Health El Paso,2020-12-04,41,9,0.219512,1,1,WOODFORD-HERITAGE HEALTH EL PASO,145319,2020-11-22,145319,HERITAGE HEALTH-EL PASO,555 EAST CLAY,EL PASO,IL,Woodford,61738.0,Y,0.0,8.0,0.0,43.0,0.0,9.0,0.0,26.0,0.0,8.0,65.0,43.0,0.0,20.0,0.0,27.0,0.0,0.0,N,N,N,N,0.0,0.0,1000.0,186.0,18.6,N,N,POINT (-89.007621 40.734169),,Y,1-2 DAYS,Y,N,N,Y,Y,N,Y,1-2 DAYS,Y,N,N,Y,Y,N,Y,0.0,5.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,POINT (-89.007621 40.734169)
1376,Woodford,Snyder Village Assisted Living,2020-12-04,2,0,0.000000,1,1,WOODFORD-SNYDER VILLAGE ASSISTED LIVING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1377,Woodford,Snyder Village Health Center,2020-12-04,25,2,0.080000,1,1,WOODFORD-SNYDER VILLAGE HEALTH CENTER,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [18]:
# Combine State with CMS data
outbreak_df_w_cms = add_cms_data_to_df(outbreak_df, state_cms_data_latest, 'CMS_ProvNum', 'Federal Provider Number')

# Remove added column to ensure match
cols = list(outbreak_df_w_cms.columns)#.remove('county-facName')
cols.remove('county-facName')

# Save to CSV
outbreak_df_w_cms.to_csv('Reporting_data/IL_2020-11-27_Outbreaks_LTC_data_v3.csv', index=False)

# display check
outbreak_df_w_cms

Unnamed: 0,reporting_date,County_x,FacilityName,confirmed_cases,deaths,ReportDate,status,CFR,outbreaks,county-facName,CMS_ProvNum,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,County_y,Provider Zip Code,Submitted Data,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation,Resident Access to Testing in Facility,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,Geolocation.1
0,2020-12-04,Adams,Chaddock,3,0,2020-12-04T00:00:00,Closed,0.000000,1,ADAMS-CHADDOCK,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2020-12-04,Adams,Illinois Veterans Home Quincy,88,3,2020-12-04T00:00:00,Open,0.034091,1,ADAMS-ILLINOIS VETERANS HOME QUINCY,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2020-12-04,Adams,Good Samaritan Home,34,1,2020-12-04T00:00:00,Open,0.029412,1,ADAMS-GOOD SAMARITAN HOME,145773,2020-11-22,145773,GOOD SAMARITAN HOME,2130 HARRISON STREET,QUINCY,IL,Adams,62301.0,Y,0.0,0.0,6.0,9.0,0.0,9.0,1.0,22.0,0.0,0.0,213.0,165.0,12.0,47.0,0.0,30.0,0.0,0.0,Y,N,Y,Y,36.4,0.0,54.5,0.0,0.0,Y,N,POINT (-91.380324 39.913547),,Y,1-2 DAYS,Y,N,Y,N,N,N,Y,1-2 DAYS,Y,Y,Y,Y,N,N,Y,0.0,263.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,POINT (-91.380324 39.913547)
3,2020-12-04,Adams,Adams Pointe Senior Living,4,0,2020-12-04T00:00:00,Closed,0.000000,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2020-12-04,Adams,Bradford Villa,8,1,2020-12-04T00:00:00,Closed,0.125000,1,ADAMS-BRADFORD VILLA,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1670,2020-12-04,Woodford,Heritage Health El Paso,41,9,2020-12-04T00:00:00,Open,0.219512,1,WOODFORD-HERITAGE HEALTH EL PASO,145319,2020-11-22,145319,HERITAGE HEALTH-EL PASO,555 EAST CLAY,EL PASO,IL,Woodford,61738.0,Y,0.0,8.0,0.0,43.0,0.0,9.0,0.0,26.0,0.0,8.0,65.0,43.0,0.0,20.0,0.0,27.0,0.0,0.0,N,N,N,N,0.0,0.0,1000.0,186.0,18.6,N,N,POINT (-89.007621 40.734169),,Y,1-2 DAYS,Y,N,N,Y,Y,N,Y,1-2 DAYS,Y,N,N,Y,Y,N,Y,0.0,5.0,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,POINT (-89.007621 40.734169)
1671,2020-12-04,Woodford,Apostolic Christian Home of Roanoke,6,0,2020-12-04T00:00:00,Open,0.000000,1,WOODFORD-APOSTOLIC CHRISTIAN HOME OF ROANOKE,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1672,2020-12-04,Woodford,Apostolic Christian Home Eureka,43,7,2020-12-04T00:00:00,Open,0.162791,1,WOODFORD-APOSTOLIC CHRISTIAN HOME EUREKA,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1673,2020-12-04,Woodford,The Loft Rehabilitation & Nursing,2,1,2020-12-04T00:00:00,Open,0.500000,1,WOODFORD-THE LOFT REHABILITATION & NURSING,No Match,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# Play Area

In [19]:
display(outbreak_df_w_cms.columns[80:])
cms_data_latest.columns[80:]

Index(['One-Week Supply of Gloves', 'Any Current Supply of Hand Sanitizer',
       'One-Week Supply of Hand Sanitizer', 'Ventilator Dependent Unit',
       'Number of Ventilators in Facility',
       'Number of Ventilators in Use for COVID-19',
       'Any Current Supply of Ventilator Supplies',
       'One-Week Supply of Ventilator Supplies', 'Geolocation'],
      dtype='object')

Index(['Number of Ventilators in Facility',
       'Number of Ventilators in Use for COVID-19',
       'Any Current Supply of Ventilator Supplies',
       'One-Week Supply of Ventilator Supplies',
       'Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents',
       'Weekly Resident COVID-19 Deaths Per 1,000 Residents',
       'Total Resident Confirmed COVID-19 Cases Per 1,000 Residents',
       'Total Resident COVID-19 Deaths Per 1,000 Residents',
       'Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases',
       'County', 'Three or More Confirmed COVID-19 Cases This Week',
       'Initial Confirmed COVID-19 Case This Week', 'Geolocation',
       'Reporting Interval'],
      dtype='object')

In [20]:
state_cms_data_latest['Tested Asymptomatic Residents Without Known Exposure as Surveillance'].value_counts()
state_cms_data_latest['In-House Point-of-Care Test Machine'].value_counts()
field = 'Shortage of Nursing Staff'
field = 'Number of Ventilators in Use for COVID-19'
field = 'Tested Residents with New Signs or Symptoms'

fields = ['Shortage of Nursing Staff', 'Shortage of Clinical Staff',
       'Shortage of Aides', 'Shortage of Other Staff']
for field in fields:
    print(field + ": " + str(state_cms_data_latest[field].value_counts()['Y']))

#     display(state_cms_data_latest[field].value_counts())
#     print("")
    
for field in fields:
    print(field + ": " + str(cms_data_latest[field].value_counts()['Y']))
    #display(cms_data_latest[field].value_counts()['Y'])
    #print("")

Shortage of Nursing Staff: 132
Shortage of Clinical Staff: 23
Shortage of Aides: 148
Shortage of Other Staff: 77
Shortage of Nursing Staff: 2679
Shortage of Clinical Staff: 418
Shortage of Aides: 2993
Shortage of Other Staff: 1580


In [21]:
a = cms_data_latest[field].value_counts()
a['Y']

1580

In [22]:
state_cms_data_latest.head(20)

Unnamed: 0,Week Ending,Federal Provider Number,Provider Name,Provider Address,Provider City,Provider State,Provider Zip Code,Submitted Data,Passed Quality Assurance Check,Residents Weekly Admissions COVID-19,Residents Total Admissions COVID-19,Residents Weekly Confirmed COVID-19,Residents Total Confirmed COVID-19,Residents Weekly Suspected COVID-19,Residents Total Suspected COVID-19,Residents Weekly All Deaths,Residents Total All Deaths,Residents Weekly COVID-19 Deaths,Residents Total COVID-19 Deaths,Number of All Beds,Total Number of Occupied Beds,Resident Access to Testing in Facility,Laboratory Type Is State Health Dept,Laboratory Type Is Private Lab,Laboratory Type Is Other,Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days,Reason for Not Testing Residents - Lack of PPE for Personnel,Reason for Not Testing Residents - Lack of Supplies,Reason for Not Testing Residents - Lack of Access to Laboratory,Reason for Not Testing Residents - Lack of Access to Trained Personnel,Reason for Not Testing Residents - Uncertainty About Reimbursement,Reason for Not Testing Residents - Other,During Past Two Weeks Average Time to Receive Resident Test Results,Has Facility Performed Resident Tests Since Last Report,Tested Residents with New Signs or Symptoms,Tested Asymptomatic Residents in a Unit or Section After a New Case,Tested Asymptomatic Residents Facility-Wide After a New Case,Tested Asymptomatic Residents Without Known Exposure as Surveillance,Tested Another Subgroup of Residents,Able to Test or Obtain Resources to Test All Staff and/or Personnel Within Next 7 Days,Reason for Not Testing Staff and/or Personnel - Lack of PPE for Personnel,Reason for Not Testing Staff and/or Personnel - Lack of Supplies,Reason for Not Testing Staff and/or Personnel - Lack of Access to Laboratory,Reason for Not Testing Staff and/or Personnel - Lack of Access to Trained Personnel,Reason for Not Testing Staff and/or Personnel - Uncertainty About Reimbursement,Reason for Not Testing Staff and/or Personnel - Other,During Past Two Weeks Average Time to Receive Staff and/or Personnel Test Results,Has Facility Performed Staff and/or Personnel Tests Since Last Report,Tested Staff and/or Personnel with New Signs or Symptoms,Tested Asymptomatic Staff and/or Personnel in a Unit or Section After a New Case,Tested Asymptomatic Staff and/or Personnel Facility-Wide After a New Case,Tested Asymptomatic Staff and/or Personnel Without Known Exposure as Surveillance,Tested Another Subgroup of Staff and/or Personnel,In-House Point-of-Care Test Machine,COVID-19 Point-of-Care Tests Performed on Residents Since Last Report,COVID-19 Point-of-Care Tests Performed on Staff and/or Personnel Since Last Report,Enough Supplies to Test All Staff and/or Personnel Using Point-of-Care Test Machine,Staff Weekly Confirmed COVID-19,Staff Total Confirmed COVID-19,Staff Weekly Suspected COVID-19,Staff Total Suspected COVID-19,Staff Weekly COVID-19 Deaths,Staff Total COVID-19 Deaths,Shortage of Nursing Staff,Shortage of Clinical Staff,Shortage of Aides,Shortage of Other Staff,Any Current Supply of N95 Masks,One-Week Supply of N95 Masks,Any Current Supply of Surgical Masks,One-Week Supply of Surgical Masks,Any Current Supply of Eye Protection,One-Week Supply of Eye Protection,Any Current Supply of Gowns,One-Week Supply of Gowns,Any Current Supply of Gloves,One-Week Supply of Gloves,Any Current Supply of Hand Sanitizer,One-Week Supply of Hand Sanitizer,Ventilator Dependent Unit,Number of Ventilators in Facility,Number of Ventilators in Use for COVID-19,Any Current Supply of Ventilator Supplies,One-Week Supply of Ventilator Supplies,"Weekly Resident Confirmed COVID-19 Cases Per 1,000 Residents","Weekly Resident COVID-19 Deaths Per 1,000 Residents","Total Resident Confirmed COVID-19 Cases Per 1,000 Residents","Total Resident COVID-19 Deaths Per 1,000 Residents",Total Residents COVID-19 Deaths as a Percentage of Confirmed COVID-19 Cases,County,Three or More Confirmed COVID-19 Cases This Week,Initial Confirmed COVID-19 Case This Week,Geolocation,Reporting Interval
19,2020-11-22,145244,"MOSAIC OF LAKESHORE, THE",7200 NORTH SHERIDAN ROAD,CHICAGO,IL,60626,N,,0.0,15.0,0.0,12.0,0.0,13.0,0.0,4.0,0.0,11.0,313.0,156.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,4.0,0.0,1.0,0.0,2.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,76.9,70.5,91.7,Cook,,,POINT (-87.663674 42.013194),Week 27 - Nov 22
76053,2020-11-22,145410,BREESE NURSING HOME,1155 NORTH FIRST STREET,BREESE,IL,62230,Y,Y,0.0,4.0,0.0,32.0,0.0,1.0,0.0,11.0,0.0,10.0,112.0,27.0,,,,,Y,N,N,N,N,N,N,<1 DAY,Y,N,N,N,Y,N,Y,N,N,N,N,N,N,<1 DAY,Y,N,N,N,Y,N,Y,5.0,28.0,Y,0.0,8.0,0.0,14.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,1185.2,370.4,31.3,Clinton,N,N,POINT (-89.519924 38.607462),Week 27 - Nov 22
76070,2020-11-22,145413,APERION CARE TOLUCA,101 EAST VIA GHIGLIERI,TOLUCA,IL,61369,Y,Y,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,101.0,78.0,,,,,Y,N,N,N,N,N,N,3-7 DAYS,N,N,N,N,N,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,N,Y,N,Y,0.0,1.0,Y,0.0,4.0,0.0,0.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,12.8,0.0,0.0,Marshall,N,N,POINT (-89.132891 41.011224),Week 27 - Nov 22
76097,2020-11-22,145126,ALDEN LINCOLN REHAB & H C CTR,504 WEST WELLINGTON AVENUE,CHICAGO,IL,60657,Y,Y,0.0,13.0,0.0,23.0,0.0,2.0,0.0,18.0,0.0,6.0,96.0,77.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,Y,N,N,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,Y,N,N,Y,2.0,60.0,Y,1.0,6.0,0.0,4.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,298.7,77.9,26.1,Cook,N,N,POINT (-87.642264 41.936534),Week 27 - Nov 22
76226,2020-11-22,145469,PARIS HEALTH AND REHAB CENTER,1011 NORTH MAIN STREET,PARIS,IL,61944,N,,0.0,0.0,0.0,0.0,0.0,12.0,0.0,9.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,27.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,Edgar,,,POINT (-87.694416 39.626423),Week 27 - Nov 22
76331,2020-11-22,145323,CARRIER MILLS NSG & REHAB CTR,"6789 US RT 45, P O BOX 68",CARRIER MILLS,IL,62917,Y,Y,0.0,0.0,1.0,9.0,0.0,1.0,1.0,13.0,0.0,0.0,99.0,88.0,,,,,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,Y,Y,N,Y,N,N,N,N,N,N,1-2 DAYS,Y,N,N,Y,Y,N,Y,2.0,0.0,Y,0.0,13.0,0.0,2.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,11.4,0.0,102.3,0.0,0.0,Saline,N,N,,Week 27 - Nov 22
76387,2020-11-22,145502,TAYLORVILLE CARE CENTER,600 SOUTH HOUSTON,TAYLORVILLE,IL,62568,Y,Y,0.0,1.0,0.0,67.0,0.0,11.0,0.0,39.0,0.0,17.0,98.0,52.0,,,,,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,Y,N,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,Y,N,N,Y,0.0,0.0,Y,2.0,44.0,0.0,17.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,0.0,1288.5,326.9,25.4,Christian,N,N,POINT (-89.311245 39.54411),Week 27 - Nov 22
76394,2020-11-22,145437,APERION CARE PRINCETON,515 BUREAU VALLEY PARKWAY,PRINCETON,IL,61356,Y,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bureau,,,POINT (-89.450485 41.369886),Week 27 - Nov 22
76402,2020-11-22,145026,WESTMINSTER PLACE,3200 GRANT STREET,EVANSTON,IL,60201,Y,Y,1.0,3.0,0.0,6.0,0.0,33.0,2.0,16.0,1.0,1.0,105.0,56.0,,,,,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,N,Y,Y,N,Y,N,N,N,N,N,N,3-7 DAYS,Y,N,Y,Y,N,N,Y,0.0,0.0,Y,2.0,19.0,2.0,63.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,0.0,17.9,107.1,17.9,16.7,Cook,N,N,POINT (-87.721945 42.058918),Week 27 - Nov 22
76417,2020-11-22,145447,HERITAGE HEALTH-ELGIN,355 RAYMOND STREET,ELGIN,IL,60120,Y,Y,0.0,0.0,9.0,35.0,0.0,0.0,1.0,12.0,1.0,1.0,94.0,60.0,,,,,Y,N,N,N,N,N,N,<1 DAY,Y,Y,N,N,Y,N,Y,N,N,N,N,N,N,<1 DAY,Y,N,N,N,Y,N,Y,57.0,77.0,Y,0.0,26.0,0.0,0.0,0.0,0.0,N,N,N,N,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,N,,,,,150.0,16.7,583.3,16.7,2.9,Kane,Y,N,POINT (-88.273648 42.029128),Week 27 - Nov 22


In [23]:
import numpy as np
df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], 'value': np.random.randn(4)})
df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'],
                    'value': np.random.randn(4)})

In [24]:
# show all records from df1
pd.merge(df1, df2, on='key', how='left')

Unnamed: 0,key,value_x,value_y
0,A,0.625011,
1,B,0.419353,-1.126457
2,C,1.767383,
3,D,-0.920111,-0.150778
4,D,-0.920111,0.475407


In [25]:
print(len(state_cms_data_latest))
print(len(state_cms_data_latest['Federal Provider Number'].unique()))
# TODO - add check these are equal (otherwise extra row will occur)

712
712


In [26]:
cms_data['Week Ending'].min()

Timestamp('2020-05-24 00:00:00')

In [27]:
cms_data[['Week Ending', 'Geolocation']]

Unnamed: 0,Week Ending,Geolocation
0,2020-05-24,POINT (-82.539097 27.496201000000003)
1,2020-05-24,POINT (-81.864238 26.647471)
2,2020-05-24,POINT (-82.751998 27.975286)
3,2020-05-24,POINT (-82.413587 29.661501)
4,2020-05-31,POINT (-82.724356 27.970223)
...,...,...
414660,2020-10-04,
414661,2020-09-06,POINT (-95.74932 29.764549)
414662,2020-08-23,POINT (-98.051936 27.758739)
414663,2020-08-23,


In [28]:
state_cms_data_latest[['Week Ending', 'Geolocation']]

Unnamed: 0,Week Ending,Geolocation
19,2020-11-22,POINT (-87.663674 42.013194)
76053,2020-11-22,POINT (-89.519924 38.607462)
76070,2020-11-22,POINT (-89.132891 41.011224)
76097,2020-11-22,POINT (-87.642264 41.936534)
76226,2020-11-22,POINT (-87.694416 39.626423)
...,...,...
107648,2020-11-22,POINT (-87.623323 41.834371)
107685,2020-11-22,POINT (-89.99439 38.407675000000005)
107951,2020-11-22,POINT (-88.935446 38.311998)
108476,2020-11-22,POINT (-89.01823 39.619704)


In [29]:
outbreak_df_w_cms[['Week Ending', 'CMS_ProvNum', 'Geolocation']]

Unnamed: 0,Week Ending,CMS_ProvNum,Geolocation,Geolocation.1
0,NaT,No Match,,
1,NaT,No Match,,
2,2020-11-22,145773,POINT (-91.380324 39.913547),POINT (-91.380324 39.913547)
3,NaT,No Match,,
4,NaT,No Match,,
...,...,...,...,...
1670,2020-11-22,145319,POINT (-89.007621 40.734169),POINT (-89.007621 40.734169)
1671,NaT,No Match,,
1672,NaT,No Match,,
1673,NaT,No Match,,


In [30]:
cms_data.columns

Index(['Week Ending', 'Federal Provider Number', 'Provider Name',
       'Provider Address', 'Provider City', 'Provider State',
       'Provider Zip Code', 'Submitted Data', 'Passed Quality Assurance Check',
       'Residents Weekly Admissions COVID-19',
       'Residents Total Admissions COVID-19',
       'Residents Weekly Confirmed COVID-19',
       'Residents Total Confirmed COVID-19',
       'Residents Weekly Suspected COVID-19',
       'Residents Total Suspected COVID-19', 'Residents Weekly All Deaths',
       'Residents Total All Deaths', 'Residents Weekly COVID-19 Deaths',
       'Residents Total COVID-19 Deaths', 'Number of All Beds',
       'Total Number of Occupied Beds',
       'Resident Access to Testing in Facility',
       'Laboratory Type Is State Health Dept',
       'Laboratory Type Is Private Lab', 'Laboratory Type Is Other',
       'Able to Test or Obtain Resources to Test All Current Residents Within Next 7 Days',
       'Reason for Not Testing Residents - Lack of P