# Load IL LTC data from Web

## Doing
 - [ ] Add [CMS Federal Provider Number] to [df_facilities]

## To Do's
 - [ ] Check for new [State LTC Facility Name] values in the State feed (for use in identifying/looking for a matching Name in CMS)
 - [ ] Add CMS info to [df_facilities]
 - [ ] Fix so that we map County and Facility Name to CMS ProvNUM
 
## Done
 - [x] Load [State LTC Facility Name] to [CMS Federal Provider Number] 


In [1]:
import pandas as pd
import urllib3 as urllib
import json
import glob
import IPython.display

# Functions

In [2]:
# Connection pool used by getResponse for every request in this notebook.
http = urllib.PoolManager()


def getResponse(url):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON payload.

    Args:
        url: Address of an endpoint whose body is UTF-8 encoded JSON.

    Returns:
        The parsed JSON document, as produced by ``json.loads``.

    Raises:
        RuntimeError: If the response status is anything other than 200.
    """
    response = http.request('GET', url)
    if response.status != 200:
        # Raise instead of falling through: there is no payload to return,
        # and callers index straight into the result.
        raise RuntimeError(
            "Error receiving data: HTTP %s from %s" % (response.status, url)
        )
    return json.loads(response.data.decode('utf-8'))

In [3]:
def pull_IL_json_from_web():
    """Download the IL LTC feed and save a dated copy of it on disk.

    The payload is fetched through ``getResponse`` and re-serialised to
    ``Source_data/IL_<YYYY-MM-DD>_LTC_data_Source.json``, where the date is
    built from the feed's ``LastUpdateDate`` entry. The ``Source_data``
    directory must already exist.

    Returns:
        Path of the JSON file that was written.
    """
    ltc_data = getResponse('https://idph.illinois.gov/DPHPublicInformation/api/covid/getltcdata')
    # Alternate URL kept from earlier versions of this cell:
    # ltc_data = getResponse('http://www.dph.illinois.gov/sitefiles/COVIDLTC.json')

    # Extract the reporting date
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    # Save a copy of the source data; the context manager closes the file
    # even when the write raises.
    ltc_data_json = json.dumps(ltc_data)
    file = "Source_data/IL_" + reporting_date + "_LTC_data_Source.json"
    with open(file, "w") as f:
        f.write(ltc_data_json)
    return file

In [4]:
def outbreak_df_from_file(filename):
    """Build the outbreak-level DataFrame and summary from a saved feed file.

    Reads the JSON file, turns ``FacilityValues`` into a DataFrame (one row
    per outbreak), adds ``reporting_date``, ``CFR`` (deaths / confirmed_cases)
    and a constant ``outbreaks`` column, and writes the result to
    ``Reporting_data/IL_<date>_Outbreaks_LTC_data_v2.csv``. The
    ``Reporting_data`` directory must already exist.

    Args:
        filename: Path of a JSON file containing ``LastUpdateDate``,
            ``FacilityValues`` and ``LTC_Reported_Cases`` entries.

    Returns:
        A ``(df, summary, reporting_date)`` tuple: the augmented DataFrame,
        a dict with keys ``Date``, ``Cases``, ``Deaths``, ``Outbreaks`` and
        ``Facilities``, and the reporting date as a ``YYYY-MM-DD`` string.
    """
    with open(filename) as f:
        ltc_data = json.load(f)

    # Extract the reporting date
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    df = pd.DataFrame(ltc_data['FacilityValues'])
    df.insert(0, 'reporting_date', reporting_date)
    df['CFR'] = df['deaths'] / df['confirmed_cases']
    df['outbreaks'] = 1  # to allow counting # of outbreaks by Facility

    # Save Outbreak data to a file
    outbreak_file = 'Reporting_data/IL_' + reporting_date + '_Outbreaks_LTC_data_v2.csv'
    df.to_csv(outbreak_file, index=False)

    # Get summary data from feed - Note this may not match totals
    # ST-TODO: Check if summary data and totals from raw data match
    reported = ltc_data['LTC_Reported_Cases']
    summary = {
        'Date': reporting_date,
        'Cases': reported['confirmed_cases'],
        'Deaths': reported['deaths'],
        # Every row carries the same reporting_date, so the outbreak count is
        # simply the number of rows.
        'Outbreaks': len(df),
        # A facility is identified by its (County, FacilityName) pair.
        'Facilities': df.groupby(['County', 'FacilityName']).ngroups,
    }

    return df, summary, reporting_date

In [5]:
def process_json_IL(filename, display_dfs=False, display_summary=True):
    """Process a saved feed file into outbreak, facility and county data.

    Steps:
       1) Load the outbreak DataFrame and summary via ``outbreak_df_from_file``
          (which also writes the outbreak CSV).
       2) Aggregate outbreaks per (County, FacilityName) and write
          ``Reporting_data/IL_<date>_Facilities_LTC_data_v2.csv``.
       3) Aggregate facilities per County and write
          ``Reporting_data/IL_<date>_County_LTC_stats_v2.csv``.

    Args:
        filename: Path of the saved JSON feed file.
        display_dfs: When true, show the top rows of each DataFrame.
        display_summary: When true, print each summary entry as ``key: value``.

    Returns:
        A ``(reporting_date, summary, outbreak_df, df_facilities, df_county)``
        tuple.
    """
    outbreak_df, summary, reporting_date = outbreak_df_from_file(filename)

    # Print Summary Data
    if display_summary:
        for k, v in summary.items():
            print(k + ": " + str(v))

    # Facility-level data. numeric_only=True keeps string columns (status,
    # dates) out of the sum; without it newer pandas concatenates them.
    df_facilities = outbreak_df.groupby(['County', 'FacilityName']).sum(numeric_only=True)
    # A summed CFR is meaningless, so recompute it from the summed counts.
    df_facilities['CFR'] = df_facilities['deaths'] / df_facilities['confirmed_cases']
    df_facilities['facilities'] = 1  # to allow counting # of facilities by County
    df_facilities.insert(0, 'ReportingDate', reporting_date)
    facility_file = 'Reporting_data/IL_' + reporting_date + '_Facilities_LTC_data_v2.csv'
    df_facilities.sort_values(by='confirmed_cases', ascending=False).to_csv(facility_file)

    # County-level data. The string ReportingDate column must not be summed,
    # otherwise the insert below fails because the column already exists.
    df_county = df_facilities.groupby(by=['County']).sum(numeric_only=True)
    df_county['CFR'] = df_county['deaths'] / df_county['confirmed_cases']
    df_county.insert(0, 'ReportingDate', reporting_date)
    county_file = 'Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv'
    df_county.sort_values('confirmed_cases', ascending=False).to_csv(county_file)

    if display_dfs:
        # Imported here so the function also works outside a notebook, where
        # ``display`` is not available as a builtin.
        from IPython.display import display

        print("\nOutbreak Data\n=============")
        display(outbreak_df.sort_values(by='deaths', ascending=False).head(5))
        print("\nFacility Data\n=============")
        display(df_facilities.sort_values('deaths', ascending=False).head(10))
        print("\nCounty Data\n===========")
        display(df_county.sort_values(by='confirmed_cases', ascending=False).head(10))

    return reporting_date, summary, outbreak_df, df_facilities, df_county

# 1 - Pull JSON File from Website

In [6]:
# Download the current feed; the helper returns the path of the saved copy.
json_file = pull_IL_json_from_web()
#!chmod 444 $json_file

# Read the saved copy back in.
with open(json_file) as f:
    ltc_data = json.load(f)

# Extract the reporting date as YYYY-MM-DD from the feed's LastUpdateDate
reporting_date = '%d-%02d-%02d' % tuple(
    ltc_data['LastUpdateDate'][part] for part in ('year', 'month', 'day')
)
reporting_date

'2020-11-27'

# 2 - Load Supporting Files (CMS et al)

In [7]:
# Load Facility Name to CMS ID json file
# JSON file holding the Facility Name -> CMS ID lookup
fac2CMS_file = 'IL_FacilityName_to_CMS_ID.json'
with open(fac2CMS_file) as f:
    ltc_name2cms_id = json.loads(f.read())

# 3 - Process JSON File to Create Files and DFs
The source data is at the Outbreak level. A Facility can have one or more Outbreaks (it is unclear whether a Facility with zero Outbreaks can appear).
This step creates a file and a DataFrame at each of three levels: Outbreaks, Facilities and Counties.

In [8]:
# Report which saved feed file is being processed, then build all three levels.
print('Source File: ' + str(json_file))
reporting_date, summary, outbreak_df, df_facilities, df_county = process_json_IL(
    json_file,
    display_dfs=True,
)

Source File: Source_data/IL_2020-11-27_LTC_data_Source.json
Date: 2020-11-27
Cases: 45882
Deaths: 6047
Outbreaks: 1584
Facilities: 1319

Outbreak Data


Unnamed: 0,reporting_date,County,FacilityName,confirmed_cases,deaths,ReportDate,status,CFR,outbreaks
198,2020-11-27,Cook,Niles Nursing and Rehab Center,213,54,2020-11-27T00:00:00,Closed,0.253521,1
213,2020-11-27,Cook,Norridge Gardens,168,46,2020-11-27T00:00:00,Open,0.27381,1
135,2020-11-27,Cook,Villa at Windsor Park,160,44,2020-11-27T00:00:00,Closed,0.275,1
1461,2020-11-27,Will,Meadowbrook Manor of Bolingbrook,188,41,2020-11-27T00:00:00,Closed,0.218085,1
248,2020-11-27,Cook,Woodbridge Nursing Pavilion,219,40,2020-11-27T00:00:00,Closed,0.182648,1



Facility Data


Unnamed: 0_level_0,Unnamed: 1_level_0,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities
County,FacilityName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Cook,Niles Nursing and Rehab Center,2020-11-27,217,54,0.248848,2,1
Cook,Norridge Gardens,2020-11-27,168,46,0.27381,1,1
Cook,Villa at Windsor Park,2020-11-27,162,44,0.271605,2,1
Will,Meadowbrook Manor of Bolingbrook,2020-11-27,194,41,0.21134,2,1
Cook,Woodbridge Nursing Pavilion,2020-11-27,219,40,0.182648,1,1
DuPage,Manorcare Hinsdale,2020-11-27,190,37,0.194737,2,1
Cook,Peterson Park Health Care,2020-11-27,221,35,0.158371,2,1
Cook,Symphony at 87th,2020-11-27,182,34,0.186813,1,1
Cook,Elevate Care Chicago North,2020-11-27,184,34,0.184783,2,1
Cook,Glenview Terrace,2020-11-27,190,33,0.173684,1,1



County Data


Unnamed: 0_level_0,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cook,2020-11-27,16613,2547,0.153314,447,360
DuPage,2020-11-27,3377,499,0.147764,136,95
Lake,2020-11-27,2749,379,0.137868,108,91
Kane,2020-11-27,2057,244,0.118619,91,72
Will,2020-11-27,1899,234,0.123223,71,42
Madison,2020-11-27,1347,155,0.115071,51,39
St. Clair,2020-11-27,1084,141,0.130074,46,44
Winnebago,2020-11-27,900,115,0.127778,43,39
Kankakee,2020-11-27,770,47,0.061039,21,16
Rock Island,2020-11-27,697,71,0.101865,23,22


In [9]:
def facility2CMSNum(facilityName):
    """Return the CMS provider number for ``facilityName``.

    The lookup uses the module-level ``ltc_name2cms_id`` mapping; names that
    are not in the mapping yield the string "No Match".

    Example: facility2CMSNum('Abbington Rehab Nursing Center')
    """
    return ltc_name2cms_id.get(facilityName, "No Match")


# Move the (County, FacilityName) index into ordinary columns. The guard keeps
# the cell re-runnable: a second reset_index would add a stray 'index' column.
if 'FacilityName' not in df_facilities.columns:
    df_facilities.reset_index(inplace=True)
df_facilities['CMS_ProvNum'] = df_facilities['FacilityName'].apply(facility2CMSNum)

In [10]:
df_facilities.head()

Unnamed: 0,County,FacilityName,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities,CMS_ProvNum
0,Adams,Adams Pointe Senior Living,2020-11-27,4,0,0.0,1,1,No Match
1,Adams,Bradford Villa,2020-11-27,8,1,0.125,1,1,No Match
2,Adams,Cedarhurst,2020-11-27,24,1,0.041667,1,1,No Match
3,Adams,Chaddock,2020-11-27,3,0,0.0,1,1,No Match
4,Adams,Golden Good Shepperd Home,2020-11-27,52,8,0.153846,1,1,146111


# Play Area

In [11]:
df_facilities.describe()

Unnamed: 0,confirmed_cases,deaths,CFR,outbreaks,facilities
count,1319.0,1319.0,1319.0,1319.0,1319.0
mean,34.228203,4.4837,0.090985,1.20091,1.0
std,40.480276,7.132467,0.110524,0.449041,0.0
min,2.0,0.0,0.0,1.0,1.0
25%,6.0,0.0,0.0,1.0,1.0
50%,17.0,1.0,0.051724,1.0,1.0
75%,50.0,6.0,0.163485,1.0,1.0
max,277.0,54.0,1.0,3.0,1.0


In [12]:
# Toy three-row frame with columns A and B for the experiments that follow.
df = pd.DataFrame([[4, 9], [5, 10], [6,12]] , columns=['A', 'B'])
df

Unnamed: 0,A,B
0,4,9
1,5,10
2,6,12


In [13]:
df['A']

0    4
1    5
2    6
Name: A, dtype: int64

In [14]:
%%timeit
df.assign(C = lambda x: (x['A']*2))

354 µs ± 556 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [18]:
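# DOES NOT WORK: DataFrame.apply defaults to axis=0, so the lambda receives each
# column (indexed 0..2) and x['A'] raises KeyError; pass axis=1 to apply it per row.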
# %%timeit
#df.apply(lambda x: x['A'] *3)