In [1]:
import json
import os

import IPython.display
import pandas as pd
import urllib3 as urllib

In [2]:
# Shared urllib3 connection pool; getResponse() sends its requests through it.
http = urllib.PoolManager()

def getResponse(url):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON payload.

    Parameters
    ----------
    url : str
        Address of the JSON resource to download.

    Returns
    -------
    object
        The parsed JSON document (whatever ``json.loads`` produces).

    Raises
    ------
    RuntimeError
        If the server answers with any status other than 200.
    """
    operUrl = http.request('GET', url)
    if operUrl.status != 200:
        # Report the code through ``.status`` (the same attribute tested above) and
        # fail loudly, instead of falling through to return a never-assigned name.
        raise RuntimeError("Error receiving data: HTTP status %s" % operUrl.status)
    return json.loads(operUrl.data.decode('utf-8'))

# 1 - Pull JSON File from Website

In [3]:
def pull_IL_json_from_web():
    """Download COVIDLTC.json from dph.illinois.gov and archive it under ``Source_data/``.

    The snapshot is written to ``Source_data/IL_<YYYY-MM-DD>_LTC_data_Source.json``,
    where the date is built from the feed's ``LastUpdateDate`` entry.

    Returns
    -------
    str
        Relative path of the JSON file that was written.
    """
    ltc_data = getResponse('http://www.dph.illinois.gov/sitefiles/COVIDLTC.json')

    # Extract Reporting Data
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    # Saving a copy of source data
    # Create the target folder on first use so a missing directory does not abort the run.
    os.makedirs("Source_data", exist_ok=True)
    file = "Source_data/IL_" + reporting_date + "_LTC_data_Source.json"
    # The context manager closes the handle even if the write raises.
    with open(file, "w") as f:
        f.write(json.dumps(ltc_data))
    return file

In [4]:
# ltc_data = getResponse('http://www.dph.illinois.gov/sitefiles/COVIDLTC.json')

# # Extract Reporting Data
# reporting_date = '%d-%02d-%02d' %(ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])

# #Saving a copy of source data 
# ltc_data_json = json.dumps(ltc_data)
# f = open("Source_data/IL_" + reporting_date + "_LTC_data_Source.json","w")
# f.write(ltc_data_json)
# f.close()

In [5]:
# Download the current snapshot, then read it back from the archived copy.
json_file = pull_IL_json_from_web()
with open(json_file) as f:
    ltc_data = json.load(f)

# Extract Reporting Data (formatted as YYYY-MM-DD)
reporting_date = '%d-%02d-%02d' % (
    ltc_data['LastUpdateDate']['year'],
    ltc_data['LastUpdateDate']['month'],
    ltc_data['LastUpdateDate']['day'],
)

# 2 - Put Outbreak data in DataFrame and Augment
Data is at the outbreak level. A facility can have one or more outbreaks (it is unclear whether a facility with zero outbreaks can appear).

In [6]:
def outbreak_df_from_file(filename):
    """Load an archived LTC JSON snapshot into an outbreak-level DataFrame.

    Parameters
    ----------
    filename : str
        Path to a JSON file containing ``LastUpdateDate`` and ``FacilityValues``.

    Returns
    -------
    pandas.DataFrame
        One row per ``FacilityValues`` entry, plus ``reporting_date``
        (YYYY-MM-DD string), ``CFR`` (deaths / confirmed_cases) and ``outbreaks``
        (constant 1, so outbreaks can be counted by summing).
    """
    with open(filename) as f:
        ltc_data = json.load(f)

    # Extract Reporting Data
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    # Previously the parsed values were discarded and the function returned None;
    # build and return the frame that the function name promises.
    outbreaks = pd.DataFrame(ltc_data['FacilityValues'])
    outbreaks['reporting_date'] = reporting_date
    outbreaks['CFR'] = outbreaks['deaths'] / outbreaks['confirmed_cases']
    outbreaks['outbreaks'] = 1  # to allow counting # of outbreaks by Facility
    return outbreaks

In [7]:
# Outbreak-level frame: one row per entry in the feed's 'FacilityValues' list,
# augmented with the reporting date, case-fatality ratio and a counting column.
df = pd.DataFrame(ltc_data['FacilityValues']).assign(
    reporting_date=reporting_date,
    CFR=lambda frame: frame['deaths'] / frame['confirmed_cases'],
    outbreaks=1,  # to allow counting # of outbreaks by Facility
)

# Save Outbreak data to a file
outbreak_file = 'Reporting_data/IL_' + reporting_date + '_Outbreaks_LTC_data_v2.csv'
df.to_csv(outbreak_file, index=False)

# Preview the five outbreaks with the most deaths.
df.sort_values(by='deaths', ascending=False).head(5)

Unnamed: 0,County,FacilityName,confirmed_cases,deaths,status,reporting_date,CFR,outbreaks
179,Cook,Niles Nursing and Rehab Center,213,54,Closed,2020-11-06,0.253521,1
194,Cook,Norridge Gardens,167,46,Open,2020-11-06,0.275449,1
116,Cook,Villa at Windsor Park,160,44,Closed,2020-11-06,0.275,1
1207,Will,Meadowbrook Manor of Bolingbrook,188,41,Closed,2020-11-06,0.218085,1
229,Cook,Woodbridge Nursing Pavilion,219,40,Closed,2020-11-06,0.182648,1


# 3 - Print Summary Data

In [8]:
# Get summary data from feed - Note this may not match totals - ST-TODO: Check if summary data and totals from raw data match
deaths = ltc_data['LTC_Reported_Cases']['deaths']
confirmed_cases = ltc_data['LTC_Reported_Cases']['confirmed_cases']

print('Date: %s' % reporting_date)
print('Cases: %d' % confirmed_cases)
print('Deaths:  %d' % deaths)
# df holds one row per outbreak, so its length is the outbreak count. This replaces
# value_counts()[0], an integer lookup on a string-labelled Series that also fails
# when the frame is empty.
print('Outbreaks: %d' % len(df))
# Each distinct (County, FacilityName) pair is one facility.
print('Facilities: %d' % df.groupby(['County', 'FacilityName']).ngroups)

Date: 2020-11-06
Cases: 36683
Deaths:  5253
Outbreaks: 1309
Facilities: 1116


# 4 - Get Facility Level data, augment and save
Facilities can have multiple outbreaks, need to sum these to get counts at the Facility level

In [9]:
# Roll outbreaks up to one row per facility. numeric_only=True keeps the text columns
# (status, reporting_date) out of the sum instead of concatenating their strings.
df_facilities = df.groupby(['County', 'FacilityName']).sum(numeric_only=True)
# The summed per-outbreak CFR is meaningless, so recompute it from the facility totals.
df_facilities['CFR'] = df_facilities['deaths'] / df_facilities['confirmed_cases']

# Sort once and reuse the result for both the CSV export and the preview.
facilities_by_cases = df_facilities.sort_values(by='confirmed_cases', ascending=False)
facilities_by_cases.to_csv('Reporting_data/IL_' + reporting_date + '_Facilities_LTC_data_v2.csv')
facilities_by_cases.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,confirmed_cases,deaths,CFR,outbreaks
County,FacilityName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Cook,Ludeman Developmental Center,263,9,0.034221,1
Cook,City View Multi Care Center,249,15,0.060241,1
Kankakee,Samuel H. Shapiro Developmental Center,232,1,0.00431,2
Lake,Avantara Long Grove,228,32,0.140351,1
Cook,Peterson Park Health Care,221,35,0.158371,2
Cook,Symphony at Midway,220,27,0.122727,2
Cook,Woodbridge Nursing Pavilion,219,40,0.182648,1
Cook,Niles Nursing and Rehab Center,216,54,0.25,2
Cook,Symphony of Morgan Park,202,26,0.128713,1
Cook,Symphony of South Shore,197,29,0.147208,2


# 5 - County Level Data & Charts

In [10]:
# County Level Data
# numeric_only=True keeps the text columns (FacilityName, status, reporting_date)
# out of the sum instead of concatenating their strings.
df_county = df.groupby(by=['County']).sum(numeric_only=True)
# The summed per-outbreak CFR is meaningless, so recompute it from the county totals.
df_county['CFR'] = (df_county['deaths'] / df_county['confirmed_cases'])

# Sort once and reuse the result for both the CSV export and the preview.
county_by_deaths = df_county.sort_values('deaths', ascending=False)
county_by_deaths.to_csv('Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv')
county_by_deaths.head(10)

Unnamed: 0_level_0,confirmed_cases,deaths,CFR,outbreaks
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cook,15294,2450,0.160194,409
DuPage,2872,463,0.161212,118
Lake,2319,342,0.147477,90
Will,1533,212,0.138291,59
Kane,1497,198,0.132265,77
St. Clair,951,132,0.138801,39
Madison,966,112,0.115942,39
Winnebago,812,107,0.131773,39
McHenry,554,92,0.166065,22
Rock Island,476,69,0.144958,17


In [11]:
# import altair as alt
# df1=df_county.sort_values(by=['deaths'], ascending=False).reset_index()
# cols = ['Deaths Non LTC', 'LTC Deaths']
# cols = ['LTC Deaths', 'Deaths Non LTC']


# chart1 = alt.Chart(df_county.sort_values(by=['deaths'], ascending=False).reset_index()).mark_bar().encode(
#     x='deaths:Q',
#     y=alt.Y('County:O', sort='-x'),
#     tooltip=['County', 'deaths', 'confirmed_cases', 'CFR']
# )
# chart2=chart1.encode(x=alt.X('CFR', axis=alt.Axis(format='%')))
# #chart2=chart1.encode(x=alt.X('CFR'))


# chart1 | chart2

In [12]:
# import altair as alt
# df1=df_county.sort_values(by=['deaths'], ascending=False).reset_index()
# cols = ['Deaths Non LTC', 'LTC Deaths']
# cols = ['LTC Deaths', 'Deaths Non LTC']


# chart1 = alt.Chart(df_county.sort_values(by=['deaths'], ascending=False).reset_index()).mark_bar().encode(
#     x='deaths:Q',
#     y=alt.Y('County:O'),
#     tooltip=['County', 'deaths', 'confirmed_cases', 'CFR']
# )
# chart2=chart1.encode(x=alt.X('CFR', axis=alt.Axis(format='%')))
# #chart2=chart1.encode(x=alt.X('CFR'))


# chart1 | chart2 