In [1]:
import pandas as pd
import urllib3 as urllib
import json
import glob
import IPython.display

# Functions

In [2]:
http = urllib.PoolManager()

def getResponse(url):
    operUrl = http.request('GET', url)
    if(operUrl.status==200):
        data = operUrl.data
        jsonData = json.loads(data.decode('utf-8'))
    else:
        print("Error receiving data", operUrl.getcode())
    return jsonData

In [3]:
def pull_IL_json_from_web():
    ltc_data = getResponse('https://idph.illinois.gov/DPHPublicInformation/api/covid/getltcdata')
    #ltc_data = getResponse('http://www.dph.illinois.gov/sitefiles/COVIDLTC.json')

    # Extract Reporting Data
    reporting_date = '%d-%02d-%02d' %(ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])

    #Saving a copy of source data 
    ltc_data_json = json.dumps(ltc_data)
    file = "Source_data/IL_" + reporting_date + "_LTC_data_Source.json"
    f = open(file, "w")
    f.write(ltc_data_json)
    f.close()
    return file

In [4]:
def outbreak_df_from_file(filename):
    """ From Json file:
        1) return DataFrame augmented and save to file
        2) return Summary data"""
    with open(filename) as f:
      ltc_data = json.load(f)

    # Extract Reporting Data
    reporting_date = '%d-%02d-%02d' %(ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])
    df = pd.DataFrame(ltc_data['FacilityValues'])
    df.insert(0, 'reporting_date', reporting_date)
    df['CFR'] = (df['deaths'] / df['confirmed_cases'])
    df['outbreaks'] = 1 # to allow counting # of outbreaks by Facility
    #Save Outbreak data to a file
    outbreak_file = 'Reporting_data/IL_' + reporting_date + '_Outbreaks_LTC_data_v2.csv'
    df.to_csv(outbreak_file, index = False)
    
    # Get summary data from feed - Note this may not match totals - ST-TODO: Check if summary data and totals from raw data match
    deaths = ltc_data['LTC_Reported_Cases']['deaths']
    confirmed_cases = ltc_data['LTC_Reported_Cases']['confirmed_cases']
    facility_cnt = len(df.groupby(['County', 'FacilityName']).size().reset_index().rename(columns={0:'count'}).sort_values(by='count', ascending=False))
    summary = {}
    summary['Date'] = reporting_date
    summary['Cases'] = confirmed_cases
    summary['Deaths'] = deaths
    summary['Outbreaks'] = df.reporting_date.value_counts()[0]
    summary['Facilities'] = facility_cnt
    
    return df, summary, reporting_date

In [5]:
def process_json_IL (filename, display_dfs=False, display_summary=True):
    """Process a JSON file to:
       1) Produce Summary Info
       2) Produce Outbreak file and dataframe
       3) Produce Facility file and dataframe
       4) Produce County file and dataframe
        
       TODO - make display dataframes optional
       TODO - make display summary info optional"""
    [outbreak_df, summary, reporting_date] = outbreak_df_from_file(filename)

    # Print Summary Data
    if display_summary:
        for k,v in summary.items():
            print(k + ": " + str(v))    

    # Save and Display Facility data
    df_facilities = outbreak_df.groupby(['County', 'FacilityName']).sum()
    df_facilities['CFR'] = df_facilities['deaths'] / df_facilities['confirmed_cases']
    df_facilities['facilities'] = 1
    df_facilities.insert(0, 'ReportingDate', reporting_date)
    df_facilities.sort_values(by='confirmed_cases', ascending=False).to_csv('Reporting_data/IL_' + reporting_date + '_Facilities_LTC_data_v2.csv')

    # Save and Display County Level Data
    df_county = df_facilities.groupby(by=['County']).sum()
    df_county['CFR'] = (df_county['deaths'] / df_county['confirmed_cases'])
    df_county.insert(0, 'ReportingDate', reporting_date)
    filename = 'Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv'
    df_county.sort_values('confirmed_cases', ascending=False).to_csv('Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v2.csv')
    
    
    if display_dfs:
        print("\nOutbreak Data\n=============")
        display(outbreak_df.sort_values(by='deaths', ascending=False).head(5))
        print("\nFacility Data\n=============")
        display(df_facilities.sort_values('deaths', ascending=False).head(10))
        print("\nCounty Data\n===========")
        display(df_county.sort_values(by='confirmed_cases', ascending=False).head(10))

    return reporting_date, summary, outbreak_df, df_facilities, df_county

# 1 - Pull JSON File from Website

In [8]:
json_file = pull_IL_json_from_web()
!chmod 444 $json_file
with open(json_file) as f:
  ltc_data = json.load(f)
          
# Extract Reporting Data
reporting_date = '%d-%02d-%02d' % (ltc_data['LastUpdateDate']['year'], ltc_data['LastUpdateDate']['month'], ltc_data['LastUpdateDate']['day'])
reporting_date

'2020-11-13'

# 2 - Process JSON File to Create Files and DFs
Data is at the Outbreak level. A Facility can have 1 to Many Outbreaks (not sure about 0).
Will create a file and data frame at the level of: Outbreaks, Facilities and Counties

In [9]:
print('Source File: ' + str(json_file))
[reporting_date, summary, outbreak_df, df_facilities, df_county] = process_json_IL(json_file, display_dfs=True)

Source File: Source_data/IL_2020-11-13_LTC_data_Source.json
Date: 2020-11-13
Cases: 39686
Deaths: 5493
Outbreaks: 1406
Facilities: 1185

Outbreak Data


Unnamed: 0,reporting_date,County,FacilityName,confirmed_cases,deaths,ReportDate,status,CFR,outbreaks
191,2020-11-13,Cook,Niles Nursing and Rehab Center,213,54,2020-11-13T00:00:00,Closed,0.253521,1
206,2020-11-13,Cook,Norridge Gardens,167,46,2020-11-13T00:00:00,Open,0.275449,1
128,2020-11-13,Cook,Villa at Windsor Park,160,44,2020-11-13T00:00:00,Closed,0.275,1
1294,2020-11-13,Will,Meadowbrook Manor of Bolingbrook,188,41,2020-11-13T00:00:00,Closed,0.218085,1
241,2020-11-13,Cook,Woodbridge Nursing Pavilion,219,40,2020-11-13T00:00:00,Closed,0.182648,1



Facility Data


Unnamed: 0_level_0,Unnamed: 1_level_0,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities
County,FacilityName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Cook,Niles Nursing and Rehab Center,2020-11-13,216,54,0.25,2,1
Cook,Norridge Gardens,2020-11-13,167,46,0.275449,1,1
Cook,Villa at Windsor Park,2020-11-13,162,44,0.271605,2,1
Will,Meadowbrook Manor of Bolingbrook,2020-11-13,193,41,0.212435,2,1
Cook,Woodbridge Nursing Pavilion,2020-11-13,219,40,0.182648,1,1
DuPage,Manorcare Hinsdale,2020-11-13,173,37,0.213873,2,1
Cook,Peterson Park Health Care,2020-11-13,221,35,0.158371,2,1
Cook,Symphony at 87th,2020-11-13,182,34,0.186813,1,1
Cook,Elevate Care Chicago North,2020-11-13,184,34,0.184783,2,1
Cook,Glenview Terrace,2020-11-13,190,33,0.173684,1,1



County Data


Unnamed: 0_level_0,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,facilities
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cook,2020-11-13,15863,2481,0.156402,428,349
DuPage,2020-11-13,3115,485,0.155698,125,91
Lake,2020-11-13,2378,343,0.144239,93,81
Will,2020-11-13,1734,225,0.129758,67,41
Kane,2020-11-13,1684,208,0.123515,81,65
Madison,2020-11-13,1125,121,0.107556,44,35
St. Clair,2020-11-13,957,137,0.143156,41,39
Winnebago,2020-11-13,812,108,0.133005,39,35
LaSalle,2020-11-13,637,68,0.10675,15,13
Kankakee,2020-11-13,612,45,0.073529,20,15


In [16]:
df_facilities.FacilityName.unique()

AttributeError: 'DataFrame' object has no attribute 'FacilityName'

In [10]:
# Process all JSON files for IL
for file in glob.glob("Source_data/IL_*.json"):
    process_json_IL(file)

Date: 2020-06-05
Cases: 18837
Deaths: 3053
Outbreaks: 554
Facilities: 554
Date: 2020-10-02
Cases: 30243
Deaths: 4697
Outbreaks: 967
Facilities: 891
Date: 2020-10-09
Cases: 30920
Deaths: 4792
Outbreaks: 1008
Facilities: 920
Date: 2020-10-23
Cases: 33440
Deaths: 5019
Outbreaks: 1151
Facilities: 1015
Date: 2020-11-06
Cases: 36683
Deaths: 5253
Outbreaks: 1309
Facilities: 1116
Date: 2020-11-13
Cases: 39686
Deaths: 5493
Outbreaks: 1406
Facilities: 1185
Date: 2020-06-19
Cases: 21390
Deaths: 3649
Outbreaks: 593
Facilities: 592
Date: 2020-09-18
Cases: 28941
Deaths: 4575
Outbreaks: 889
Facilities: 829
Date: 2020-06-12
Cases: 20550
Deaths: 3433
Outbreaks: 580
Facilities: 580
Date: 2020-10-30
Cases: 34278
Deaths: 5127
Outbreaks: 1209
Facilities: 1052
Date: 2020-08-28
Cases: 27126
Deaths: 4396
Outbreaks: 795
Facilities: 758
Date: 2020-08-21
Cases: 26355
Deaths: 4319
Outbreaks: 766
Facilities: 737
Date: 2020-07-10
Cases: 23324
Deaths: 3895
Outbreaks: 630
Facilities: 628
Date: 2020-09-11
Cases: 28189

In [None]:
# import altair as alt
# df1=df_county.sort_values(by=['deaths'], ascending=False).reset_index()
# cols = ['Deaths Non LTC', 'LTC Deaths']
# cols = ['LTC Deaths', 'Deaths Non LTC']23


# chart1 = alt.Chart(df_county.sort_values(by=['deaths'], ascending=False).reset_index()).mark_bar().encode(
#     x='deaths:Q',
#     y=alt.Y('County:O', sort='-x'),
#     tooltip=['County', 'deaths', 'confirmed_cases', 'CFR']
# )
# chart2=chart1.encode(x=alt.X('CFR', axis=alt.Axis(format='%')))
# #chart2=chart1.encode(x=alt.X('CFR'))


# chart1 | chart2

In [None]:
# import altair as alt
# df1=df_county.sort_values(by=['deaths'], ascending=False).reset_index()
# cols = ['Deaths Non LTC', 'LTC Deaths']
# cols = ['LTC Deaths', 'Deaths Non LTC']


# chart1 = alt.Chart(df_county.sort_values(by=['deaths'], ascending=False).reset_index()).mark_bar().encode(
#     x='deaths:Q',
#     y=alt.Y('County:O'),
#     tooltip=['County', 'deaths', 'confirmed_cases', 'CFR']
# )
# chart2=chart1.encode(x=alt.X('CFR', axis=alt.Axis(format='%')))
# #chart2=chart1.encode(x=alt.X('CFR'))


# chart1 | chart2 