### Doing
 - [ ] Add CMS_IDs to Outbreaks and Facilities

### To Do
 - [ ] Group Facilities by CMS_Provider_Num WHERE Available
 
### DONE
 - [x] Separate functions into separate Python files


In [1]:
import pandas as pd
import urllib3 as urllib
import urllib.request as urllib2
import json
import glob
import IPython.display

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Shared urllib3 connection pool used by getResponse().
http = urllib.PoolManager()

# Load the facility-name -> CMS ID lookup table from its JSON file.
fac2CMS_file = 'IL_FacilityName_to_CMS_ID.json'
with open(fac2CMS_file) as f:
    ltc_name2cms_id = json.loads(f.read())

def getResponse(url):
    """Fetch ``url`` through the shared urllib3 pool and return the parsed JSON body.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The response body, decoded as UTF-8 and parsed with ``json.loads``.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
    """
    response = http.request('GET', url)
    if response.status != 200:
        # Previously this branch only printed and then fell through to
        # `return jsonData` with the name unbound, hiding the real HTTP
        # failure behind an unrelated error. Fail loudly with the status.
        raise RuntimeError("Error receiving data: HTTP status %s" % response.status)
    return json.loads(response.data.decode('utf-8'))

def facility2CMSNum(facilityName):
    """Return the CMS ID mapped to ``facilityName`` in ``ltc_name2cms_id``.

    The lookup uses the module-level ``ltc_name2cms_id`` dictionary; the
    string "No Match" is returned when the name is not a key in it.
    """
    return ltc_name2cms_id.get(facilityName, "No Match")
    
# df_facilities.reset_index(inplace=True) # Needed because groupby was used to get facility-level data. TODO: Consider moving this code up
# df_facilities['county-facName']= df_facilities['County'].str.upper() + '-' + df_facilities['FacilityName'].str.upper()
# df_facilities['CMS_ProvNum'] = df_facilities['county-facName'].apply(lambda x: facility2CMSNum(x))


def pull_IL_json_from_file(file):
    '''
    - Get IL data from a previously saved JSON file

    Unlike pull_IL_json_from_web, this performs no network request and does
    not write anything: it only reads ``file``.

    Args:
        file: Path of a JSON file holding one IL LTC snapshot. The document
              must contain a 'LastUpdateDate' object with 'year', 'month'
              and 'day' entries.

    Return: Reporting Date: str (YYYY-MM-DD), Outbreak data: dict
    '''
    # Read the snapshot from disk (the earlier version ignored `file` and
    # re-downloaded the live feed instead).
    with open(file) as f:
        ltc_data = json.load(f)

    # Extract Reporting Date
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    return reporting_date, ltc_data

def pull_IL_json_from_web():
    '''
    - Get IL data from the IDPH JSON endpoint
    - Store IL data in Source_data w/Date Stamp

    The raw feed is saved to
    "Source_data/IL_<reporting date>_LTC_data_Source.json" before returning.

    Return: Reporting Date: str (YYYY-MM-DD), Outbreak data: dict
    '''
    # Get IL data from JSON
    ltc_data = getResponse('https://idph.illinois.gov/DPHPublicInformation/api/covid/getltcdata')

    # Extract Reporting Date (computed once; it also names the saved copy)
    last_update = ltc_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    # Saving a copy of source data
    source_path = "Source_data/IL_" + reporting_date + "_LTC_data_Source.json"
    with open(source_path, "w") as f:
        f.write(json.dumps(ltc_data))

    return reporting_date, ltc_data

def outbreak_df_from_file(outbreak_data, ltc_name2cms_id):
    """Build the per-outbreak DataFrame and summary figures for one IL snapshot.

    Args:
        outbreak_data: Parsed IL LTC feed (dict). Reads 'LastUpdateDate'
            (year/month/day), 'FacilityValues' (one record per outbreak with
            County, FacilityName, confirmed_cases, deaths and status) and
            'LTC_Reported_Cases' (deaths, confirmed_cases).
        ltc_name2cms_id: Dictionary keyed by "<COUNTY>-<FACILITY NAME>"
            (upper-cased) giving the CMS ID; names missing from it are
            labelled "No Match".

    Returns:
        (df, summary, reporting_date):
            df - outbreak DataFrame with reporting_date, CFR, outbreaks,
                 county-facName and CMS_ProvNum columns added
            summary - dict with Date, Cases, Deaths, Outbreaks,
                      Open Outbreaks, Closed Outbreaks and Facilities
            reporting_date - 'YYYY-MM-DD' string

    Side effects:
        Writes df to
        'Reporting_data/IL_<reporting_date>_Outbreaks_LTC_data_v4.csv'.
    """
    # Extract Reporting Date
    last_update = outbreak_data['LastUpdateDate']
    reporting_date = '%d-%02d-%02d' % (last_update['year'], last_update['month'], last_update['day'])

    # Build DataFrame
    df = pd.DataFrame(outbreak_data['FacilityValues'])
    df.insert(0, 'reporting_date', reporting_date)
    df['CFR'] = df['deaths'] / df['confirmed_cases']
    df['outbreaks'] = 1  # to allow counting # of outbreaks by Facility
    df['county-facName'] = df['County'].str.upper() + '-' + df['FacilityName'].str.upper()
    # Use the mapping that was passed in rather than a module-level global.
    df['CMS_ProvNum'] = df['county-facName'].apply(lambda key: ltc_name2cms_id.get(key, "No Match"))

    # Save Outbreak data to a file
    outbreak_file = 'Reporting_data/IL_' + reporting_date + '_Outbreaks_LTC_data_v4.csv'
    df.to_csv(outbreak_file, index=False)

    # Get summary data from feed - Note this may not match totals - ST-TODO: Check if summary data and totals from raw data match
    reported = outbreak_data['LTC_Reported_Cases']
    # Number of distinct (County, FacilityName) pairs.
    facility_cnt = len(df.groupby(['County', 'FacilityName']).size())
    # .get(..., 0) keeps the summary working when a status is absent that day.
    status_counts = df['status'].value_counts()

    summary = {
        'Date': reporting_date,
        'Cases': reported['confirmed_cases'],
        'Deaths': reported['deaths'],
        'Outbreaks': len(df),  # one row per outbreak
        'Open Outbreaks': int(status_counts.get('Open', 0)),
        'Closed Outbreaks': int(status_counts.get('Closed', 0)),
        'Facilities': facility_cnt,
    }

    return df, summary, reporting_date

def process_IL_dict(IL_data, ltc_name2cms_id, display_dfs=False, display_summary=True):
    '''Process one day's IL outbreak dictionary into outbreak, facility and county tables.

       Inputs:
           IL_data - Dictionary of outbreaks in IL for a particular date
           ltc_name2cms_id - Dictionary of Facility Names to CMS Federal Provider Numbers - Note can be more than one name for same number
           display_dfs - Flag to indicate whether or not to display top 10 values for each of the DataFrames
           display_summary - Flag to indicate whether or not to display Summary info
       Steps:
            1) Produce Summary Info
            2) Produce Outbreak file and dataframe
            3) Produce Facility file and dataframe
            4) Produce County file and dataframe
       Returns:
            reporting_date, summary, outbreak_df, df_facilities, df_county
       Side effects:
            Writes the Facilities and County CSV files under Reporting_data/
            (the Outbreaks CSV is written by outbreak_df_from_file).
    '''
    outbreak_df, summary, reporting_date = outbreak_df_from_file(IL_data, ltc_name2cms_id)

    if display_summary:
        for k, v in summary.items():
            print(k + ": " + str(v))

    # Augment Outbreak DF to count open/closed
    outbreak_df['Closed_Outbreaks'] = (outbreak_df['status'] == "Closed").astype('int64')
    outbreak_df['Open_Outbreaks'] = (outbreak_df['status'] == "Open").astype('int64')

    # Save and Display Facility data
    # numeric_only=True: only the numeric columns are meaningful to add up;
    # without it newer pandas concatenates the string columns instead.
    df_facilities = outbreak_df.groupby(['County', 'FacilityName', 'CMS_ProvNum']).sum(numeric_only=True)
    # A sum of per-outbreak CFRs is meaningless, so recompute from the totals.
    df_facilities['CFR'] = df_facilities['deaths'] / df_facilities['confirmed_cases']
    df_facilities['facilities'] = 1  # to allow counting # of facilities by County
    df_facilities.insert(0, 'ReportingDate', reporting_date)
    facilities_file = 'Reporting_data/IL_' + reporting_date + '_Facilities_LTC_data_v4.csv'
    df_facilities.sort_values(by='confirmed_cases', ascending=False).to_csv(facilities_file)

    # Save and Display County Level Data
    # numeric_only=True also drops the string 'ReportingDate' column here, so
    # it can be re-inserted below without a duplicate-column error.
    df_county = df_facilities.groupby(by=['County']).sum(numeric_only=True)
    df_county['CFR'] = df_county['deaths'] / df_county['confirmed_cases']
    df_county.insert(0, 'ReportingDate', reporting_date)
    filename = 'Reporting_data/IL_' + reporting_date + '_County_LTC_stats_v4.csv'
    df_county.sort_values('confirmed_cases', ascending=False).to_csv(filename)

    if display_dfs:
        # Call display through the imported module so this also works when
        # the function is run outside an interactive IPython namespace.
        show = IPython.display.display
        print("\nOutbreak Data\n=============")
        show(outbreak_df.sort_values(by='deaths', ascending=False).head(10))
        print("\nFacility Data\n=============")
        show(df_facilities.sort_values('deaths', ascending=False).head(10))
        print("\nCounty Data\n===========")
        show(df_county.sort_values(by='confirmed_cases', ascending=False).head(10))

    return reporting_date, summary, outbreak_df, df_facilities, df_county
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## 1 - Get "Raw Data" from website and store in file

In [2]:
# Download today's feed; a dated copy is also saved under Source_data/.
reporting_date, ltc_data = pull_IL_json_from_web()

In [3]:
# print('Source File: ' + str(json_file))
# Build the outbreak, facility and county tables and print the summary.
reporting_date, summary, outbreak_df, df_facilities, df_county = process_IL_dict(
    ltc_data,
    ltc_name2cms_id,
    display_dfs=False,
)

Date: 2020-12-18
Cases: 59970
Deaths: 7559
Outbreaks: 1802
Open Outbreaks: 1025
Closed Outbreaks: 777
Facilities: 1476


In [4]:
# Totals per (County, FacilityName); numeric_only=True keeps newer pandas
# from concatenating the string columns into the result.
outbreak_df.groupby(['County', 'FacilityName']).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,confirmed_cases,deaths,CFR,outbreaks,Closed_Outbreaks,Open_Outbreaks
County,FacilityName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adams,Adams Pointe Senior Living,4,0,0.000000,1,1,0
Adams,Bickford Cottage (2),3,0,0.000000,1,0,1
Adams,Bradford Villa,8,1,0.125000,1,1,0
Adams,Bradford Villa (2),7,0,0.000000,1,0,1
Adams,Cedarhurst,26,1,0.038462,1,1,0
...,...,...,...,...,...,...,...
Woodford,El Paso Health Care Center,75,0,0.000000,1,0,1
Woodford,Heritage Health El Paso,43,9,0.209302,1,0,1
Woodford,Snyder Village Assisted Living,2,0,0.000000,1,1,0
Woodford,Snyder Village Health Center,45,5,0.111111,1,0,1


In [5]:
# First 20 facilities when ordered by number of open outbreaks, descending.
df_facilities.sort_values('Open_Outbreaks', ascending=False).iloc[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ReportingDate,confirmed_cases,deaths,CFR,outbreaks,Closed_Outbreaks,Open_Outbreaks,facilities
County,FacilityName,CMS_ProvNum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Lake,Rolling Hills Manor,145443,2020-12-18,122,18,0.147541,3,1,2,1
Cook,Franciscan Village,146029,2020-12-18,52,6,0.115385,2,0,2,1
Cook,Symphony of Morgan Park,145764,2020-12-18,206,26,0.126214,2,0,2,1
Cook,Balmoral Home,145796,2020-12-18,40,0,0.0,2,0,2,1
Cook,Lakeview Nursing and Rehab,145654,2020-12-18,96,22,0.229167,2,0,2,1
Cook,Austin Oasis,145834,2020-12-18,109,16,0.146789,2,0,2,1
Cook,Alden North Shore,No Match,2020-12-18,12,3,0.25,2,0,2,1
Macoupin,Carlinville Heritage Health,145456,2020-12-18,14,1,0.071429,2,0,2,1
Cook,Arbour Health Care Center,146169,2020-12-18,68,3,0.044118,3,1,2,1
Cook,Symphony of South Shore,145977,2020-12-18,222,32,0.144144,2,0,2,1


In [6]:
# Preview the first five outbreak rows (head() defaults to 5).
outbreak_df.head()

Unnamed: 0,reporting_date,County,FacilityName,confirmed_cases,deaths,ReportDate,status,CFR,outbreaks,county-facName,CMS_ProvNum,Closed_Outbreaks,Open_Outbreaks
0,2020-12-18,Adams,Chaddock,11,0,2020-12-18T00:00:00,Open,0.0,1,ADAMS-CHADDOCK,No Match,0,1
1,2020-12-18,Adams,Illinois Veterans Home Quincy,181,4,2020-12-18T00:00:00,Open,0.022099,1,ADAMS-ILLINOIS VETERANS HOME QUINCY,No Match,0,1
2,2020-12-18,Adams,Good Samaritan Home,42,1,2020-12-18T00:00:00,Open,0.02381,1,ADAMS-GOOD SAMARITAN HOME,145773,0,1
3,2020-12-18,Adams,Adams Pointe Senior Living,4,0,2020-12-18T00:00:00,Closed,0.0,1,ADAMS-ADAMS POINTE SENIOR LIVING,No Match,1,0
4,2020-12-18,Adams,Bradford Villa,8,1,2020-12-18T00:00:00,Closed,0.125,1,ADAMS-BRADFORD VILLA,No Match,1,0
