# (Prototype) COVID Dashboard Map

This is a prototype COVID Dashboard for testing the display of COVID data on maps (initial tests used the folium library; the maps below use ipyleaflet).

In [1]:
# Enable IPython's autoreload extension, which forces a reload of any
# external library file if it changes.
# Useful when developing external libraries since otherwise Jupyter
# will not re-import any library without restarting the Python kernel.

# Mode 2 reloads all modules (not only explicitly marked ones) before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import numpy as np
import time
import ipywidgets as widgets

# Import COVID IO routines from external python libraries
import COVIDlib.data_IO as COVID_IO
import COVIDlib.dashboard_IO as COVID_Dash

# imports for ipyleaflet 
import ipyleaflet as lf
import json
from branca.colormap import linear

## Define variables of interest below
# NOTE(review): none of the loader calls visible in this notebook are passed data_dir — confirm it is still needed.
data_dir = 'our_data/'    # Data directory for the COVID datafiles

In [3]:
# Load all the dataframes into memory, timing the whole load so the elapsed
# time can be printed on the same line as the progress message (hence end='').
print("Loading and Preprocessing COVID Dataset ... ", end='')

start= time.perf_counter()
# Retrieve Johns Hopkins state and county dataframes and run each through cleanJHdata
# (presumably this is where the "rates" of deaths/infections are added -- confirm in COVIDlib.dashboard_IO)
(JH_state_df, JH_cnty_df) = COVID_IO.PtoCDRDataFrames()
JH_state_df = COVID_Dash.cleanJHdata(JH_state_df)
JH_cnty_df = COVID_Dash.cleanJHdata(JH_cnty_df)

# Construct dictionary of FIPS values by placename
FIPSd = COVID_Dash.build_fipsdict(JH_cnty_df, JH_state_df)

# Retrieve Apple Mobility Dataframes (county and state)
# NOTE(review): unlike cleanJHdata above, the return value of cleanAAPLdata is discarded --
# presumably it modifies the dataframe in place; confirm.
(aapl_cnty_df, aapl_state_df) = COVID_IO.PtoAAPLMobilityDataFrames()
COVID_Dash.cleanAAPLdata(aapl_cnty_df)
COVID_Dash.cleanAAPLdata(aapl_state_df)

# Retrieve Google Mobility Dataframes (county and state)
(goog_cnty_df, goog_state_df) = COVID_IO.PtoGOOGMobilityDataFrames()

# Retrieve IMHE Dataframes (summary and hospitalization)
(summary_df, hospitalization_df) = COVID_IO.PtoIMHEDataFrames()

end= time.perf_counter()

# Report elapsed seconds to two decimal places
print(f"Done ({end-start:0.2f} sec)")

Loading and Preprocessing COVID Dataset ... Done (1.50 sec)


In [4]:
## This initial test version is based on a prototype seen at https://python-graph-gallery.com/292-choropleth-map-with-folium/

# Folium-era boundary files, kept here for reference only (not used by the code below):
# # Location of JSON with the shapes of the US states (note in this JSON, the "id" field [used to match to data] consists of 2 letter postal codes [e.g. MN or ND])
# state_geo = './folium_jsons/us-states.json'
# # Location of JSON with the shapes of the US counties (only has four keys, none are ID, don't know if it is just stored in FIPS number order)
# county_geo = './folium_jsons/us_counties_20m_topo.json'

# Keep only the individual states (drop the 'United States' row) and index the copy by postal code
state_data = (
    JH_state_df[JH_state_df['state'] != 'United States']
    .set_index('postal')
    .copy()
)

# Each source column holds one list per row; stack the lists into a 2-D array and
# keep only the final entry of every row (i.e. the last day's value).
# NOTE(review): 'DeathsRate' is derived from a column named 'DeathRate' (no "s") -- confirm
# that is the name produced upstream; every other column is overwritten in place.
for target_col, source_col in (
    ('ConfirmedRate', 'ConfirmedRate'),
    ('DeathsRate', 'DeathRate'),
    ('dConfirmedRate', 'dConfirmedRate'),
    ('dDeathsRate', 'dDeathsRate'),
    ('d2ConfirmedRate', 'd2ConfirmedRate'),
    ('d2DeathsRate', 'd2DeathsRate'),
):
    history = np.array(state_data[source_col].to_list())
    state_data[target_col] = history[:, -1].tolist()


In [5]:
# Work on a copy of the county data that is indexed by FIPS code
county_data = JH_cnty_df.set_index('FIPS').copy()

# Each source column holds one list per row; stack the lists into a 2-D array and
# retain only the most recent (final) value of every row.
# NOTE(review): 'DeathsRate' is derived from a column named 'DeathRate' (no "s") -- confirm
# that is the name produced upstream; every other column is overwritten in place.
for target_col, source_col in (
    ('ConfirmedRate', 'ConfirmedRate'),
    ('DeathsRate', 'DeathRate'),
    ('dConfirmedRate', 'dConfirmedRate'),
    ('dDeathsRate', 'dDeathsRate'),
    ('d2ConfirmedRate', 'd2ConfirmedRate'),
    ('d2DeathsRate', 'd2DeathsRate'),
):
    history = np.array(county_data[source_col].to_list())
    county_data[target_col] = history[:, -1].tolist()


## Implementing Maps using iPyLeaflet 

In [6]:
# ipyleaflet map testing below
# The reason for using ipyleaflet is that it directly supports widgets
# Luke's to do: get JSON data to work, add choropleth

In [7]:
##
## Define useful functions for building Map data

def load_statedata(filename = './ipyleaflet_json/us-states.json', file_type=json.load ):
    """Load the US state boundaries file used for the ipyleaflet state layer.

    The default file was grabbed from the GitHub repo for iPyLeaflet.

    Parameters
    ----------
    filename : str
        Path of the boundaries file to read.
    file_type : callable
        Called with the open text-mode file object; its result is returned
        unchanged.  Defaults to ``json.load``.

    Returns
    -------
    Whatever ``file_type`` returns (the parsed JSON document by default).

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # JSON is UTF-8 by specification.  Naming the encoding explicitly keeps the
    # result identical on Mac and PC instead of depending on the locale default.
    with open(filename, 'r', encoding='utf-8') as f:
        return file_type(f)
    

def load_countydata(filename = './ipyleaflet_json/gz_2010_us_050_00_20m.json', file_type=json.load ):
    """Load the US county boundaries file used for the ipyleaflet county layer.

    The default file was originally grabbed from
    https://github.com/kjhealy/us-county

    Parameters
    ----------
    filename : str
        Path of the boundaries file to read.
    file_type : callable
        Called with the open text-mode file object; its result is returned
        unchanged.  Defaults to ``json.load``.

    Returns
    -------
    Whatever ``file_type`` returns (the parsed JSON document by default).
    """
    # The file is decoded as ISO-8859-1; setting the encoding was needed for
    # this import to work on Mac (and should still work on PC).
    with open(filename, mode='r', encoding="ISO-8859-1") as county_file:
        boundaries = file_type(county_file)
    return boundaries


def format_cnty_dict(dataframe):
    """Convert FIPS-indexed county values into a dict keyed by county GEO_ID.

    ipyleaflet expects dictionaries of data keyed by the ID used in the
    boundary file.  The county boundary file identifies counties by strings
    such as ``'0500000US01001'``: the prefix ``'0500000US'`` followed by the
    FIPS code zero-padded to five digits.

    Parameters
    ----------
    dataframe : pandas object with an ``items()`` method
        In this notebook a pandas Series indexed by numeric county FIPS code.

    Returns
    -------
    dict
        ``{GEO_ID string: value}`` with one entry per usable FIPS key.
    """
    data_dict = {}
    for key, value in dataframe.items():
        try:
            fips = int(key)
        except (TypeError, ValueError, OverflowError):
            # No usable numeric FIPS code (e.g. NaN): nothing to match a shape against
            continue
        # Zero-pad to five digits so 4-digit (leading-zero states) and 5-digit
        # FIPS codes are both converted, instead of dropping codes >= 10000
        data_dict[f'0500000US{fips:05d}'] = value

    return data_dict


def format_state_dict(dataframe):
    """Convert state data held in a pandas object into a plain dictionary.

    ipyleaflet expects dictionaries of data keyed by the ID used in the
    boundary file, so the pandas object is turned into a dict via its own
    ``to_dict()`` method; the existing index/labels become the keys as-is.

    Parameters
    ----------
    dataframe : pandas object with a ``to_dict()`` method
        In this notebook a pandas Series indexed by state postal code.

    Returns
    -------
    dict
        The result of ``dataframe.to_dict()``.
    """
    state_values = dataframe.to_dict()
    return state_values

In [8]:
# ipyleaflet requires a dictionary for the choro_data field (the variable to be
# visualized), so for each geography: load the boundary JSON, pick the data
# series to plot, and convert that pandas series into the appropriate dictionary.

# --- States ---
geo_json_states_data = load_statedata()
state_data_series = state_data['dConfirmedRate']
state_data_dict = format_state_dict(state_data_series)

# --- Counties ---
# A separate county border JSON is loaded for ipyleaflet since the county JSON that
# comes with folium doesn't have a 'properties' field, which (I think) ipyleaflet
# requires for choropleths.  The conversion also turns the FIPS indices into the
# keys used in the county JSON data.
geo_json_data = load_countydata()
county_data_series = county_data['dConfirmedRate']
county_data_dict = format_cnty_dict(county_data_series)

In [10]:
# Initial map view: center point and zoom level
center = [38.0, -93.0]
zoom = 3.9

# The colormap spans the full range of the state values being plotted
minval, maxval = state_data_series.min(), state_data_series.max()
cmap = linear.YlOrRd_04.scale(minval, maxval)

# Legend dictionary: nsteps evenly spaced values from minval to maxval, each
# labelled with one decimal place (with thousands separators) and mapped to
# the color the colormap assigns to that value
nsteps = 5
step = (maxval - minval) / (nsteps - 1)
legendDict = {
    f"{level:,.1f}": cmap(level)
    for level in (minval + idx * step for idx in range(nsteps))
}

# Create the map
test_map = lf.Map(center=center, zoom=zoom)

# Add a functional states layer; features are matched to the data by their 'id'.
# The border/style settings are formatting/coloring taken from the documentation.
states_layer = lf.Choropleth(
    geo_data=geo_json_states_data,
    choro_data=state_data_dict,
    key_on='id',
    colormap=cmap,
    border_color='black',
    style={'fillOpacity': 0.8, 'dashArray': '5, 5'},
)
test_map.add_layer(states_layer)

# Add the legend in the top-right corner
legend = lf.LegendControl(legendDict, name="Legend", position="topright")
test_map.add_control(legend)

# Render the map in the notebook
display(test_map)

Map(center=[38.0, -93.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_ou…

In [11]:
# Next step: attempt a county plot again. I suspect we will first have to manipulate the county_data_dict dictionary
# (or the county GeoJSON) into a format ipyleaflet will understand -- possibly by simply copying each feature's GEO_ID to a feature-level id.

# Example of contents of the county GEOJSON data structure (NOT CURRENTLY WORKING)
# {'type': 'FeatureCollection',
#  'features': [{'type': 'Feature',
#    'properties': {'GEO_ID': '0500000US01001',
#     'STATE': '01',
#     'COUNTY': '001',
#     'NAME': 'Autauga',
#     'LSAD': 'County',
#     'CENSUSAREA': 594.436},
#    'geometry': {'type': 'Polygon',
#     'coordinates': [[[-86.496774, 32.344437],
#       [-86.717897, 32.402814],
#       [-86.814912, 32.340803],
#       [-86.890581, 32.502974],
#       [-86.917595, 32.664169],
#       [-86.71339, 32.661732],
#       [-86.714219, 32.705694],
#       [-86.413116, 32.707386],
#       [-86.411172, 32.409937],
#       [-86.496774, 32.344437]]]}},

# Example of contents of state GEOJSON data structure (WHICH DOES WORK)
# {'type': 'FeatureCollection',
#  'features': [{'type': 'Feature',
#    'id': 'AL',
#    'properties': {'name': 'Alabama'},
#    'geometry': {'type': 'Polygon',
#     'coordinates': [[[-87.359296, 35.00118],
#       [-85.606675, 34.984749],
#       [-85.431413, 34.124869],
#       [-85.184951, 32.859696],
#       [-85.069935, 32.580372],
#       [-84.960397, 32.421541],

# I suspect if we moved the GEO_ID (or simply copied it) to the features level as id, it might work.