# maps_us_data.ipynb

Display interactive maps of the most recent COVID-19 statistics with and without normalization by population.

Inputs:
* `outputs/us_counties_clean.csv`: The contents of `data/us_counties.csv` after data cleaning by [clean_us_data.ipynb](./clean_us_data.ipynb)
* `outputs/us_counties_clean_meta.json`: Column type metadata for reading `outputs/us_counties_clean.csv` with `pd.read_csv()`
* [U.S. map in GeoJSON format, from Plotly](https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json)

**Note:** You can redirect these input files by setting the environment variable `COVID_OUTPUTS_DIR` to a replacement for the prefix `outputs` in the above paths.

In [None]:
# Initialization boilerplate
import os
import json
import pandas as pd
import numpy as np
from urllib.request import urlopen
from typing import *

import text_extensions_for_pandas as tp

# Local file of utility functions
import util

# The COVID_OUTPUTS_DIR environment variable, when set, replaces the default
# "outputs" directory that this notebook reads its input files from.
_OUTPUTS_DIR = os.environ.get("COVID_OUTPUTS_DIR", "outputs")
# create if necessary
util.ensure_dir_exists(_OUTPUTS_DIR)


In [None]:
# Load the binary (Feather) files that clean_us_data.ipynb produces:
# the per-county table, re-indexed by its "FIPS" column, and the "date"
# column of the dates table as a NumPy array.
cases_file, dates_file = (
    os.path.join(_OUTPUTS_DIR, file_name)
    for file_name in ("us_counties_clean.feather", "dates.feather")
)
cases = pd.read_feather(cases_file).set_index("FIPS")
dates = pd.read_feather(dates_file).loc[:, "date"].to_numpy()
cases.head()


In [None]:
# Express the Confirmed and Deaths counts per 100 residents.
# Population is reshaped into a column vector (one row per county) so that
# the division is applied across each row of the time series arrays.
population_column = cases["Population"].values.reshape(-1, 1)
for stat in ("Confirmed", "Deaths"):
    cases[f"{stat}_per_100"] = 100.0 * cases[stat].array / population_column

cases


In [None]:
# Build a per-county snapshot: the descriptive columns plus the final
# element of every time series column.
latest_cases = cases[["State", "County", "Population"]].copy()
for series_col in ("Confirmed", "Confirmed_per_100", "Deaths", "Deaths_per_100"):
    # NOTE(review): `_tensor` is a private attribute of the extension array;
    # consider a public accessor if the library offers one.
    series_values = cases[series_col].array._tensor
    # Keep only the last column, i.e. the last element of each row's series.
    latest_cases[series_col] = series_values[:, -1]
latest_cases


In [None]:
# Also show totals in the last week.
# The weekly counts come from the "Confirmed_7_Days" and "Deaths_7_Days"
# columns of `cases` (presumably computed by clean_us_data.ipynb -- TODO confirm).
cases_this_week = cases[["State", "County", "Population"]].copy()
cases_this_week["Confirmed"] = cases["Confirmed_7_Days"]
cases_this_week["Deaths"] = cases["Deaths_7_Days"]
# Multiply by 100.0 so these columns really hold counts per 100 residents,
# as their names and the map labels say. Without the factor they would be
# per-capita fractions, inconsistent with the per-100 normalization applied
# to the full time series in `cases`.
cases_this_week["Confirmed_per_100"] = (
    100.0 * cases_this_week["Confirmed"] / cases_this_week["Population"]
)
cases_this_week["Deaths_per_100"] = (
    100.0 * cases_this_week["Deaths"] / cases_this_week["Population"]
)

cases_this_week


In [None]:
# Fetch the U.S. counties map (GeoJSON) from Plotly's datasets repository
# and parse it into Python objects.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    # Read the whole response body, then decode it as JSON.
    counties = json.loads(response.read())


In [None]:
# Common code to generate choropleth maps.
# NOTE: In order for this to work you need the JupyterLab extensions for Plotly:
#   > jupyter labextension install jupyterlab-plotly
# (env.sh will run the above command for you)

import plotly.express as px

def draw_map(df, col_name, label_str):
    """
    Display a choropleth map of the U.S. colored by one column of `df`.

    Reads the module-level `counties` GeoJSON object.

    :param df: DataFrame whose index values are formatted as zero-padded
        5-digit integers to produce the map locations, and which has a
        "County" column used as the hover title.
    :param col_name: Name of the column of `df` that determines the color.
    :param label_str: Text used both as the label for `col_name` and as the
        figure title.
    :returns: None; the figure is shown as a side effect.
    """
    # Rows with an NA in this column are left out; each column may have
    # its NAs in different places.
    plottable = df[df[col_name].notna()]
    location_codes = [f"{fips:05d}" for fips in plottable.index]
    # The top of the color scale is pinned to the 95th percentile.
    scale_top = plottable[col_name].quantile(0.95)

    fig = px.choropleth(
        plottable,
        geojson=counties,
        locations=location_codes,
        color=col_name,
        # See https://plotly.com/python/builtin-colorscales/
        color_continuous_scale="viridis",
        range_color=(0, scale_top),
        scope="usa",
        labels={col_name: label_str},
        hover_name=plottable["County"],
        title=label_str,
    )
    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    fig.show()
    


In [None]:
# Map the latest confirmed-case count of each county.
# The color scale tops out (yellow) at the 95th percentile.
draw_map(df=latest_cases, col_name="Confirmed", label_str="Confirmed Cases ")

In [None]:
# Map the number of confirmed cases over the past 7 days for each county.
# The color scale tops out (yellow) at the 95th percentile.
draw_map(df=cases_this_week, col_name="Confirmed", label_str="Confirmed Cases this Week")

In [None]:
# Map the "Confirmed_per_100" column of the last-week table for each county.
# The color scale tops out (yellow) at the 95th percentile.
draw_map(
    df=cases_this_week,
    col_name="Confirmed_per_100",
    label_str="Confirmed per 100 this Week",
)

In [None]:
# Map the "Deaths_per_100" column of the last-week table for each county
# (deaths in the last week, not the all-time total).
# The color scale tops out (yellow) at the 95th percentile.
draw_map(
    df=cases_this_week,
    col_name="Deaths_per_100",
    label_str="Deaths per 100 this Week",
)

In [None]:
# Draw a map of number of confirmed cases per 100 residents by county
# Yellow == 95th percentile
#draw_map(latest_cases, "Confirmed_per_100", "Confirmed per 100")

In [None]:
# Draw a map of number of deaths by county
# Yellow == 95th percentile
#draw_map(latest_cases, "Deaths", "Total Deaths  ")

In [None]:
# Draw a map of number of deaths per 100 residents by county
# Yellow == 95th percentile
#draw_map(latest_cases, "Deaths_per_100", "Deaths per 100")