In [2]:
# Render matplotlib figures inside the notebook output cells.
%matplotlib inline
# Extra keyword arguments for the inline backend when it prints a figure:
# use a dark grey face colour (#1f1e1e) behind the plots.
%config InlineBackend.print_figure_kwargs={'facecolor' : "#1f1e1e"}

import io
import os
import pandas as pd
import numpy as np
import random
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
from IPython.display import Markdown, display, Image
from PIL import Image as PILImage

# LOCAL GIT REPOSITORY LOCATION FOR https://github.com/CSSEGISandData/COVID-19
directory = '/Users/davidmorton/Documents/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports_us/'

!git -C $local_covid_19_data pull

states_to_drop = ['Puerto Rico', 'American Samoa', 'Guam', 'District of Columbia', 'Northern Mariana Islands', 'Virgin Islands']

state_names = pd.read_csv('states.csv').set_index('State').to_dict()['Abbreviation']

fatal: cannot change to 'pull': No such file or directory


In [10]:
def load_csv(directory, f):
    """Load one daily-report CSV and tag every row with the report date.

    Parameters
    ----------
    directory : str
        Folder that contains the report file. A trailing path separator is
        optional.
    f : str
        File name of the form ``MM-DD-YYYY.csv``; the report date is parsed
        from this name.

    Returns
    -------
    pandas.DataFrame
        The contents of the file plus a ``date`` column holding the parsed
        date on every row.

    Raises
    ------
    ValueError
        If ``f`` does not match the ``MM-DD-YYYY.csv`` pattern.
    """
    # Parse the name first so a badly named file is rejected before any I/O.
    report_date = datetime.strptime(f, '%m-%d-%Y.csv')
    # os.path.join also works when `directory` lacks a trailing separator;
    # plain string concatenation would silently build a wrong path.
    result = pd.read_csv(os.path.join(directory, f))
    result['date'] = report_date
    return result

def get_daily_report_data():
    """Read every ``.csv`` file found in ``directory`` into one DataFrame.

    Each file is loaded through ``load_csv`` and the resulting frames are
    concatenated in the order ``os.listdir`` reports them.
    """
    frames = []
    for name in os.listdir(directory):
        if name.endswith(".csv"):
            frames.append(load_csv(directory, name))
    return pd.concat(frames)

def get_heatmap_data():
    """Build the smoothed day-over-day change in incidence rate per state.

    The daily reports are reloaded on every call, reshaped to one row per
    date and one column per state, differenced, cleaned and averaged over a
    7-day window.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date``; columns are the ``Province_State`` values
        renamed through ``state_names``. Rows that still contain NaN after
        smoothing are dropped.
    """
    return (
        get_daily_report_data()         # Freshly loaded on every call, so no defensive copy is needed.
        .rename(columns={               # Rename the columns to shorter names
            'Province_State':'state',
            'Incident_Rate':'rate'
        })
        [['state','date','rate']]       # Drop unnecessary columns
        .pivot(                         # One row per date, one column per state.
            index='date',
            columns='state',
            values='rate'
        )
        .drop([                         # Keep the 50 states only.
            'Puerto Rico',
            'American Samoa',
            'Guam',
            'District of Columbia',
            'Northern Mariana Islands',
            'Virgin Islands',
            'Diamond Princess',
            'Grand Princess',
            'Recovered'
        ], axis=1, errors='ignore')     # Tolerate entries that are absent from the loaded reports.
        .diff()                         # Get the date-to-date difference.
        .clip(0, 5000)                  # Bound the daily change to [0, 5000]; negative diffs become 0.
        .replace(0, np.nan)             # Get rid of pure 0 values. We'll interpolate these.
        .interpolate()                  # Interpolate the newly NaN values.
        .rolling(window=7).mean()       # 7-Day Rolling Average for smoothness.
        .dropna()                       # Drop rows left incomplete, e.g. the first few of the rolling window.
        .rename(state_names, axis=1)    # Rename the state names to abbreviations
    )

def make_usa_choropleth(series, locations, title, colorbartitle, colorspectrum, zmax):
    """Create a plotly choropleth figure of the USA, coloured per state.

    Parameters
    ----------
    series
        Values to colour by, passed to the trace as ``z``.
    locations
        Location codes matching ``series``, interpreted with
        ``locationmode='USA-states'``.
    title
        Text shown as the figure title.
    colorbartitle
        Title of the colour bar.
    colorspectrum
        Colour scale handed to the trace.
    zmax
        Upper bound of the colour range; the lower bound is fixed at 0.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    trace = go.Choropleth(
        locations=locations,
        z=series,
        locationmode='USA-states',
        colorscale=colorspectrum,
        colorbar_title=colorbartitle,
        zmin=0,
        zmax=zmax,
    )
    figure = go.Figure(data=trace)
    # Limit the map scope to the USA.
    figure.update_layout(title_text=title, geo_scope='usa')
    return figure

def make_animation(totalduration, pausetime, timeinterpolation, theme):
    """Render the heat-map data as an animated GIF of US choropleth frames.

    Parameters
    ----------
    totalduration : number
        Target length of the animation in seconds, including the final pause.
    pausetime : number
        Seconds to hold the last frame; must be smaller than
        ``totalduration``.
    timeinterpolation : str
        pandas resampling rule (e.g. ``'6H'``) used to insert linearly
        interpolated frames between the existing rows.
    theme : str
        Name of a colour list in ``plotly.express.colors.sequential``.

    Returns
    -------
    str
        Name of the written GIF followed by a random ``?c=`` query string,
        so that a viewer does not show a stale cached copy.

    Raises
    ------
    ValueError
        If there is no data to draw, or if the timing arguments leave less
        than one millisecond per frame.
    """
    bgcolor = "black"
    # get_heatmap_data() already returns columns keyed by state abbreviation,
    # so no further renaming is required here.
    data = get_heatmap_data()
    if data.empty:
        raise ValueError('No heat-map data available to animate')

    # Place the colour stops at the octiles of the observed values so the
    # palette is spread over the data distribution, not the raw value range.
    vals = data.to_numpy().ravel()
    bins = pd.qcut(vals, q=8, retbins=True)[1]
    colorstops = (bins - bins.min()) / (bins.max() - bins.min())
    colorscale = list(zip(colorstops, getattr(px.colors.sequential, theme)))
    zmax = vals.max()

    # Insert intermediate time steps so the animation moves smoothly.
    data = data.resample(rule=timeinterpolation).asfreq().interpolate()

    adjduration = totalduration - pausetime
    perslide = round((adjduration * 1000) / len(data))
    if perslide < 1:
        # Previously this surfaced as ZeroDivisionError or a negative GIF
        # frame duration.
        raise ValueError(
            'totalduration must exceed pausetime by at least one millisecond per frame'
        )
    repeatlast = round((pausetime * 1000) / perslide)
    print('Frames will be shown for {0} milliseconds'.format(perslide))

    images = []
    for i in range(len(data)):
        row = data.iloc[i]
        fig = make_usa_choropleth(row, row.index, 'Daily Change in COVID-19 Confirmed Cases Per 100,000 People - ' + row.name.strftime('%B %d'), '', colorscale, zmax)
        fig.update_layout(
            plot_bgcolor=bgcolor,
            paper_bgcolor=bgcolor,
            font=dict(color="white"),
            geo=dict(
                lakecolor=bgcolor,
                showlakes=True,
                bgcolor=bgcolor
            )
        )
        img_bytes = fig.to_image(format='png')
        images.append(PILImage.open(io.BytesIO(img_bytes)))

    # Hold the final frame: together with the copy already in the list it is
    # shown `repeatlast` times (a non-positive count adds nothing).
    images.extend([images[-1]] * (repeatlast - 1))

    url = 'covid19-map-case-incidence-rate.gif'
    images[0].save(url,
                   save_all=True, append_images=images[1:], optimize=True, duration=perslide, loop=0)
    # random.randint needs integer bounds: the float 2e9 is deprecated since
    # Python 3.10 and raises TypeError from 3.12 on.
    return url + "?c=" + str(random.randint(0, 2 * 10**9))


In [13]:
# Build a 30-second GIF (last 5 seconds hold the final frame) with frames
# interpolated every 6 hours, using the "Inferno" palette, then display it.
url = make_animation(
    totalduration=30,
    pausetime=5,
    timeinterpolation='6H',
    theme="Inferno",
)
Image(url=url)

Frames will be shown for 83 milliseconds


In [6]:
# Display the smoothed per-state frame that feeds the animation.
get_heatmap_data()

state,AL,AK,AZ,AR,CA,CO,CT,DE,FL,GA,...,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-20,4.097826,1.051564,2.675118,3.106517,3.554248,6.937636,25.780333,17.619406,38.643060,8.583213,...,15.860247,4.737294,3.403527,4.237040,1.590964,5.859014,3.943547,3.142214,2.956767,1.206661
2020-04-21,4.155714,1.051564,2.839982,2.714754,3.683228,8.157773,25.527899,17.883477,4.199067,7.471566,...,14.889607,4.604554,3.460708,4.386583,1.544171,6.229380,4.482852,2.893860,2.954006,4.625533
2020-04-22,4.624907,1.003766,2.961668,3.901079,3.883257,9.196401,30.909153,20.538857,3.901008,8.773419,...,13.394823,5.723052,3.364992,4.471324,1.497378,6.803900,5.301271,2.559123,3.103087,4.596803
2020-04-23,4.530459,0.884270,3.012697,5.401919,4.329952,9.340095,28.913721,21.301729,4.242128,8.755103,...,12.521247,6.428097,3.588744,4.630836,1.193223,7.423586,6.412049,2.731891,3.249407,4.510613
2020-04-24,4.432965,0.716976,3.010735,6.152339,4.444358,10.784595,28.557108,19.643950,3.884861,7.463112,...,12.210643,6.482499,3.600554,4.929921,1.123034,7.412746,5.965467,2.537527,3.597262,4.826643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-30,20.243402,3.456481,40.921655,19.403892,14.373491,4.517360,2.464238,9.198474,32.545993,18.138649,...,6.636940,18.058030,19.770296,17.454110,1.098920,6.348262,6.449767,2.487041,8.165461,5.751222
2020-07-01,20.097724,3.632234,46.937236,18.092626,15.458508,4.646357,2.640541,9.110450,33.245721,19.995436,...,6.572347,19.886320,21.794229,17.520950,0.686825,6.174200,6.689898,2.774007,8.430446,5.726539
2020-07-02,20.118119,3.925156,47.425941,18.996784,16.428189,4.643876,2.612493,11.017630,36.641264,22.354093,...,6.685385,21.509608,21.963218,17.360534,0.915767,6.341568,7.098871,2.861691,8.614463,5.529072
2020-07-03,22.381953,4.432888,49.484781,18.376655,15.788966,4.502476,2.636534,13.291575,37.004431,23.543512,...,7.153685,22.371457,22.376577,17.004055,0.961555,6.398473,7.359638,3.156629,8.759223,5.282238
