In [2]:
import numpy as np
import pandas as pd
import datetime as dt
pd.set_option('display.max_rows', 1000)
import hvplot.pandas
import holoviews as hv

pd.options.mode.chained_assignment = None  # default='warn'

# Daily Analysis of COVID-19 Data from Johns Hopkins University

In [3]:
# Stamp the report with the time it was generated. The time is taken in UTC
# explicitly so that it agrees with the 'UTC' suffix printed in the label
# (a bare datetime.now() would return the machine's local time).
print('\t\tLast updated:',dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M UTC'))

Last updated: 2020-03-19 16:30 UTC


Josef Kellndorfer, Ph.D., Earth Big Data, LLC, Richard Signell, Ph.D., USGS

These plots show the daily status of COVID-19 cases as reported by Johns Hopkins University. Please use them freely to look at daily changes and trends. Keep in mind that the data change frequently as more COVID-19 tests become available globally. We chose to plot both **totals** and numbers **normalized by population**. Also, it is advantageous to plot case totals (confirmed infections, deaths, and recovered) on a **logarithmic scale**, where trends and parallels between countries become more obvious. Taking a close look at the plots, one will discern differences and similarities, and see that for the most part the initial stages are similar in all countries, with a time lag. What to look out for is whether the measures taken by countries, foremost social distancing, show the desired effect of slowing and eventually reversing the exponential upward trends. The first set of plots looks at confirmed infections; the bottom set of plots looks at confirmed deaths, which may be a somewhat more reliable indicator of the impact on a country while tests are still being rolled out in larger numbers.

This is a work in progress, stay tuned. 

You can get the notebook underlying this work at:
[https://github.com/EarthBigData/covid19](https://github.com/EarthBigData/covid19)

## Interacting with the Plots

You can use the control buttons to interact with the plots, e.g. zoom in/out or also hover over the data points to get a detailed number.


## Johns Hopkins University daily updated COVID-19 data
COVID-19 confirmed cases, deaths and recovered cases data are streamed from [The Center for Systems Science and Engineering (CSSE)](https://systems.jhu.edu) at Johns Hopkins University. The CSSE COVID-19 [GitHub Repo](https://github.com/CSSEGISandData/COVID-19) has more information about these data and their sources.

## UN Population Data
We obtain the Population data from UN statistics.  [UN Population Data Sets](https://population.un.org/wpp/Download/Standard/Population/) have more information about these data and their sources.

## Load US Population Data
US population data are obtained from US Census statistics.  [US Population Data Sets](http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv?#) have more information about these data and their sources.

## Notebook Resources: 
- [Jupyter](https://jupyter.org/) 
- [Hvplot](https://hvplot.holoviz.org/).
- Friedrich Knuth [https://hub-binder.mybinder.ovh/user/friedrichknuth-covid_dashboard-n9gxg9d1/tree/covid_dashboard](https://hub-binder.mybinder.ovh/user/friedrichknuth-covid_dashboard-n9gxg9d1/tree/covid_dashboard). 

In [4]:
# World population
# pop='https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv'
# We use a locally downloaded dataset
pop= '/s/data//WPP2019_TotalPopulationBySex.csv'

# Rename the two columns used further down so they match the column names
# given to the JHU tables ('country') and read naturally ('population').
popdf = pd.read_csv(pop).rename(columns={'Location': 'country', 'PopTotal': 'population'})

# Keep only the rows for 2019. The explicit .copy() makes popdf2 an independent
# frame, so the renames below are guaranteed to land in popdf2 (and only there)
# instead of being written through a slice of popdf.
popdf2 = popdf[popdf.Time == 2019].copy()

# Necessary adjustments of names so we can link the population and JHU data sets
popdf2['country'] = popdf2['country'].replace({
    'United States of America': 'US',
    'Iran (Islamic Republic of)': 'Iran',
    'Republic of Korea': 'Korea, South',
})

In [5]:
# US Population
#popus='http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv?#'
popus='/s/data/nst-est2019-alldata.csv'
popusdf = pd.read_csv(popus)

# DataFrame.rename silently ignores labels that are not present, so verify the
# two columns we depend on up front and fail with a message naming the file.
_missing = {'NAME', 'POPESTIMATE2019'} - set(popusdf.columns)
if _missing:
    raise KeyError(f'{popus} is missing expected column(s): {sorted(_missing)}')

# Downstream code looks states up via popusdf.state / popusdf.population.
popusdf = popusdf.rename(columns={'NAME': 'state', 'POPESTIMATE2019': 'population'})

In [6]:
# Sources for COVID-19
# c = confirmed cases
# d = deaths
# r = recovered

# The three time-series files sit in the same directory of the CSSE GitHub
# repository, so the common prefix is written once and reused.
CSSE_TIME_SERIES_URL = ('https://github.com/CSSEGISandData/COVID-19/raw/master/'
                        'csse_covid_19_data/csse_covid_19_time_series/')

cv19_c = CSSE_TIME_SERIES_URL + 'time_series_19-covid-Confirmed.csv'
cv19_d = CSSE_TIME_SERIES_URL + 'time_series_19-covid-Deaths.csv'
cv19_r = CSSE_TIME_SERIES_URL + 'time_series_19-covid-Recovered.csv'

## Confirmed Cases: Status for Germany

In [7]:
# Load the confirmed-cases table. The first four columns are descriptive
# (province/state, country, lat, long); every later column label is a date and
# is converted to a Timestamp so it can serve as the time axis.
df = pd.read_csv(cv19_c)
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))
df = df.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                        'Lat': 'lat', 'Long': 'lon'})

# Select Germany first and only then label the selected rows; previously the
# 'state' column of *every* country was overwritten before filtering.
df = df[df.country == 'Germany'].reset_index(drop=True)
df['state'] = 'Germany'
# No county breakdown is derived for Germany; the (empty) column is kept so the
# frame has the same layout as in the US cells.
df.insert(0, 'county', None)

logscale=False
ylim=(1,1.5e4)
title='Confirmed cases of COVID-19 in Germany, Status: {}'.format(df.columns[-1].date())

opts = {'legend':True, 'logy': logscale, 'grid': True, 'width': 700, 'height': 400,
        'title': title, 'padding':0.1,'ylim':ylim,'ylabel':'Number of Confirmed Cases','xlabel':'Date'}

# Sum the integer (daily count) columns over all selected rows; lat/lon are
# floats and the text columns are objects, so neither is picked up.
# np.integer is used because the np.int alias no longer exists in current NumPy.
s = df.select_dtypes(np.integer).sum()
s.name = 'Germany'
lines = s.hvplot(**opts)
dots  = s.hvplot.scatter(**opts)
# `*` overlays the scatter markers on the line.
layout = lines * dots
layout

## Confirmed Cases: Status for United States

In [8]:
# Load the confirmed-cases table and give the descriptive columns short names.
df = pd.read_csv(cv19_c).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})

# Keep US rows only, leaving out the two cruise-ship entries.
is_ship = df.state.isin(['Diamond Princess', 'Grand Princess'])
df = df[(df.country == 'US') & ~is_ship].reset_index(drop=True)

# Every column after the first four is a date; use Timestamps as labels.
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Split the 'state' text on commas: the last piece is kept as the state, the
# first piece as the county. Entries without a comma have no county.
parts = df.state.str.split(',')
county = parts.str[0].str.strip()
county[~df.state.str.contains(',', na=False)] = None
df.state = parts.str[-1].str.strip()
df.insert(0, 'county', county)

logscale=False
ylim=(1,1.5e4)
title='Confirmed cases of COVID-19 in the USA. Status: {}'.format(df.columns[-1].date())

opts = {'legend':True, 'logy': logscale, 'grid': True, 'width': 700, 'height': 400,
        'title': title, 'padding':0.1,'ylim':ylim,'ylabel':'Number of Confirmed Cases','xlabel':'Date'}

# Sum the integer (daily count) columns over all US rows; lat/lon are floats
# and are therefore not included.
# np.integer is used because the np.int alias no longer exists in current NumPy.
s = df.select_dtypes(np.integer).sum()
s.name = 'USA'
lines = s.hvplot(**opts)
dots  = s.hvplot.scatter(**opts)

# Hvplot creates holoviews objects, and the `*` symbol means [overlay](http://holoviews.org/reference/containers/bokeh/Overlay.html).  See [holoviz plot customization](http://holoviews.org/user_guide/Customizing_Plots.html) for available options.
usa = lines * dots
usa

##  Confirmed Cases: Country Comparison

In [14]:
# Countries shown in the comparison panels, spelled as in the 'country' column.
countries = (
    'China', 'Korea, South', 'Italy', 'Iran', 'Spain', 'Germany',
    'Austria', 'France', 'US', 'Mexico', 'Canada',
)

# Read the confirmed-cases table, turn the date column labels (everything
# after the first four columns) into Timestamps, then shorten the names of the
# four descriptive columns.
df = pd.read_csv(cv19_c)
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))
df = df.rename(columns={
    'Country/Region': 'country',
    'Province/State': 'state',
    'Lat': 'lat',
    'Long': 'lon',
})

def country(name='Germany',normalize_by_population=False,logy=False):
    """Return a line+marker overlay of the confirmed-case series for one country.

    Reads the module-level frame ``df`` (one row per region, integer columns
    holding the counts) and, when normalizing, the population table ``popdf2``.

    Parameters
    ----------
    name : str
        Value to match in ``df.country`` (and in ``popdf2.country``).
    normalize_by_population : bool
        If True, divide the summed counts by ``population * 10``. The axis label
        used with these plots is '[%]', so this is presumably a conversion to
        percent for a population stored in thousands -- TODO confirm the unit
        of ``popdf2.population``.
    logy : bool
        Passed to hvplot as ``logy`` (logarithmic y axis).

    Returns
    -------
    holoviews overlay of ``s.hvplot(...) * s.hvplot.scatter(...)``.

    Raises
    ------
    KeyError
        If normalization is requested and ``popdf2`` has no 2019 row for ``name``.
    """
    rows = df[df.country == name]
    # Sum all regions of the country per date. Only integer columns are used, so
    # lat/lon (floats) and text columns are left out. np.integer replaces the
    # np.int alias, which no longer exists in current NumPy.
    s = rows.select_dtypes(np.integer).sum()
    if normalize_by_population:
        pop_rows = popdf2[(popdf2.country == name) & (popdf2.Time == 2019)]
        if pop_rows.empty:
            raise KeyError(f'No 2019 population entry for {name!r} in popdf2')
        s = s / (pop_rows.population.values[0] * 10)
        ylim = (0.00001, 0.015)
    else:
        ylim = (1.0, 1.0e4)
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,
        'title': f'Confirmed cases of COVID-19 in {name}', 'padding':0.1,
        'ylim':ylim}
    # The series name becomes the legend entry.
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    return lines * dots

# Build four stacked panels:
# (total count | normalized by population) x (linear | logarithmic y axis).
lo=[]
for normalize_by_population in [False,True]:
    for logscale in [False, True]:

        if normalize_by_population:
            ylim=(0.000001,0.07)
            ylabel='Number of cases normalized by population [%]'
            panel_title='COVID-19 Confirmed Cases: Normalized by Population, Status: {}'.format(df.columns[-1].date())
        else:
            ylim=(1,100000)
            ylabel='Total Number of Cases [Count]'
            panel_title='COVID-19 Confirmed Cases: Total Count, Status: {}'.format(df.columns[-1].date())

        if logscale:
            panel_title+=', Logarithmic Scale'

        # One line+marker overlay per country, all drawn into the same panel.
        ol=[country(i,normalize_by_population,logscale) for i in countries]

        overlay=hv.Overlay(ol)
        # `title` is the supported option; `title_format` is a deprecated alias.
        # The panel-level ylim set here is the one that applies to the overlay.
        lo.append(overlay.opts(height=800,width=1000,title=panel_title, ylim=ylim,ylabel=ylabel,xlabel='Date',xrotation=45))

# Independent axes per panel, laid out in a single column.
layout=hv.Layout(lo).opts(shared_axes=False)
layout.cols(1)

## Confirmed Cases: U.S. States Comparison

In [15]:
# (full name, two-letter code) for each state to compare. After the split below
# the 'state' column holds either form, so both are needed for matching; the
# full name is also the key into the Census population table.
states=(('Massachusetts','MA'),('Connecticut','CT'),('Washington','WA'),('Texas','TX'),('California','CA'),('New York','NY'),('Georgia','GA'),('Illinois','IL'),('Michigan','MI'))


# Load the confirmed-cases table and give the descriptive columns short names.
df = pd.read_csv(cv19_c).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})

# Keep US rows only, leaving out the two cruise-ship entries.
is_ship = df.state.isin(['Diamond Princess', 'Grand Princess'])
df = df[(df.country == 'US') & ~is_ship].reset_index(drop=True)

# Every column after the first four is a date; use Timestamps as labels.
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Split the 'state' text on commas: the last piece is kept as the state, the
# first piece as the county. Entries without a comma have no county.
parts = df.state.str.split(',')
county = parts.str[0].str.strip()
county[~df.state.str.contains(',', na=False)] = None
df.state = parts.str[-1].str.strip()
df.insert(0, 'county', county)


def state(name='Massachusetts',code='MA',normalize_by_population=False,logy=False):
    """Return a line+marker overlay of the confirmed-case series for one US state.

    Reads the module-level frame ``df`` and, when normalizing, the Census table
    ``popusdf``.

    Parameters
    ----------
    name : str
        Full state name; matched against ``df.state`` and used to look up the
        population in ``popusdf.state``.
    code : str
        Alternative value to match against ``df.state``.
    normalize_by_population : bool
        If True, express the counts as a percentage of the state's population
        (``count / population * 100``), matching the '[%]' axis label.
        Assumes ``popusdf.population`` is a head count -- TODO confirm.
    logy : bool
        Passed to hvplot as ``logy`` (logarithmic y axis).

    Raises
    ------
    KeyError
        If normalization is requested and ``popusdf`` has no row for ``name``.
    """
    rows = df[(df.state == name) | (df.state == code)]
    # Sum all matching rows per date; only integer columns are used, so lat/lon
    # (floats) and text columns are left out. np.integer replaces the np.int
    # alias, which no longer exists in current NumPy.
    s = rows.select_dtypes(np.integer).sum()
    if normalize_by_population:
        # Use the population of the requested state (this used to be hard-wired
        # to Massachusetts for every state).
        pop_match = popusdf.loc[popusdf.state == name, 'population']
        if pop_match.empty:
            raise KeyError(f'No population entry for state {name!r} in popusdf')
        s = s / pop_match.values[0] * 100
        ylim = (0.001, 1.0)
    else:
        ylim = (1.0, 1.0e4)
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,
        'title': f'Confirmed cases of COVID-19 in {name}', 'padding':0.1,
        'ylim':ylim}
    # The series name becomes the legend entry.
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    return lines * dots


# Time window shown on the x axis: from sd to one day past the last data column.
sd='2020-02-15'
xlim=(pd.Timestamp(sd), df.columns[-1] + dt.timedelta(days=1))


# Build four stacked panels:
# (total count | normalized by population) x (linear | logarithmic y axis).
lo=[]
for normalize_by_population in [False,True]:
    for logscale in [False, True]:

        if normalize_by_population:
            # Limits are in percent, consistent with the values state() returns.
            ylim=(0.0001,0.04)
            ylabel='Number of cases normalized by population [%]'
            panel_title='U.S. COVID-19 Confirmed Cases: Normalized by Population, Status: {}'.format(df.columns[-1].date())
        else:
            ylim=(1,3000)
            ylabel='U.S. Total Number of Cases [Count]'
            panel_title='U.S. COVID-19 Confirmed Cases: Total Count, Status: {}'.format(df.columns[-1].date())

        if logscale:
            panel_title+=', Logarithmic Scale'

        # One line+marker overlay per state, all drawn into the same panel.
        ol=[state(*i,normalize_by_population,logscale) for i in states]

        overlay=hv.Overlay(ol)
        # `title` is the supported option; `title_format` is a deprecated alias.
        lo.append(overlay.opts(height=800,width=1000,title=panel_title, ylim=ylim,xlim=xlim,ylabel=ylabel,xlabel='Date',xrotation=45))

# Independent axes per panel, laid out in a single column.
layout=hv.Layout(lo).opts(shared_axes=False)
layout.cols(1)

## COVID-19 Country Comparison Deaths

In [16]:
# Countries shown in the comparison panels, spelled as in the 'country' column.
countries=('China','Korea, South','Italy','Iran','Spain','Germany','Austria','France','US','Mexico','Canada')

# Load the deaths table. As in the confirmed-cases cells, the date column
# labels (everything after the first four columns) are converted to Timestamps
# so the plots get a real time axis instead of text labels.
df = pd.read_csv(cv19_d)
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))
df = df.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                        'Lat': 'lat', 'Long': 'lon'})

def country(name='Germany',normalize_by_population=False,logy=False):
    """Return a line+marker overlay of the death series for one country.

    Reads the module-level frame ``df`` (loaded from the deaths file in this
    cell) and, when normalizing, the population table ``popdf2``.

    Parameters
    ----------
    name : str
        Value to match in ``df.country`` (and in ``popdf2.country``).
    normalize_by_population : bool
        If True, divide the summed counts by ``population * 10``. The axis label
        used with these plots is '[%]', so this is presumably a conversion to
        percent for a population stored in thousands -- TODO confirm the unit
        of ``popdf2.population``.
    logy : bool
        Passed to hvplot as ``logy`` (logarithmic y axis).

    Returns
    -------
    holoviews overlay of ``s.hvplot(...) * s.hvplot.scatter(...)``.

    Raises
    ------
    KeyError
        If normalization is requested and ``popdf2`` has no 2019 row for ``name``.
    """
    rows = df[df.country == name]
    # Sum all regions of the country per date. Only integer columns are used, so
    # lat/lon (floats) and text columns are left out. np.integer replaces the
    # np.int alias, which no longer exists in current NumPy.
    s = rows.select_dtypes(np.integer).sum()
    if normalize_by_population:
        pop_rows = popdf2[(popdf2.country == name) & (popdf2.Time == 2019)]
        if pop_rows.empty:
            raise KeyError(f'No 2019 population entry for {name!r} in popdf2')
        s = s / (pop_rows.population.values[0] * 10)
        ylim = (0.00001, 0.005)
    else:
        ylim = (1.0, 1.0e4)
    # Title names deaths: this cell plots the deaths table, not confirmed cases.
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,
        'title': f'COVID-19 deaths in {name}', 'padding':0.1,
        'ylim':ylim}
    # The series name becomes the legend entry.
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    return lines * dots


# Build four stacked panels:
# (total count | normalized by population) x (linear | logarithmic y axis).
lo=[]
for normalize_by_population in [False,True]:
    for logscale in [False, True]:

        if normalize_by_population:
            ylim=(0.000001,0.006)
            ylabel='Number of deaths normalized by population [%]'
            panel_title='COVID-19 Deaths: Normalized by Population'
        else:
            ylim=(1,4000)
            ylabel='Total Number of Deaths [Count]'
            panel_title='COVID-19 Deaths: Total Count'

        if logscale:
            panel_title+=', Logarithmic Scale'

        # One line+marker overlay per country, all drawn into the same panel.
        ol=[country(i,normalize_by_population,logscale) for i in countries]

        overlay=hv.Overlay(ol)
        # `title` is the supported option; `title_format` is a deprecated alias.
        # The panel-level ylim set here is the one that applies to the overlay.
        lo.append(overlay.opts(height=800,width=1000,title=panel_title, ylim=ylim,ylabel=ylabel,xlabel='Date',xrotation=45))

# Independent axes per panel, laid out in a single column.
layout=hv.Layout(lo).opts(shared_axes=False)
layout.cols(1)

## U.S. Deaths

In [17]:
# (full name, two-letter code) for each state to compare. After the split below
# the 'state' column holds either form, so both are needed for matching; the
# full name is also the key into the Census population table.
states=(('Massachusetts','MA'),('Connecticut','CT'),('Washington','WA'),('Texas','TX'),('California','CA'),('New York','NY'),('Georgia','GA'),('Illinois','IL'),('Michigan','MI'))

# Load the deaths table and give the descriptive columns short names.
df = pd.read_csv(cv19_d).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})

# Keep US rows only, leaving out the two cruise-ship entries.
is_ship = df.state.isin(['Diamond Princess', 'Grand Princess'])
df = df[(df.country == 'US') & ~is_ship].reset_index(drop=True)

# Every column after the first four is a date; use Timestamps as labels.
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Split the 'state' text on commas: the last piece is kept as the state, the
# first piece as the county. Entries without a comma have no county.
parts = df.state.str.split(',')
county = parts.str[0].str.strip()
county[~df.state.str.contains(',', na=False)] = None
df.state = parts.str[-1].str.strip()
df.insert(0, 'county', county)


def state(name='Massachusetts',code='MA',normalize_by_population=False,logy=False):
    """Return a line+marker overlay of the death series for one US state.

    Reads the module-level frame ``df`` (loaded from the deaths file in this
    cell) and, when normalizing, the Census table ``popusdf``.

    Parameters
    ----------
    name : str
        Full state name; matched against ``df.state`` and used to look up the
        population in ``popusdf.state``.
    code : str
        Alternative value to match against ``df.state``.
    normalize_by_population : bool
        If True, express the counts as a percentage of the state's population
        (``count / population * 100``), matching the '[%]' axis label.
        Assumes ``popusdf.population`` is a head count -- TODO confirm.
    logy : bool
        Passed to hvplot as ``logy`` (logarithmic y axis).

    Raises
    ------
    KeyError
        If normalization is requested and ``popusdf`` has no row for ``name``.
    """
    rows = df[(df.state == name) | (df.state == code)]
    # Sum all matching rows per date; only integer columns are used, so lat/lon
    # (floats) and text columns are left out. np.integer replaces the np.int
    # alias, which no longer exists in current NumPy.
    s = rows.select_dtypes(np.integer).sum()
    if normalize_by_population:
        # Use the population of the requested state (this used to be hard-wired
        # to Massachusetts for every state).
        pop_match = popusdf.loc[popusdf.state == name, 'population']
        if pop_match.empty:
            raise KeyError(f'No population entry for state {name!r} in popusdf')
        s = s / pop_match.values[0] * 100
        ylim = (0.001, 1.0)
    else:
        ylim = (1.0, 1.0e4)
    # Title names deaths: this cell plots the deaths table, not confirmed cases.
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,
        'title': f'COVID-19 deaths in {name}', 'padding':0.1,
        'ylim':ylim}
    # The series name becomes the legend entry.
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    return lines * dots


# Time window shown on the x axis: from sd to one day past the last data column.
sd='2020-02-15'
xlim=(pd.Timestamp(sd), df.columns[-1] + dt.timedelta(days=1))


# Build four stacked panels:
# (total count | normalized by population) x (linear | logarithmic y axis).
lo=[]
for normalize_by_population in [False,True]:
    for logscale in [False, True]:

        if normalize_by_population:
            # Limits are in percent, consistent with the values state() returns.
            ylim=(0.0001,0.001)
            ylabel='Number of deaths normalized by population [%]'
            panel_title='U.S. COVID-19 Deaths: Normalized by Population'
        else:
            ylim=(1,75)
            ylabel='Total Number of Deaths [Count]'
            panel_title='U.S. COVID-19 Deaths: Total Count'

        if logscale:
            panel_title+=', Logarithmic Scale'

        # One line+marker overlay per state, all drawn into the same panel.
        ol=[state(*i,normalize_by_population,logscale) for i in states]

        overlay=hv.Overlay(ol)
        # `title` is the supported option; `title_format` is a deprecated alias.
        lo.append(overlay.opts(height=800,width=1000,title=panel_title, ylim=ylim,xlim=xlim,ylabel=ylabel,xlabel='Date',xrotation=45))

# Independent axes per panel, laid out in a single column.
layout=hv.Layout(lo).opts(shared_axes=False)
layout.cols(1)

# We hope these data are informative and convey how seriously we have to take the COVID-19 pandemic. Stay safe.