In [63]:
import numpy as np
import pandas as pd
import datetime as dt
pd.set_option('display.max_rows', 1000)
import hvplot.pandas
import holoviews as hv
from math import log,ceil
from bokeh.models import HoverTool
import warnings


from math import pi
from bokeh.io import output_file, show
from bokeh.palettes import Category20c
from bokeh.plotting import figure
from bokeh.transform import cumsum

warnings.simplefilter('ignore')
pd.options.mode.chained_assignment = None  # default='warn'

In [64]:
# World population
# pop='https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv'
# We use a locally downloaded dataset from the link above
pop= '/s/data//WPP2019_TotalPopulationBySex.csv'

popdf = pd.read_csv(pop).rename(columns={'Location': 'country','PopTotal':'population'})

# Keep only the 2019 rows. The explicit copy makes popdf2 an independent frame,
# so the renames below are not written into a slice of popdf.
popdf2 = popdf[popdf.Time==2019].copy()

# Necessary adjustments of names so we can link the population and JHU data sets
popdf2['country'] = popdf2['country'].replace({
    'United States of America': 'US',
    'Iran (Islamic Republic of)': 'Iran',
    'Republic of Korea': 'Korea, South',
})

In [65]:
# US Population (local copy of the file behind the link below)
#popus='http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv?#'
popus='/s/data/nst-est2019-alldata.csv'
# Rename on load; the former bare `popusdf['POPESTIMATE2019']` statement
# displayed nothing and changed nothing, so it is gone.
popusdf = pd.read_csv(popus).rename(columns={'NAME': 'state','POPESTIMATE2019':'population'})

In [66]:
# Sources for COVID-19 (CSSEGISandData/COVID-19 time-series CSV files on GitHub)
# c  = confirmed cases (global file)
# d  = deaths (global file)
# r  = recovered (global file)
# cu = confirmed cases (US file)
# du = deaths (US file)

cv19_c ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
cv19_d ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
cv19_r ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
cv19_cu ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
cv19_du ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv'
# cv19_cu ='/s/data/covid19/ebd_us_Confirmed.csv'
# cv19_du ='/s/data/covid19/ebd_us_Deaths.csv'
#old
#cv19_co ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
# cv19_do ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
# Switching for now to data for US provided in old format from soothsawyer
# cv19_co ='https://www.soothsawyer.com/john-hopkins-time-series-data-confirmed-case-csv-after-march-22-2020/?github=1'
# cv19_do ='https://www.soothsawyer.com/wp-content/uploads/2020/03/time_series_19-covid-Deaths.csv'
# cv19_co='/s/data/covid19/time_series_19-covid-Confirmed.csv'
# cv19_do='/s/data/covid19/time_series_19-covid-Deaths.csv'

# cv19_r ='https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'

In [67]:
# Pre-Selected Countries (spelled as in the 'country' column they are matched against)
countries = (
    'China', 'Korea, South', 'Japan', 'Italy', 'Iran', 'Spain', 'Germany',
    'Austria', 'France', 'Sweden', 'United Kingdom', 'US', 'Mexico', 'Canada',
    'Brazil', 'India', 'Australia', 'South Africa', 'Egypt',
)
# Countries whose curves start un-muted in the country comparison plots
high_lighted_countries = ('US', 'Germany', 'Korea, South')

In [68]:
# Pre-Selected States as (name, postal code) pairs, kept as an alphabetically sorted list
states = sorted([
    ('Massachusetts', 'MA'), ('Connecticut', 'CT'), ('Washington', 'WA'),
    ('Texas', 'TX'), ('California', 'CA'), ('New York', 'NY'),
    ('Georgia', 'GA'), ('Illinois', 'IL'), ('Michigan', 'MI'),
    ('Louisiana', 'LA'), ('Florida', 'FL'), ('New Jersey', 'NJ'),
    ('Colorado', 'CO'), ('Indiana', 'IN'), ('Pennsylvania', 'PA'),
])

In [69]:
# Selected counties, grouped by the state they belong to
selected_counties = dict([
    ('California', ['San Francisco', 'Los Angeles', 'San Diego']),
    ('Connecticut', ['Fairfield', 'New Haven']),
    ('Massachusetts', ['Hampshire', 'Dukes and Nantucket', 'Barnstable', 'Norfolk', 'Middlesex']),
    ('Michigan', ['Washtenaw']),
    ('Pennsylvania', ['Union']),
    ('New York', ['New York']),
])

In [70]:
# Deaths: four metadata columns followed by one column per date
df_d = pd.read_csv(cv19_d)
df_d.columns = df_d.columns[:4].append(pd.to_datetime(df_d.columns[4:]))
df_d = df_d.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
# Keep the pre-selected countries only
df_d = df_d[df_d.country.isin(countries)]

# Infections (confirmed cases): same layout, same treatment
df_c = pd.read_csv(cv19_c)
df_c.columns = df_c.columns[:4].append(pd.to_datetime(df_c.columns[4:]))
df_c = df_c.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
df_c = df_c[df_c.country.isin(countries)]

In [71]:
# Latest cumulative totals (last date column) for Germany, the US and all
# pre-selected countries. Each frame is filtered with its OWN country column
# and its OWN last column; the deaths totals previously borrowed the mask and
# column label from the confirmed-cases frame, which is only correct while
# both files have identical rows and dates.
ger_c=df_c.loc[df_c.country=='Germany',df_c.columns[-1]].sum()
ger_d=df_d.loc[df_d.country=='Germany',df_d.columns[-1]].sum()
usa_c=df_c.loc[df_c.country=='US',df_c.columns[-1]].sum()
usa_d=df_d.loc[df_d.country=='US',df_d.columns[-1]].sum()
all_c=df_c[df_c.columns[-1]].sum()
all_d=df_d[df_d.columns[-1]].sum()

In [72]:
# Get the data sets
df_du = pd.read_csv(cv19_du)
# Add population for Dukes and Nantucket Counties (11399,17332).
# One .loc call replaces the chained df['col'][row] = ... form, which can write
# to a temporary copy and leave df_du untouched. If the combined row is not in
# the file, nothing is changed (the old form raised IndexError).
df_du.loc[df_du.Admin2=='Dukes and Nantucket','Population']=17332+11399
df_cu = pd.read_csv(cv19_cu)
# JHU US Confirmed does not have the Population column!
# NOTE(review): the values are copied by position, which assumes both files
# list the same rows in the same order - confirm.
df_cu.insert(list(df_du.columns).index('Population'),'Population',df_du.Population.values)


def pred_df(df):
    """Prepare a JHU US time-series frame (confirmed or deaths) for plotting.

    Steps:
      * drop the identifier columns UID, iso2, iso3 and code3;
      * rename the metadata columns to country / state / lat / lon / county;
      * keep US rows and drop the two cruise-ship entries;
      * label rows without a county as 'Territory';
      * store 'Population' as float, so that integer-dtype selection
        (``select_dtypes(int)``) returns the date columns only;
      * convert every column after 'Population' to a midnight timestamp.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with the columns named above, a 'Population' column, and
        one column per date after it.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index. The input frame is not modified.
    """
    # drop(columns=...) instead of a positional axis argument, which newer
    # pandas releases no longer accept.
    df = df.drop(columns=['UID','iso2','iso3','code3'])
    df = df.rename(columns={'Country_Region': 'country', 'Province_State': 'state',
                            'Lat': 'lat', 'Long_': 'lon','Admin2':'county'})
    # The former `county != 'NaN'` test compared against the literal string and
    # never removed a missing value, so it is omitted.
    on_ship = df.state.isin(['Diamond Princess','Grand Princess'])
    df = df[(df.country=='US') & ~on_ship].reset_index(drop=True)
    df.loc[df.county.isna(),'county']='Territory'
    idx=list(df.columns).index('Population')+1
    # Replace the column outright; an element-wise iloc assignment may keep the
    # original integer dtype.
    df['Population'] = df['Population'].astype(float)
    df.columns = df.columns[:idx].append(pd.to_datetime(df.columns[idx:]).normalize())
    return df

# Clean both US tables (confirmed first, then deaths) with the same routine
df_cu, df_du = pred_df(df_cu), pred_df(df_du)

# snames=[x[0] for x in states]
# scodes=[x[1] for x in states]
# df_cu = df_cu[[x in snames or x in scodes for x in df_cu.state.values]]
# df_du = df_du[[x in snames or x in scodes for x in df_du.state.values]]

<a id='top'></a>
# Daily Analysis of COVID-19 Data from Johns Hopkins University

In [73]:
# Stamp the page with the time of this run. The label says UTC, so the time is
# taken in UTC explicitly rather than in the machine's local time zone.
print('           Updated every 6 hours. Last update:',dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M UTC'))

           Updated every 6 hours. Last update: 2020-04-06 02:17 UTC


[Josef Kellndorfer, Ph.D.](mailto:info@earthbigdata.com), [Earth Big Data, LLC](http://earthbigdata.com); Richard Signell, Ph.D.

<a href="mailto:?bcc=info@earthbigdata.com&subject=Informative COVID-19 Website&body=Hi, Josef at Earth Big Data shared this website with another look at COVID-19 data. I thought you might like it: http://earthbigdata.com/covid-19/">Click here</a> to share this website.

NOTE: You can **hover over plots** to see actual numbers.



### News 2020-04-06
- Added a [section of selected counties compared to state and country](#SelectedCountiesCases)
- Added [**Cape Cod and Islands**](#CapeAndIslandsCases)  plots
- Added charts for [**top 12 countries**](#Top12CasesDeaths) confirmed cases and deaths 

### Content
[Introduction: Population Normalization and Logarithmic Scaling](#Introduction)  
[Interacting with the plots](#Interaction)  

**Examples Linear and Logarithmic Scale**  
[Confirmed Cases: USA](#Confirmed_Cases_USA)  
[Confirmed Cases: Germany](#Confirmed_Cases_Germany)  
**Latest Data: Confirmed Cases and Deaths by Country**  
[Latest Top 12 Countries Total Cases and Deaths](#Top12CasesDeaths)  
[Latest per Capita Confirmed Cases and Deaths](#PerCapitaCasesDeathCountries)  
**Timelines: Countries**  
[Confirmed Cases: Country Comparison](#Confirmed_Cases_Country_Comparison)  
[Deaths: Country Comparison](#Deaths_Country_Comparison)  
**Timelines: U.S. States**  
[Confirmed Cases: U.S. States Comparison](#Confirmed_Cases_US_Comparison)  
[Deaths: U.S States Comparison](#Deaths_US_Comparison)  
**Timelines: Cape Cod and Islands**  
[Cape Cod and Islands Confirmed Cases](#CapeAndIslandsCases)  
[Cape Cod and Islands Deaths](#CapeAndIslandsDeaths)  
**Mortality**  
[Latest Mortality Rate](#MortalityRate)  
[Mortality Timeline](#MortalityTimeline)  
**3-day Change by Country**  
[3-day Change in Confirmed Cases](#ChangeConfirmedCases)  
[3-day Change in Deaths](#ChangeDeaths)  
**Doubling Rates Countries**  
[Doubling Rate in Days: Confirmed Cases](#DoublingDaysConfirmedCases)  
[Doubling Rate in Days: Deaths](#DoublingDaysDeaths)  
**Doubling Rates U.S.**  
[Doubling Rate in Days: Confirmed Cases (U.S.)](#DoublingDaysConfirmedCasesUS)  
[Doubling Rate in Days: Deaths (U.S)](#DoublingDaysDeathsUS)  
**Selected U.S. Counties**  
[Selected Counties: Confirmed Cases](#SelectedCountiesCases)   
[Selected Counties: Deaths](#SelectedCountiesDeaths)   

[Suggested Reading](#Reading)  
[Data Sources](#DataSources)  


<a id='Introduction'></a>
## Introduction: Population Normalization and Logarithmic Scaling
[back to top](#top)  
These plots show the daily status of COVID-19 cases as reported by Johns Hopkins University. We want to caveat the data analysis and interpretation:

- Keep in mind that data are changing frequently as more Covid-19 testing (for cases and deaths) becomes more readily available globally.
- Overall: Use your own judgement about the trustworthiness of reported data, i.e. consider where the data come from (see the section on [Data Sources](#DataSources)).

We chose to plot **totals and** numbers **normalized by population** (expressed as per 100,000). Also, it is advantageous to plot case totals (confirmed infections, deaths) in **logarithmic scale**, where trends and parallels between countries become more obvious. Note that a straight line trending upwards in logarithmic scale indicates exponential increase! Taking a close look at the plots, one will discern differences and similarities, and that for the most part initial stages are **similar in all countries with a time lag**. What to look out for is whether the measures taken by countries, foremost social distancing, show the desired effects of slowing and eventually levelling out the exponential upwards trends. We produce plots for confirmed cases and deaths, which may be somewhat more reliable with respect to an impact for a country while tests are rolled out in larger numbers. We also plot mortality rates, 3-day change curves and doubling rates of confirmed cases and deaths.

This is work in progress, stay tuned. 

You can get the notebook underlying this work at:
[https://github.com/EarthBigData/covid19](https://github.com/EarthBigData/covid19)

<a id='Interaction'></a>
## Interacting with the plots
[back to top](#top)  

**Hover:** See actual numbers when hovering over a plot.

**Control buttons:** Interact with the plots: **Pan**, **Zoom in/out**, **Reset**, **Save**.

**Labels:** In the legend, click on label **to dim/highlight a specific country or state**.

<a id='Confirmed_Cases_USA'></a>
## Confirmed Cases: Status for United States
[back to top](#top)

In [12]:
# Load the global confirmed-cases table. The first four columns are metadata
# (state, country, lat, lon); every remaining column holds one date.
df = pd.read_csv(cv19_c).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

df = df[(df.country=='US') & (df.state!='Diamond Princess') &
        (df.state!='Grand Princess')].reset_index(drop=True)

# Daily national totals. The date columns are taken by position (4 onwards)
# instead of by integer dtype: the earlier integer cast started at column 5,
# which left the first reported day out of the series, and the dtype lookup
# used np.int, an alias that current NumPy no longer provides.
s = df[df.columns[4:]].fillna(0).astype(int).sum()
s.name = 'USA'
upperlim=s.iloc[-1]*1.2
ylim=(1,upperlim)

# Same series twice: linear y-axis first, logarithmic y-axis second.
panels=[]
for logscale,suffix in ((False,''),(True,', Logarithmic Scale')):
    title='Confirmed cases of COVID-19 in {}, Status: {}{}'.format(s.name,df.columns[-1].date(),suffix)
    opts = {'legend':True, 'logy': logscale, 'grid': True,'title': title, 'padding':0.1,'ylim':ylim,
            'ylabel':'Number of Confirmed Cases','xlabel':'Date','width':700}
    lines = s.hvplot(**opts)
    dots  = s.hvplot.scatter(**opts)
    panels.append((lines * dots).opts(active_tools=['box_zoom']))
layoutlin,layoutlog = panels

layout=hv.Layout(panels).opts(shared_axes=False,merge_tools=False)
layout.cols(1)

<a id='Confirmed_Cases_Germany'></a>
## Confirmed Cases: Status for Germany
[back to top](#top)

In [13]:
# Load the global confirmed-cases table. The first four columns are metadata
# (state, country, lat, lon); every remaining column holds one date.
df = pd.read_csv(cv19_c).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

df=df[(df.country=='Germany')].reset_index(drop=True)

# Daily national totals. The date columns are taken by position (4 onwards)
# instead of by integer dtype: the earlier integer cast started at column 5,
# which left the first reported day out of the series, and the dtype lookup
# used np.int, an alias that current NumPy no longer provides.
s = df[df.columns[4:]].fillna(0).astype(int).sum()
s.name = 'Germany'
upperlim=s.iloc[-1]*1.2
ylim=(1,upperlim)

# Same series twice: linear y-axis first, logarithmic y-axis second.
panels=[]
for logscale,suffix in ((False,''),(True,', Logarithmic Scale')):
    title='Confirmed cases of COVID-19 in Germany, Status: {}{}'.format(df.columns[-1].date(),suffix)
    opts = {'legend':True, 'logy': logscale, 'grid': True,'title': title, 'padding':0.1,'ylim':ylim,
            'ylabel':'Number of Confirmed Cases','xlabel':'Date','width':700}
    lines = s.hvplot(**opts)
    dots  = s.hvplot.scatter(**opts)
    panels.append((lines * dots).opts(active_tools=['box_zoom']))
layoutlin,layoutlog = panels

layout=hv.Layout(panels).opts(shared_axes=False,merge_tools=False)
layout.cols(1)

<a id='Top12CasesDeaths'></a>
## Latest Top 12 Countries Total Confirmed Cases and Deaths
[back to top](#top)

In [14]:
# Pie chart: the 12 countries with the most confirmed cases, plus 'Other'
# Deaths (all countries; the deaths pie chart reads this frame as well)
df_d = pd.read_csv(cv19_d)
df_d.columns = df_d.columns[0:4].append(pd.to_datetime(df_d.columns[4:]))
df_d = df_d.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
# Infections confirmed cases (all countries)
df_c = pd.read_csv(cv19_c)
df_c.columns = df_c.columns[0:4].append(pd.to_datetime(df_c.columns[4:]))
df_c = df_c.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})

# Latest cumulative count per country. Only the last date column is summed, so
# text columns such as 'state' never enter the aggregation.
current=df_c.groupby('country')[[df_c.columns[-1]]].sum()
data=current.copy()
data.columns=['value']

top=12
data=data.sort_values('value',ascending=False)
total=data['value'].sum()
# Collapse everything below the top 12 into one 'Other' row. pd.concat is used
# because DataFrame.append is no longer available in current pandas. The index
# keeps its 'country' name: the plot reads it as the 'country' data column.
other=pd.DataFrame({'value':[data['value'].iloc[top:].sum()]},
                   index=pd.Index(['Other'],name=data.index.name))
data=pd.concat([data.iloc[:top],other])

# Wedge size is the country's share of the full circle
data['angle'] = data['value']/data['value'].sum() * 2*pi
colors=Category20c[len(data.index)]
data['color'] = colors

title='Top 12 countries of confirmed {:,} COVID-19 CASES ({})'.format(total,current.columns[-1].date())
p = figure(plot_height=350, title=title, toolbar_location=None,
        tools="hover", tooltips="@country: @value{0,}")

p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='country', source=data)
p.xaxis.visible=False
p.yaxis.visible=False
p.grid.visible=False
p.outline_line_color='white'

show(p)

In [15]:
# Pie chart: the 12 countries with the most deaths, plus 'Other'
# Latest cumulative count per country. Only the last date column is summed, so
# text columns such as 'state' never enter the aggregation.
current=df_d.groupby('country')[[df_d.columns[-1]]].sum()
data=current.copy()
data.columns=['value']

top=12
data=data.sort_values('value',ascending=False)
total=data['value'].sum()
# Collapse everything below the top 12 into one 'Other' row. pd.concat is used
# because DataFrame.append is no longer available in current pandas. The index
# keeps its 'country' name: the plot reads it as the 'country' data column.
other=pd.DataFrame({'value':[data['value'].iloc[top:].sum()]},
                   index=pd.Index(['Other'],name=data.index.name))
data=pd.concat([data.iloc[:top],other])

# Wedge size is the country's share of the full circle
data['angle'] = data['value']/data['value'].sum() * 2*pi
# Same palette as the confirmed-cases pie (`colors` is defined in that cell)
data['color'] = colors

title='Top 12 countries of confirmed {:,} COVID-19 DEATHS ({})'.format(total,current.columns[-1].date())
p = figure(plot_height=350, title=title, toolbar_location=None,
        tools="hover", tooltips="@country: @value{0,}")

p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend='country', source=data)
p.xaxis.visible=False
p.yaxis.visible=False
p.grid.visible=False
p.outline_line_color='white'

show(p)

In [16]:
# Find countries in the top 12 that are not in the pre-selected list and add
# them to the pre-selected countries ('Other' is the pie-chart remainder row,
# not a country).
top12=set(data.index)
missing_countries = top12.difference(countries)
missing_countries.discard('Other')
countries = sorted(list(countries) + list(missing_countries), reverse=True)

<a id='PerCapitaCasesDeathCountries'></a>
##  Latest Confirmed Cases and Deaths Per Capita: Country Comparison
[back to top](#top)

In [17]:
# Deaths
df_d = pd.read_csv(cv19_d)
df_d.columns = df_d.columns[0:4].append(pd.to_datetime(df_d.columns[4:]))
df_d = df_d.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
# Confirmed cases
df_c = pd.read_csv(cv19_c)
df_c.columns = df_c.columns[0:4].append(pd.to_datetime(df_c.columns[4:]))
df_c = df_c.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})

# 2019 population per selected country (first row per country is used).
# reindex() gives NaN for a country without a population entry instead of
# raising KeyError; such countries are removed by dropna() below.
pdf=popdf2[popdf2.Time==2019].drop_duplicates('country').set_index('country')
pdf=pdf.population.reindex(sorted(countries))

# Get the latest numbers: country column plus the last date column, summed per country
latest_c = [df_c.columns[1],df_c.columns[-1]]
df_c = df_c[latest_c].groupby('country').sum()
latest_d = [df_d.columns[1],df_d.columns[-1]]
df_d = df_d[latest_d].groupby('country').sum()

# Latest count per selected country, in the same (sorted) order as pdf
c = df_c.iloc[:,0].reindex(sorted(countries))
d = df_d.iloc[:,0].reindex(sorted(countries))

# Cases. The population figures are handled as thousands throughout this
# notebook, so count/population*100 is the count per 100,000 inhabitants.
m=(c/pdf*100).dropna().sort_values(ascending=True)
title='COVID-19 Confirmed cases per 100,000 by Country, Status {}'.format(latest_c[-1].date())
cases = m.hvplot.barh(padding=0.1,grid=True,ylabel='Confirmed Cases per Capita [per 100,000]',xlabel='',title=title,height=400)
# Deaths (the title now carries the date of the deaths table itself)
m=(d/pdf*100).dropna().sort_values(ascending=True)
title='COVID-19 Deaths per 100,000 by Country, Status {}'.format(latest_d[-1].date())
deaths = m.hvplot.barh(padding=0.1,grid=True,ylabel='Deaths per Capita [per 100,000]',xlabel='',title=title,height=400)

layout=hv.Layout([cases,deaths]).opts(shared_axes=False)
layout.cols(1)

<a id='Confirmed_Cases_Country_Comparison'></a>
##  Timeline Confirmed Cases: Country Comparison
[back to top](#top)

**CLICK on a label to highlight a line**

In [62]:
# Confirmed cases for the selected countries. The first four columns are
# metadata (state, country, lat, lon); every remaining column holds one date.
df = pd.read_csv(cv19_c).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Store ALL date columns (position 4 onwards) as integers; the helpers in this
# cell pick the time series by integer dtype. The cast used to start at column
# 5, which left the first reported day out of every series.
date_cols = df.columns[4:]
df[date_cols] = df[date_cols].fillna(0).astype(int)

df = df[df.country.isin(countries)]

def uplim(df,pdf,normalize=False,extend=1.2):
    """Upper axis limit for a set of country curves.

    For every country in ``df`` the integer-typed columns are summed over the
    country's rows and the value of the last column is taken as that country's
    latest total. The largest total, scaled by ``extend``, is the limit.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'country' column; the integer-typed columns are the series.
    pdf : pandas.DataFrame
        Population table with 'country', 'Time' and 'population' columns
        (population multiplied by 1000 here). Only read when ``normalize``.
    normalize : bool
        Express each total per 100,000 inhabitants (2019 population).
    extend : float
        Head-room factor applied to the maximum.

    Returns
    -------
    (float, float)
        The limit and its natural logarithm; the logarithm is ``-inf`` when
        the limit is 0 (empty frame or no counts), where ``log`` would raise.
    """
    smax=0
    # unique(): one pass per country, not one per row of a multi-row country
    for name in df.country.unique():
        # int instead of the np.int alias (gone from current NumPy); .iloc[-1]
        # makes "last column" explicit instead of relying on positional fallback
        s = df[df.country==name].select_dtypes(int).sum().iloc[-1]
        if normalize:
            s = s/(pdf[(pdf.country==name) & (pdf.Time==2019)].population.values[0]*1000)*100000
        if s > smax:
            smax=s
    top = smax*extend
    return top,(log(top) if top > 0 else float('-inf'))

def country(name='Germany',normalize_by_population=False,logy=False):
    """Build the line + scatter overlay of one country's confirmed-case curve.

    Reads the module-level ``df`` (case table), ``popdf2`` (population) and
    ``high_lighted_countries``.

    Parameters
    ----------
    name : str
        Country as spelled in ``df.country``.
    normalize_by_population : bool
        Plot cases per 100,000 inhabitants (2019 population) instead of counts.
    logy : bool
        Use a logarithmic y-axis.

    Returns
    -------
    The overlay ``lines * dots``; its legend entry starts muted unless the
    country is listed in ``high_lighted_countries``.
    """
    rows = df[(df.country==name)]
    # int instead of the np.int alias, which current NumPy no longer provides
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        # (An unused uplim() call used to sit here; it could raise for a
        # country whose latest total is 0, so it was removed.)
        s = s/(popdf2[(popdf2.country==name) & (popdf2.Time==2019)].population.values[0]*1000)*100000
    legend_muted = name not in high_lighted_countries
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 450,'padding':0.05,'size':10, 'alpha':1, 'muted_alpha':0.3}
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    hstate = (lines * dots).opts(legend_muted=legend_muted)
    return hstate

# Axis limits for the absolute and the population-normalized plots
ulim,ulimlog=uplim(df,popdf2)
nulim,nulimlog=uplim(df,popdf2,True)

# Four plots: (total, normalized) x (linear, logarithmic)
lo=[]
for normalize_by_population in (False,True):
    for logscale in (False,True):

        if not normalize_by_population:
            ylim=(1,ulim)
            ylabel='Total Number of Cases [Count]'
            title_format='COVID-19 Confirmed Cases: Total Count, Status: {}'.format(df.columns[-1].date())
        else:
            ylabel='Number of cases normalized by population [per 100,000]'
            title_format='COVID-19 Confirmed Cases: Normalized by Population, Status: {}'.format(df.columns[-1].date())
            ylim=(0.00001,nulim)

        title_format+=', Log. Scale' if logscale else ''

        # One line + scatter overlay per selected country
        ol=[country(i,normalize_by_population,logscale) for i in countries]

        overlay=hv.Overlay(ol)
        lo.append(overlay.opts(height=650,width=780,title_format=title_format,ylabel=ylabel,
                               xlabel='Date',xrotation=45,ylim=ylim,logy=logscale,active_tools=['box_zoom'],toolbar='right',legend_muted=True))
layout=hv.Layout(lo).opts(shared_axes=False,merge_tools=False)
layout.cols(1)

<a id='Deaths_Country_Comparison'></a>
## Timeline Deaths: Country Comparison
[back to top](#top)

**CLICK on a label to highlight a line**

In [61]:
# Deaths for the selected countries. The first four columns are metadata
# (state, country, lat, lon); every remaining column holds one date.
df = pd.read_csv(cv19_d).rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                                         'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Store ALL date columns (position 4 onwards) as integers; the helpers in this
# cell pick the time series by integer dtype. The cast used to start at column
# 5, which left the first reported day out of every series.
date_cols = df.columns[4:]
df[date_cols] = df[date_cols].fillna(0).astype(int)

df = df[df.country.isin(countries)]

def uplim(df,pdf,normalize=False,extend=1.2):
    """Upper axis limit for a set of country curves.

    For every country in ``df`` the integer-typed columns are summed over the
    country's rows and the value of the last column is taken as that country's
    latest total. The largest total, scaled by ``extend``, is the limit.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'country' column; the integer-typed columns are the series.
    pdf : pandas.DataFrame
        Population table with 'country', 'Time' and 'population' columns
        (population multiplied by 1000 here). Only read when ``normalize``.
    normalize : bool
        Express each total per 100,000 inhabitants (2019 population).
    extend : float
        Head-room factor applied to the maximum.

    Returns
    -------
    (float, float)
        The limit and its natural logarithm; the logarithm is ``-inf`` when
        the limit is 0 (empty frame or no counts), where ``log`` would raise.
    """
    smax=0
    # unique(): one pass per country, not one per row of a multi-row country
    for name in df.country.unique():
        # int instead of the np.int alias (gone from current NumPy); .iloc[-1]
        # makes "last column" explicit instead of relying on positional fallback
        s = df[df.country==name].select_dtypes(int).sum().iloc[-1]
        if normalize:
            s = s/(pdf[(pdf.country==name) & (pdf.Time==2019)].population.values[0]*1000)*100000
        if s > smax:
            smax=s
    top = smax*extend
    return top,(log(top) if top > 0 else float('-inf'))

def country(name='Germany',normalize_by_population=False,logy=False):
    """Build the line + scatter overlay of one country's deaths curve.

    Reads the module-level ``df`` (deaths table) and ``popdf2`` (population).

    Parameters
    ----------
    name : str
        Country as spelled in ``df.country``.
    normalize_by_population : bool
        Plot deaths per 100,000 inhabitants (2019 population) instead of counts.
    logy : bool
        Use a logarithmic y-axis.

    Returns
    -------
    The overlay ``lines * dots``.
    """
    rows = df[(df.country==name)]
    # int instead of the np.int alias, which current NumPy no longer provides
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        # (An unused uplim() call used to sit here; it raised for a country
        # with 0 deaths because it took log(0), so it was removed.)
        s = s/(popdf2[(popdf2.country==name) & (popdf2.Time==2019)].population.values[0]*1000)*100000
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.05,'size':10}
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    hstate = lines * dots
    return hstate

# Axis limits for the absolute and the population-normalized plots
ulim,ulimlog=uplim(df,popdf2)
nulim,nulimlog=uplim(df,popdf2,True)

# Four plots: (total, normalized) x (linear, logarithmic)
lo=[]
for normalize_by_population in (False,True):
    for logscale in (False,True):

        if not normalize_by_population:
            ylim=(1,ulim)
            ylabel='Total Number of Deaths [Count]'
            title_format='COVID-19 Deaths: Total Count, Status: {}'.format(df.columns[-1].date())
        else:
            ylabel='Number of Deaths normalized by population [per 100,000]'
            title_format='COVID-19 Deaths: Normalized by Population, Status: {}'.format(df.columns[-1].date())
            ylim=(0.00001,nulim)

        title_format+=', Log. Scale' if logscale else ''

        # One line + scatter overlay per selected country
        ol=[country(i,normalize_by_population,logscale) for i in countries]

        overlay=hv.Overlay(ol)
        lo.append(overlay.opts(height=650,width=780,title_format=title_format,ylabel=ylabel,
                               xlabel='Date',xrotation=45,ylim=ylim,logy=logscale,active_tools=['box_zoom'],toolbar='right',legend_muted=True))
layout=hv.Layout(lo).opts(shared_axes=False,merge_tools=False)
layout.cols(1)

<a id='Confirmed_Cases_US_Comparison'></a>
## Timeline Confirmed Cases: U.S. States
[back to top](#top)

**CLICK on a label to highlight a line**

In [74]:
# Confirmed cases US (df_cu), restricted to rows whose state matches a
# pre-selected state name or code
snames=[pair[0] for pair in states]
scodes=[pair[1] for pair in states]
df = df_cu[df_cu.state.isin(snames+scodes)]

def uplim(states,df,pdf,normalize=False,extend=1.2):
    """Upper axis limit for a set of state curves.

    For every ``(name, code)`` pair the rows whose 'state' equals either value
    are selected, their integer-typed columns are summed, and the value of the
    last column is taken as the state's latest total. The largest total,
    scaled by ``extend``, is the limit.

    Parameters
    ----------
    states : iterable of (name, code)
    df : pandas.DataFrame
        Needs a 'state' column; the integer-typed columns are the series.
    pdf : pandas.DataFrame
        Population table with 'state' and 'population' columns, looked up by
        state name. Only read when ``normalize``.
    normalize : bool
        Express each total per 100,000 inhabitants.
    extend : float
        Head-room factor applied to the maximum.

    Returns
    -------
    (float, float)
        The limit and its natural logarithm; the logarithm is ``-inf`` when
        the limit is 0 (no states or no counts), where ``log`` would raise.
    """
    smax=0
    for name,code in states:
        rows = df[(df.state==name) | (df.state==code)]
        # int instead of the np.int alias (gone from current NumPy); .iloc[-1]
        # makes "last column" explicit instead of relying on positional fallback
        s = rows.select_dtypes(int).sum().iloc[-1]
        if normalize:
            s = s/pdf[pdf.state==name].population.values[0]*100000
        if s > smax:
            smax=s
    top = smax*extend
    return top,(log(top) if top > 0 else float('-inf'))

def state(name='Massachusetts',code='MA',normalize_by_population=False,logy=False):
    """Build the line + scatter overlay of one state's confirmed-case curve.

    Reads the module-level ``df`` (US case table) and ``popusdf`` (state
    population).

    Parameters
    ----------
    name, code : str
        Rows whose 'state' equals either value are summed.
    normalize_by_population : bool
        Plot cases per 100,000 inhabitants instead of counts.
    logy : bool
        Use a logarithmic y-axis.

    Returns
    -------
    The overlay ``lines * dots``. Only the dots carry the hover tool, which
    shows the date and the value under the state's name.
    """
    rows = df[(df.state==name) | (df.state==code)]
    # int instead of the np.int alias, which current NumPy no longer provides
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/popusdf[popusdf.state==name].population.values[0]*100000
    s.name = name

    hover = HoverTool(tooltips=[('Date', '$x{%F}'),(name,'$y{0}')], formatters={'$x': 'datetime'}, mode="mouse")
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10,'tools':[hover]}
    # The line is built with the same options and then has its tools cleared
    lines = s.hvplot(**opts).opts(tools=[])
    dots = s.hvplot.scatter(**opts)
    hstate = lines * dots
    return hstate

# x-axis window: from sd to one day past the latest data column
sd='2020-02-15'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.date_range(sd,ed)[0],pd.date_range(sd,ed)[-1])

# Axis limits for the absolute and the population-normalized plots
ulim,ulimlog=uplim(states,df,popusdf)
nulim,nulimlog=uplim(states,df,popusdf,True)

# Four plots: (total, normalized) x (linear, logarithmic)
lo=[]
for normalize_by_population in (False,True):
    for logscale in (False,True):

        if not normalize_by_population:
            ylim=(1,ulim)
            ylabel='U.S. Total Number of Cases [Count]'
            title_format='U.S. COVID-19 Confirmed Cases: Total Count, Status: {}'.format(df.columns[-1].date())
        else:
            ylim=(0.001,nulim)
            ylabel='Number of cases normalized by population [per 100,000]'
            title_format='U.S. COVID-19 Confirmed Cases: Normalized by Population, Status: {}'.format(df.columns[-1].date())

        title_format+=', Log. Scale' if logscale else ''

        # One line + scatter overlay per pre-selected state
        ol=[state(*i,normalize_by_population,logscale) for i in states]

        overlay=hv.Overlay(ol)
        lo.append(overlay.opts(height=600,width=780,title_format=title_format,
                               ylim=ylim,xlim=xlim,ylabel=ylabel,xlabel='Date',
                               xrotation=45,active_tools=['box_zoom'],toolbar='right',legend_muted=True))
layout=hv.Layout(lo).opts(shared_axes=False,merge_tools=False)
layout.cols(1)

<a id='Deaths_US_Comparison'></a>
## Timeline Deaths: U.S. States
[back to top](#top)

**CLICK on a label to highlight a line**

In [75]:
# Deaths US (df_du), restricted to rows whose state matches a pre-selected
# state name or code
snames=[pair[0] for pair in states]
scodes=[pair[1] for pair in states]
df = df_du[df_du.state.isin(snames+scodes)]

def uplim(states,df,pdf,normalize=False,extend=1.2):
    """Upper axis limit for a set of state curves.

    For every ``(name, code)`` pair the rows whose 'state' equals either value
    are selected, their integer-typed columns are summed, and the value of the
    last column is taken as the state's latest total. The largest total,
    scaled by ``extend``, is the limit.

    Parameters
    ----------
    states : iterable of (name, code)
    df : pandas.DataFrame
        Needs a 'state' column; the integer-typed columns are the series.
    pdf : pandas.DataFrame
        Population table with 'state' and 'population' columns, looked up by
        state name. Only read when ``normalize``.
    normalize : bool
        Express each total per 100,000 inhabitants.
    extend : float
        Head-room factor applied to the maximum.

    Returns
    -------
    (float, float)
        The limit and its natural logarithm; the logarithm is ``-inf`` when
        the limit is 0 (no states or no counts), where ``log`` would raise.
    """
    smax=0
    for name,code in states:
        rows = df[(df.state==name) | (df.state==code)]
        # int instead of the np.int alias (gone from current NumPy); .iloc[-1]
        # makes "last column" explicit instead of relying on positional fallback
        s = rows.select_dtypes(int).sum().iloc[-1]
        if normalize:
            s = s/pdf[pdf.state==name].population.values[0]*100000
        if s > smax:
            smax=s
    top = smax*extend
    return top,(log(top) if top > 0 else float('-inf'))

def state(name='Massachusetts',code='MA',normalize_by_population=False,logy=False):
    """Build the line + scatter overlay of one state's deaths curve.

    Reads the module-level ``df`` (US deaths table) and ``popusdf`` (state
    population).

    Parameters
    ----------
    name, code : str
        Rows whose 'state' equals either value are summed.
    normalize_by_population : bool
        Plot deaths per 100,000 inhabitants instead of counts.
    logy : bool
        Use a logarithmic y-axis.

    Returns
    -------
    The overlay ``lines * dots``.
    """
    rows = df[(df.state==name) | (df.state==code)]
    # int instead of the np.int alias, which current NumPy no longer provides
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/popusdf[popusdf.state==name].population.values[0]*100000
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10}
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    hstate = lines * dots
    return hstate

# x-axis window. The xlim computation was commented out in this cell, so the
# plots silently reused the `xlim` left behind by the confirmed-cases cell
# (start 2020-02-15) and failed on a fresh kernel if that cell had not run.
# It is computed here again, with that same start date so the rendered range
# does not change.
sd='2020-02-15'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.date_range(sd,ed)[0],pd.date_range(sd,ed)[-1])

# Axis limits for the absolute and the population-normalized plots
ulim,ulimlog=uplim(states,df,popusdf)
nulim,nulimlog=uplim(states,df,popusdf,True)

# Four plots: (total, normalized) x (linear, logarithmic)
lo=[]
for normalize_by_population in [False,True]:
    for logscale in [False, True]:

        if normalize_by_population:
            ylim=(0.001,nulim)
            ylabel='Number of Deaths normalized by population [per 100,000]'
            title_format='U.S. COVID-19 Deaths: Normalized by Population, Status: {}'.format(df.columns[-1].date())
        else:
            ylim=(1,ulim)
            ylabel='U.S. Total Number of Deaths [Count]'
            title_format='U.S. COVID-19 Deaths: Total Count, Status: {}'.format(df.columns[-1].date())

        if logscale:
            title_format+=', Logarithmic Scale'

        # One line + scatter overlay per pre-selected state
        ol=[state(*i,normalize_by_population,logscale) for i in states]

        overlay=hv.Overlay(ol)
        lo.append(overlay.opts(height=600,width=780,title_format=title_format, ylim=ylim,xlim=xlim,
                               ylabel=ylabel,xlabel='Date',xrotation=45,active_tools=['box_zoom'],toolbar='right',legend_muted=True))
layout=hv.Layout(lo).opts(shared_axes=False,merge_tools=False)
layout.cols(1)

<a id='CapeAndIslandsCases'></a>
## Cape Cod and Islands
[back to top](#top)

The plots below show the confirmed cases and deaths over time on Cape Cod and the Islands compared to the trends in Massachusetts and the U.S. To make them comparable, the plots show numbers normalized by population (based on U.S. Census population data for 2019). The plots are shown as normalized totals and in logarithmic scale.

### Cape Cod and Islands Confirmed Cases

In [44]:
# Helper functions
def uplim(states,df,normalize=False,extend=1.2,multiplier=100000):
    """Return an upper axis limit, and its natural log, over a set of states.

    states     -- iterable of (name, code) pairs; rows of `df` whose `state`
                  column equals either value belong to that state
    df         -- frame with a `state` column, a 'Population' column and
                  integer-typed count columns
    normalize  -- if True, divide each total by the summed 'Population' of the
                  rows and multiply by `multiplier`
    extend     -- head-room factor applied to the largest total
    multiplier -- scale used when normalizing (100000 -> per 100,000)

    Returns (limit, log(limit)). The log is -inf when the limit is 0 (for
    example when every total is still zero) instead of raising ValueError.
    """
    smax=0
    for name,code in states:
        rows = df[(df.state==name) | (df.state==code)]
        # Builtin `int` selects the same columns the removed `np.int` alias did;
        # `.iloc[-1]` takes the last summed column by position.
        total = rows.select_dtypes(int).sum().iloc[-1]
        if normalize:
            total = total/rows['Population'].sum()*multiplier
        if total > smax:
            smax = total
    upper = smax*extend
    return upper,(log(upper) if upper > 0 else float('-inf'))

def hvcounty(df,county='Barnstable',state='Massachusetts',normalize_by_population=False,logy=False,multiplier=100000):
    """Return a line+scatter overlay of the summed integer columns of one county.

    df      -- frame with `county`, `state` and 'Population' columns
    county  -- value matched against `df.county`; also used as the series name
    state   -- value matched against `df.state`
    normalize_by_population -- divide by the summed 'Population' of the
               selected rows and multiply by `multiplier`
    logy    -- passed to hvplot as `logy`
    """
    rows = df[(df.county==county) & (df.state==state)]
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/rows['Population'].sum()*multiplier
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10, 'line_width':3}
    s.name = county
    return s.hvplot(**opts) * s.hvplot.scatter(**opts)

def hvstate(df,name='Massachusetts',normalize_by_population=False,logy=False,multiplier=100000):
    """Return a line+scatter overlay of the summed integer columns of one state.

    df   -- frame with `state` and 'Population' columns
    name -- value matched against `df.state`; also used as the series name
    normalize_by_population -- divide by the summed 'Population' of the
            selected rows and multiply by `multiplier`
    logy -- passed to hvplot as `logy`
    """
    rows = df[df.state==name]
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/rows['Population'].sum()*multiplier
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10, 'line_width':1,'line_dash':'solid'}
    s.name = name
    return s.hvplot(**opts) * s.hvplot.scatter(**opts)

def hvcountry(df,name='US',normalize_by_population=False,logy=False,multiplier=100000):
    """Return a line+scatter overlay of the summed integer columns of one country.

    Rows whose `county` equals 'Territory' are left out of the sums.

    df   -- frame with `country`, `county` and 'Population' columns
    name -- value matched against `df.country`; also used as the series name
    normalize_by_population -- divide by the summed 'Population' of the
            selected rows and multiply by `multiplier`
    logy -- passed to hvplot as `logy`
    """
    rows = df[(df.country==name) & (df.county != 'Territory')]
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/rows['Population'].sum()*multiplier
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10,'line_width':1,'line_dash':'solid'}
    s.name = name
    return s.hvplot(**opts) * s.hvplot.scatter(**opts)


In [45]:
# Stacked bar chart of confirmed cases for the selected Massachusetts counties.
ylabel='Number of Confirmed Cases'
title='COVID-19 Confirmed Cases: Cape Cod and Islands'
counties=['Dukes and Nantucket','Barnstable']
state='Massachusetts'

# Keep only the rows of the selected counties in the selected state.
df = df_cu
df = df[(df.state==state) & df.county.isin(counties)]

# Plot window: from `sd` up to one day past the last column label.
sd='2020-03-14'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.date_range(sd,ed)[0],pd.date_range(sd,ed)[-1])

# Keep the columns left of 'Population' plus the columns from `sd` onwards;
# 'Population' and everything between it and `sd` are dropped.
pidx=list(df.columns).index('Population')
left=df.iloc[:,:pidx]
sddt=dt.datetime.strptime(sd,'%Y-%m-%d')
idx=list(df.columns).index(sddt)
right=df.iloc[:,idx:]
df =pd.concat([left,right],axis=1)

# One column of per-date totals per county. Builtin `int` replaces the removed
# `np.int` alias; reversed() keeps the column order the chart has always used.
df1=pd.DataFrame({name: df[df.county==name].select_dtypes(int).sum()
                  for name in reversed(counties)})

# Label the bars with short dates such as 'Mar 14'.
xticks=[x.strftime('%b %d') for x in df1.index]
df1=df1.set_index(pd.Index(xticks))
df1.hvplot.bar(stacked=True).opts(xrotation=45,title=title,ylabel=ylabel)

In [46]:
# Cape Cod and Islands: confirmed cases per `multiplier` residents, overlaid with
# the state and country curves, once on a linear and once on a log axis.
ylabel='Number of Confirmed Cases normalized by population [per 100,000]'
title='COVID-19 Confirmed Cases: Cape Cod and Islands'
multiplier=100000
df = df_cu
counties=['Dukes and Nantucket','Barnstable']
state='Massachusetts'
logscale=False

# x-axis window: from `sd` up to one day past the last column label.
sd='2020-03-10'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.date_range(sd,ed)[0],pd.date_range(sd,ed)[-1])

# Only `nulim` (normalized limit for the state) is used for the y-axis below.
ulim,ulimlog=uplim(states,df,multiplier=multiplier)
nulim,nulimlog=uplim([(state,'MA')],df,True,multiplier=multiplier)

ol=[]
for logscale in [False,True]:
    clines=[hvcounty(df,i,state,True,logscale,multiplier) for i in counties]
    sline=hvstate(df,state,True,logscale,multiplier)
    uline=hvcountry(df,'US',True,logscale,multiplier)

    if logscale:
        # A log axis cannot start at 0.
        ylim=(0.1,nulim)
        title+=', Logarithmic Scale'
    else:
        ylim=(0,nulim)

    lines=[sline,uline]+clines
    overlay = hv.Overlay(lines).opts(height=600,width=780,title_format=title, xlim=xlim, ylim=ylim,
            ylabel=ylabel,xlabel='Date',xrotation=45,active_tools=['box_zoom'],toolbar='right')
    ol.append(overlay)

layout=hv.Layout(ol).opts(shared_axes=False)
layout.cols(1)

<a id='CapeAndIslandsDeaths'></a>
### Cape Cod and Islands Deaths
[back to top](#top)

In [47]:
# Stacked bar chart of deaths for the selected Massachusetts counties.
ylabel='Number of Deaths'
title='COVID-19 Deaths: Cape Cod and Islands'
counties=['Dukes and Nantucket','Barnstable']
state='Massachusetts'

# Keep only the rows of the selected counties in the selected state.
df = df_du
df = df[(df.state==state) & df.county.isin(counties)]

# Plot window: from `sd` up to one day past the last column label.
sd='2020-03-14'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.date_range(sd,ed)[0],pd.date_range(sd,ed)[-1])

# Keep the columns left of 'Population' plus the columns from `sd` onwards;
# 'Population' and everything between it and `sd` are dropped.
pidx=list(df.columns).index('Population')
left=df.iloc[:,:pidx]
sddt=dt.datetime.strptime(sd,'%Y-%m-%d')
idx=list(df.columns).index(sddt)
right=df.iloc[:,idx:]
df =pd.concat([left,right],axis=1)

# One column of per-date totals per county. Builtin `int` replaces the removed
# `np.int` alias; reversed() keeps the column order the chart has always used.
df1=pd.DataFrame({name: df[df.county==name].select_dtypes(int).sum()
                  for name in reversed(counties)})

# Label the bars with short dates such as 'Mar 14'.
xticks=[x.strftime('%b %d') for x in df1.index]
df1=df1.set_index(pd.Index(xticks))
# Upper limit: the stacked total of the last row, rounded up to the next multiple of 10.
ylim=(0,ceil(df1.iloc[-1,:].values.sum()/10)*10)
df1.hvplot.bar(stacked=True).opts(xrotation=45,title=title,ylabel=ylabel,ylim=ylim)

In [48]:
# Cape Cod and Islands: deaths per `multiplier` residents, overlaid with the
# state and country curves, once on a linear and once on a log axis.
ylabel='Number of Deaths normalized by population [per 100,000]'
title='COVID-19 Deaths: Cape Cod and Islands'
multiplier=100000
df = df_du
counties=['Dukes and Nantucket','Barnstable']
state='Massachusetts'
logscale=False

# x-axis window: from `sd` up to one day past the last column label.
sd='2020-03-10'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.date_range(sd,ed)[0],pd.date_range(sd,ed)[-1])

# Only `nulim` (normalized limit for the state) is used for the y-axis below.
ulim,ulimlog=uplim(states,df,multiplier=multiplier)
nulim,nulimlog=uplim([(state,'MA')],df,True,multiplier=multiplier)

ol=[]
for logscale in [False,True]:
    clines=[hvcounty(df,i,state,True,logscale,multiplier) for i in counties]
    sline=hvstate(df,state,True,logscale,multiplier)
    uline=hvcountry(df,'US',True,logscale,multiplier)

    if logscale:
        # A log axis cannot start at 0.
        ylim=(0.1,nulim)
        title+=', Logarithmic Scale'
    else:
        ylim=(0,nulim)

    lines=[sline,uline]+clines
    overlay = hv.Overlay(lines).opts(height=600,width=780,title_format=title, xlim=xlim,ylim=ylim,
            ylabel=ylabel,xlabel='Date',xrotation=45,active_tools=['box_zoom'],toolbar='right')
    ol.append(overlay)

layout=hv.Layout(ol).opts(shared_axes=False)
layout.cols(1)


<a id='Mortality'></a>
## Mortality Rates Country Comparison
[back to top](#top)

The plot below shows the *mortality rate* in percent computed as:

$Mortality=\frac{Deaths}{Infected} * 100$

A couple of caveats:
- Because there is a timelag from infection to death, the rates computed on a daily basis might be underestimating the rate.
- Underreporting of COVID-19 deaths is recognized by many countries with inadequate tallying systems.
- The rates might be vastly overestimated because the lag in testing is widespread. South Korea has done the most intensive testing, hence South Korea data seem most reliable for mortality assessment.

<a id='MortalityRate'></a>
### Latest Mortality Rate 
[back to top](#top)

In [28]:
# Latest mortality (deaths / confirmed cases * 100) per country, as a horizontal bar chart.

# Deaths: the first four columns are metadata, the rest are parsed as dates.
df_d = pd.read_csv(cv19_d)
df_d.columns = df_d.columns[0:4].append(pd.to_datetime(df_d.columns[4:]))
df_d = df_d.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
# Confirmed cases, prepared the same way.
df_c = pd.read_csv(cv19_c)
df_c.columns = df_c.columns[0:4].append(pd.to_datetime(df_c.columns[4:]))
df_c = df_c.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})

# Keep only the country column and the last date column, then total per country.
# The last date label is taken from the deaths file and used for both frames.
latest = [df_d.columns[1],df_d.columns[-1]]
df_d = df_d[latest].groupby('country').sum()
df_c = df_c[latest].groupby('country').sum()

# Builtin `int` selects the same columns the removed `np.int` alias did.
c = df_c.select_dtypes(int)
d = df_d.select_dtypes(int)
# Rows that come out as NaN are dropped.
m = (d/c*100).dropna()

# Sort a fresh frame instead of sorting a .loc slice in place.
m_selected = m.loc[countries,:].sort_values(m.columns[0],ascending=True)

title='COVID-19 Mortality by Country, Status {} (See Caveats!)'.format(m.columns[0].date())
m_selected.hvplot.barh(padding=0.1,grid=True,ylabel='Mortality [%]',xlabel='',title=title,height=500)

<a id='MortalityTimeline'></a>
### Mortality Timeline
[back to top](#top)

**CLICK on a label to highlight a line**

In [29]:
# Sort the `countries` list in place, in reverse (descending) order; the cells
# below iterate over it in this order.
countries.sort(reverse=True)

In [30]:
#countries=('China','Korea, South','Japan','Italy','Iran','Spain','Germany','Austria','France','US','Mexico','Canada','Brazil','India','United Kingdom')

# Deaths: parse the date columns (position 4 onwards), shorten the metadata
# column names and keep only the rows of the selected countries.
df_d = pd.read_csv(cv19_d)
df_d.columns = df_d.columns[0:4].append(pd.to_datetime(df_d.columns[4:]))
df_d = df_d.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
df_d = df_d[df_d.country.isin(countries)]

# Confirmed cases, prepared the same way.
df_c = pd.read_csv(cv19_c)
df_c.columns = df_c.columns[0:4].append(pd.to_datetime(df_c.columns[4:]))
df_c = df_c.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                            'Lat': 'lat', 'Long': 'lon'})
df_c = df_c[df_c.country.isin(countries)]


def uplim(df,pdf,normalize=False,extend=1.2):
    """Return an upper axis limit, and its natural log, over all countries in `df`.

    df        -- frame with a `country` column and integer-typed count columns
    pdf       -- population frame with `country`, `Time` and `population`
                 columns; only read when `normalize` is True
    normalize -- if True, divide each country's total by ten times its 2019
                 `population` value
    extend    -- head-room factor applied to the largest total

    Returns (limit, log(limit)). The log is -inf when the limit is 0 instead
    of raising ValueError.
    """
    smax=0
    # Each country is evaluated once, even if it spans several rows.
    for name in df.country.unique():
        # Builtin `int` selects the same columns the removed `np.int` alias did;
        # `.iloc[-1]` takes the last summed column by position.
        total = df[df.country==name].select_dtypes(int).sum().iloc[-1]
        if normalize:
            population = pdf[(pdf.country==name) & (pdf.Time==2019)].population.values[0]
            total = total/(population*10)
        if total > smax:
            smax = total
    upper = smax*extend
    return upper,(log(upper) if upper > 0 else float('-inf'))

def mortality(name='Germany',normalize_by_population=False,logy=False):
    """Return a line+scatter overlay of deaths / confirmed cases * 100 for one country.

    Reads the global frames `df_d` (deaths) and `df_c` (confirmed cases) and,
    when normalizing, `popdf2` (population).

    name -- value matched against the `country` column; also the series name
    normalize_by_population -- additionally divide the percentage by ten times
            the country's 2019 `population` value from `popdf2`
    logy -- passed to hvplot as `logy`
    """
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    deaths = df_d[df_d.country==name].select_dtypes(int).sum()
    infected = df_c[df_c.country==name].select_dtypes(int).sum()
    m = deaths/infected*100
    if normalize_by_population:
        # Only the population lookup is needed here; no axis limit is computed
        # because nothing in this function uses one.
        population = popdf2[(popdf2.country==name) & (popdf2.Time==2019)].population.values[0]
        m = m/(population*10)
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10}
    m.name = name
    return m.hvplot(**opts) * m.hvplot.scatter(**opts)

# Overlay the mortality timelines of all selected countries in one plot.
normalize_by_population=False
logscale=False
ylim=(-1,15)
ylabel='Mortality [%]'
title_format='COVID-19 Mortality (See Caveats!)'

ol=[mortality(i,normalize_by_population,logscale) for i in countries]

overlay=hv.Overlay(ol).opts(height=600,width=780,title_format=title_format,ylabel=ylabel,
                            xlabel='Date',xrotation=45,ylim=ylim,logy=logscale,
                            active_tools=['box_zoom'],toolbar='right',legend_muted=True)
overlay

<a id='ChangeConfirmedCases'></a>
## 3-Day Change in Confirmed Cases: Country Comparison
[back to top](#top)

The plots below show the  change of **total number of confirmed cases compared to three days before the plotted date**. A factor **2 means the cases doubled** after three days. A factor **1 means no new confirmed cases are reported** compared to three days before. (Plots also inspired by Jennifer Bardwell, Jim Bardwell).


In [31]:
# Confirmed cases: zeros are read as NaN (na_values=0) and then filled back with 0.
df = pd.read_csv(cv19_c,na_values=0).fillna(0)
# Cast every date column to int so select_dtypes(int) below picks it up. The
# date columns start at position 4 (the first four columns are metadata), so
# the cast starts there rather than at 5. astype with a column mapping returns
# new integer columns; assigning through df.iloc[:, k:] writes into the
# existing float columns on pandas >= 2.0 and leaves their dtype unchanged.
df = df.astype({col: int for col in df.columns[4:]})

df = df.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                        'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Keep only the rows of the selected countries.
df = df[df.country.isin(countries)]

def country(name='Germany',normalize_by_population=False,logy=False,lag=2):
    """Return a small plot of the growth factor of the summed counts for one country.

    The factor at each date is the total on that date divided by the total
    `lag` positions earlier. Reads the global frame `df`.

    name -- value matched against `df.country`; also the series name
    normalize_by_population -- accepted for signature compatibility; not used
    logy -- passed to hvplot as `logy`
    lag  -- distance, in columns, between the two totals that are compared (>= 1)

    Raises ValueError if `lag` is smaller than 1.
    """
    if lag < 1:
        raise ValueError('lag must be at least 1')
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = df[df.country==name].select_dtypes(int).sum()
    # NOTE(review): the section text describes a three-day comparison, while the
    # ratio has always been taken 2 positions apart (the default) — confirm
    # which is intended; pass lag=3 for a strict three-day ratio.
    ratio = s.values[lag:]/s.values[:-lag]
    # Build a new float series instead of writing floats into a slice of `s`.
    d = pd.Series(ratio,index=s.index[lag:],name=name)
    opts = {'ylim':(1,4),'legend': False, 'logy': logy, 'grid': True, 'width': 320, 'height': 200,'padding':0.1,'size':10}
    lines = d.hvplot(**opts)
    dots = d.hvplot.scatter(**opts)
    # Reference lines: factor 2 (red) and factor 1 (black).
    hline200=hv.HLine(2).opts(color='red', line_width=1)
    hline100=hv.HLine(1).opts(color='black', line_width=1)
    return (lines * dots  *hline200 * hline100)

# Two-column grid with one growth-factor plot per selected country.
ylim=(1,4)
ylabel='3-day confirmed cases increase Factor'
title_format='COVID-19 3-Day Confirmed Cases Increase Factor: {}'.format(df.columns[-1].date())

ol=[country(i) for i in countries]

overlay=hv.Layout(ol)
overlay.opts(merge_tools=True,title=title_format)
overlay.cols(2)


<a id='ChangeDeaths'></a>
## 3-Day Change in Deaths: Country Comparison
[back to top](#top)

The plots below show the change of **total number of deaths compared to three days before the plotted date**. A factor **2 means the deaths doubled** after three days. A factor **1 means no new deaths are reported** compared to three days before. (Plots also inspired by Jennifer Bardwell, Jim Bardwell).


In [32]:
# Deaths: zeros are read as NaN (na_values=0) and then filled back with 0.
df = pd.read_csv(cv19_d,na_values=0).fillna(0)
# Cast every date column to int so select_dtypes(int) below picks it up. The
# date columns start at position 4 (the first four columns are metadata), so
# the cast starts there rather than at 5. astype with a column mapping returns
# new integer columns; assigning through df.iloc[:, k:] writes into the
# existing float columns on pandas >= 2.0 and leaves their dtype unchanged.
df = df.astype({col: int for col in df.columns[4:]})

df = df.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                        'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Keep only the rows of the selected countries.
df = df[df.country.isin(countries)]

def country(name='Germany',normalize_by_population=False,logy=False,lag=2):
    """Return a small plot of the growth factor of the summed counts for one country.

    The factor at each date is the total on that date divided by the total
    `lag` positions earlier. Reads the global frame `df`.

    name -- value matched against `df.country`; also the series name
    normalize_by_population -- accepted for signature compatibility; not used
    logy -- passed to hvplot as `logy`
    lag  -- distance, in columns, between the two totals that are compared (>= 1)

    Raises ValueError if `lag` is smaller than 1.
    """
    if lag < 1:
        raise ValueError('lag must be at least 1')
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = df[df.country==name].select_dtypes(int).sum()
    # NOTE(review): the section text describes a three-day comparison, while the
    # ratio has always been taken 2 positions apart (the default) — confirm
    # which is intended; pass lag=3 for a strict three-day ratio.
    ratio = s.values[lag:]/s.values[:-lag]
    # Build a new float series instead of writing floats into a slice of `s`.
    d = pd.Series(ratio,index=s.index[lag:],name=name)
    opts = {'ylim':(1,4),'legend': False, 'logy': logy, 'grid': True, 'width': 320, 'height': 200,'padding':0.1,'size':10}
    lines = d.hvplot(**opts)
    dots = d.hvplot.scatter(**opts)
    # Reference lines: factor 2 (red) and factor 1 (black).
    hline200=hv.HLine(2).opts(color='red', line_width=1)
    hline100=hv.HLine(1).opts(color='black', line_width=1)
    return (lines * dots  *hline200 * hline100)

# Two-column grid with one growth-factor plot per selected country.
ylim=(1,4)
ylabel='3-day Death increase Factor'
title_format='COVID-19 3-Day Death Increase Factor: {}'.format(df.columns[-1].date())

ol=[country(i) for i in countries]

overlay=hv.Layout(ol)
overlay.opts(merge_tools=True,title=title_format)
overlay.cols(2)


<a id='DoublingDaysConfirmedCases'></a>
## Number of Days to Double Confirmed Cases: Country Comparison
[back to top](#top)

The plots below show the change rate (${doubling.rate}_{confirmed.cases}$) in **number of days for confirmed cases to double** ($days_{confirmed.cases.double}$). This is expressed as

${doubling.rate}_{confirmed.cases} = \frac{1}{days_{confirmed.cases.double}}$

In this representation a factor of 1 means cases double every day, 0.5 means cases double every 2nd day, 0.33 means cases double every third day, 0.25 means cases double every 4th day, etc. When the line approaches 0, no more cases are identified.  
Plots begin at **more than 100 confirmed cases**.   
(Plots inspired by Jennifer Bardwell, Jim Bardwell).


In [33]:
# Confirmed cases: zeros are read as NaN (na_values=0) and then filled back with 0.
df = pd.read_csv(cv19_c,na_values=0).fillna(0)
# Cast every date column to int so select_dtypes(int) below picks it up. The
# date columns start at position 4 (the first four columns are metadata), so
# the cast starts there rather than at 5. astype with a column mapping returns
# new integer columns; assigning through df.iloc[:, k:] writes into the
# existing float columns on pandas >= 2.0 and leaves their dtype unchanged.
df = df.astype({col: int for col in df.columns[4:]})

df = df.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                        'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Keep only the rows of the selected countries.
df = df[df.country.isin(countries)]

def double_rate(name,logy=False,threshold=100):
    """Return a plot of the inverse doubling time (1/days) for one country.

    For every date whose summed count exceeds `threshold`, the earlier moment
    at which the count was half as large is located by linear interpolation
    between two neighbouring dates, and 1/(days elapsed since then) is stored.
    All other dates stay NaN. Reads the global frame `df`.

    name      -- value matched against `df.country`; also the series name
    logy      -- passed to hvplot as `logy`
    threshold -- a rate is only computed for dates with a count above this value
    """
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = df[(df.country==name)].select_dtypes(int).sum()
    counts = s.to_numpy(dtype=float)
    r = pd.Series(np.nan,index=s.index)
    for x in range(len(counts)-1,0,-1):
        if counts[x] <= threshold:
            continue
        half = counts[x]/2
        # First position whose count has reached half of the count at x.
        half_idx = int(np.argmax(counts >= half))
        if half_idx == 0:
            # The series already starts at or above `half`: the crossing lies
            # before the first sample, and index -1 would wrap to the last one.
            continue
        prev,cur = counts[half_idx-1],counts[half_idx]
        # Fraction of the day between the crossing and position half_idx.
        frac_day = 1-(half-prev)/(cur-prev)
        r.iloc[x] = 1/(x-half_idx+frac_day)

    d=r
    title='{} ({:,} Cases)'.format(name,s.iloc[-1])
    opts = {'title':title,'ylim':(0,1),'legend': False, 'logy': logy, 'grid': True, 'width': 320, 'height': 200,'padding':0.1,'size':10}
    d.name = name
    lines = d.hvplot(**opts)
    dots = d.hvplot.scatter(**opts)
    # Reference lines at 1/2, 1/3, 1/5 and 1/10, each with a label just above it.
    hline2=hv.HLine(0.5).opts(color='red', line_width=1,line_dash='solid')
    hline3=hv.HLine(0.33).opts(color='red', line_width=1,line_dash='dashed')
    hline5=hv.HLine(0.2).opts(color='red', line_width=1,line_dash='dashdot')
    hline10=hv.HLine(0.1).opts(color='red', line_width=1,line_dash='dotted')
    text2d = hv.Text(s.index[1],0.53,'2 days',fontsize=8)
    text3d = hv.Text(s.index[1],0.36,'3 days',fontsize=8)
    text5d = hv.Text(s.index[1],0.23,'5 days',fontsize=8)
    text10d = hv.Text(s.index[1],0.13,'10 days',fontsize=8)
    hstate = (lines * dots  *hline2 * hline3 * hline5 *hline10 * text2d * text3d * text5d * text10d).opts(show_legend=True)
    return hstate

# Two-column grid with one doubling-rate plot per selected country.
ylim=(0,1)
ylabel='Inverse Number of days for Confirmed Cases to Double'
title_format='COVID-19 Days for Confirmed Cases to Double: {}'.format(df.columns[-1].date())

ol=[double_rate(i) for i in countries]

overlay=hv.Layout(ol)
overlay.opts(merge_tools=True,title=title_format)
overlay.cols(2)


<a id='DoublingDaysDeaths'></a>
## Number of Days to Double Deaths: Country Comparison
[back to top](#top)

The plots below show the change rate (${doubling.rate}_{deaths}$) in **number of days for deaths to double** ($days_{deaths.double}$). This is expressed as

${doubling.rate}_{deaths} = \frac{1}{days_{deaths.double}}$

In this representation a factor of 1 means death counts double every day, 0.5 means death counts double every 2nd day, 0.33 means death counts double every third day, 0.25 means death counts double every 4th day, etc. When the line approaches 0, no more deaths are counted.  
Plots begin at **more than 25 deaths**.  
*(Plots inspired by Jennifer Bardwell, Jim Bardwell).*


In [34]:
# Deaths: zeros are read as NaN (na_values=0) and then filled back with 0.
df = pd.read_csv(cv19_d,na_values=0).fillna(0)
# Cast every date column to int so select_dtypes(int) below picks it up. The
# date columns start at position 4 (the first four columns are metadata), so
# the cast starts there rather than at 5. astype with a column mapping returns
# new integer columns; assigning through df.iloc[:, k:] writes into the
# existing float columns on pandas >= 2.0 and leaves their dtype unchanged.
df = df.astype({col: int for col in df.columns[4:]})

df = df.rename(columns={'Country/Region': 'country', 'Province/State': 'state',
                        'Lat': 'lat', 'Long': 'lon'})
df.columns = df.columns[0:4].append(pd.to_datetime(df.columns[4:]))

# Keep only the rows of the selected countries.
df = df[df.country.isin(countries)]

def double_rate(name,logy=False,threshold=25):
    """Return a plot of the inverse doubling time (1/days) for one country.

    For every date whose summed count exceeds `threshold`, the earlier moment
    at which the count was half as large is located by linear interpolation
    between two neighbouring dates, and 1/(days elapsed since then) is stored.
    All other dates stay NaN. Reads the global frame `df`.

    name      -- value matched against `df.country`; also the series name
    logy      -- passed to hvplot as `logy`
    threshold -- a rate is only computed for dates with a count above this value
    """
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = df[(df.country==name)].select_dtypes(int).sum()
    counts = s.to_numpy(dtype=float)
    r = pd.Series(np.nan,index=s.index)
    for x in range(len(counts)-1,0,-1):
        if counts[x] <= threshold:
            continue
        half = counts[x]/2
        # First position whose count has reached half of the count at x.
        half_idx = int(np.argmax(counts >= half))
        if half_idx == 0:
            # The series already starts at or above `half`: the crossing lies
            # before the first sample, and index -1 would wrap to the last one.
            continue
        prev,cur = counts[half_idx-1],counts[half_idx]
        # Fraction of the day between the crossing and position half_idx.
        frac_day = 1-(half-prev)/(cur-prev)
        r.iloc[x] = 1/(x-half_idx+frac_day)

    d=r
    title='{} ({:,} Deaths)'.format(name,s.iloc[-1])
    opts = {'title':title,'ylim':(0,1),'legend': False, 'logy': logy, 'grid': True, 'width': 320, 'height': 200,'padding':0.1,'size':10}
    d.name = name
    lines = d.hvplot(**opts)
    dots = d.hvplot.scatter(**opts)
    # Reference lines at 1/2, 1/3, 1/5 and 1/10, each with a label just above it.
    hline2=hv.HLine(0.5).opts(color='red', line_width=1,line_dash='solid')
    hline3=hv.HLine(0.33).opts(color='red', line_width=1,line_dash='dashed')
    hline5=hv.HLine(0.2).opts(color='red', line_width=1,line_dash='dashdot')
    hline10=hv.HLine(0.1).opts(color='red', line_width=1,line_dash='dotted')
    text2d = hv.Text(s.index[1],0.53,'2 days',fontsize=8)
    text3d = hv.Text(s.index[1],0.36,'3 days',fontsize=8)
    text5d = hv.Text(s.index[1],0.23,'5 days',fontsize=8)
    text10d = hv.Text(s.index[1],0.13,'10 days',fontsize=8)
    hstate = (lines * dots  *hline2 * hline3 * hline5 *hline10 * text2d * text3d * text5d * text10d).opts(show_legend=True)
    return hstate

# Two-column grid with one doubling-rate plot per selected country.
ylim=(0,1)
ylabel='Inverse Number of days for COVID-19 Deaths to Double'
title_format='COVID-19 Days for Deaths (>25) to Double: {}'.format(df.columns[-1].date())

ol=[double_rate(i) for i in countries]

overlay=hv.Layout(ol)
overlay.opts(merge_tools=True,title=title_format)
overlay.cols(2)


<a id='DoublingDaysConfirmedCasesUS'></a>
## Number of Days to Double Confirmed Cases: U.S. States Comparison
[back to top](#top)

The plots below show the change rate (${doubling.rate}_{confirmed.cases}$) in **number of days for confirmed cases to double** ($days_{confirmed.cases.double}$). This is expressed as

${doubling.rate}_{confirmed.cases} = \frac{1}{days_{confirmed.cases.double}}$

In this representation a factor of 1 means cases double every day, 0.5 means cases double every 2nd day, 0.33 means cases double every third day, 0.25 means cases double every 4th day, etc. When the line approaches 0, no more cases are identified.  
Plots begin at **more than 100 confirmed cases**.   
(Plots inspired by Jennifer Bardwell, Jim Bardwell).


In [40]:
# Point the global `df` at `df_cu`; double_rate() below reads `df`.
df=df_cu

def double_rate(name,code,logy=False,startdate=dt.datetime(2020,3,7).date(),threshold=100):
    """Return a plot of the inverse doubling time (1/days) for one U.S. state.

    For every date whose summed count exceeds `threshold`, the earlier moment
    at which the count was half as large is located by linear interpolation
    between two neighbouring dates, and 1/(days elapsed since then) is stored.
    All other dates stay NaN. Reads the global frame `df`.

    name, code -- rows whose `state` column equals either value are summed;
                  `name` is also the series name
    logy       -- passed to hvplot as `logy`
    startdate  -- left edge of the x-axis; the labels sit two days after it
    threshold  -- a rate is only computed for dates with a count above this value
    """
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = df[(df.state==name) | (df.state==code)].select_dtypes(int).sum()
    counts = s.to_numpy(dtype=float)
    r = pd.Series(np.nan,index=s.index)
    for x in range(len(counts)-1,0,-1):
        if counts[x] <= threshold:
            continue
        half = counts[x]/2
        # First position whose count has reached half of the count at x.
        half_idx = int(np.argmax(counts >= half))
        if half_idx == 0:
            # The series already starts at or above `half`: the crossing lies
            # before the first sample, and index -1 would wrap to the last one.
            continue
        prev,cur = counts[half_idx-1],counts[half_idx]
        # Fraction of the day between the crossing and position half_idx.
        frac_day = 1-(half-prev)/(cur-prev)
        r.iloc[x] = 1/(x-half_idx+frac_day)

    d=r
    # The x-axis runs from `startdate` to tomorrow.
    xlim=(startdate,dt.datetime.now().date()+dt.timedelta(1))
    title='{} ({:,} Cases)'.format(name,s.iloc[-1])
    opts = {'title':title,'xlim':xlim,'ylim':(0,1),'legend': False, 'logy': logy, 'grid': True, 'width': 320, 'height': 200,'padding':0.1,'size':10}
    d.name = name
    lines = d.hvplot(**opts)
    dots = d.hvplot.scatter(**opts)
    # Reference lines at 1/2, 1/3, 1/5 and 1/10, each with a label just above it.
    hline2=hv.HLine(0.5).opts(color='red', line_width=1,line_dash='solid')
    hline3=hv.HLine(0.33).opts(color='red', line_width=1,line_dash='dashed')
    hline5=hv.HLine(0.2).opts(color='red', line_width=1,line_dash='dashdot')
    hline10=hv.HLine(0.1).opts(color='red', line_width=1,line_dash='dotted')
    text2d = hv.Text(startdate+dt.timedelta(2),0.53,'2 days',fontsize=8)
    text3d = hv.Text(startdate+dt.timedelta(2),0.36,'3 days',fontsize=8)
    text5d = hv.Text(startdate+dt.timedelta(2),0.23,'5 days',fontsize=8)
    text10d = hv.Text(startdate+dt.timedelta(2),0.13,'10 days',fontsize=8)
    hstate = (lines * dots  *hline2 * hline3 * hline5 *hline10 * text2d * text3d * text5d * text10d).opts(show_legend=False)
    return hstate

# Two-column grid with one doubling-rate plot per entry of `states`.
ylim=(0,1)
ylabel='Inverse Number of days for U.S. COVID-19 Confirmed Cases to Double'
title_format='U.S. COVID-19 Days for Confirmed Cases (>100) to Double: {}'.format(df.columns[-1].date())

ol=[double_rate(*i) for i in states]

overlay=hv.Layout(ol)
overlay.opts(merge_tools=True,title=title_format)
overlay.cols(2)


<a id='DoublingDaysDeathsUS'></a>
## Number of Days to Double Deaths: U.S. States Comparison
[back to top](#top)

The plots below show the change rate (${doubling.rate}_{deaths}$) in **number of days for deaths to double** ($days_{deaths.double}$). This is expressed as

${doubling.rate}_{deaths} = \frac{1}{days_{deaths.double}}$

In this representation a factor of 1 means death counts double every day, 0.5 means death counts double every 2nd day, 0.33 means death counts double every third day, 0.25 means death counts double every 4th day, etc. When the line approaches 0, no more deaths are counted.  
Plots begin at **more than 10 deaths**.  
*(Plots inspired by Jennifer Bardwell, Jim Bardwell).*


In [39]:
# Point the global `df` at `df_du`; double_rate() below reads `df`.
df=df_du

def double_rate(name,code,logy=False,startdate=dt.datetime(2020,3,7).date(),threshold=10):
    """Return a plot of the inverse doubling time (1/days) for one U.S. state.

    For every date whose summed count exceeds `threshold`, the earlier moment
    at which the count was half as large is located by linear interpolation
    between two neighbouring dates, and 1/(days elapsed since then) is stored.
    All other dates stay NaN. Reads the global frame `df`.

    name, code -- rows whose `state` column equals either value are summed;
                  `name` is also the series name
    logy       -- passed to hvplot as `logy`
    startdate  -- left edge of the x-axis; the labels sit two days after it
    threshold  -- a rate is only computed for dates with a count above this value
    """
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = df[(df.state==name) | (df.state==code)].select_dtypes(int).sum()
    counts = s.to_numpy(dtype=float)
    r = pd.Series(np.nan,index=s.index)
    for x in range(len(counts)-1,0,-1):
        if counts[x] <= threshold:
            continue
        half = counts[x]/2
        # First position whose count has reached half of the count at x.
        half_idx = int(np.argmax(counts >= half))
        if half_idx == 0:
            # The series already starts at or above `half`: the crossing lies
            # before the first sample, and index -1 would wrap to the last one.
            continue
        prev,cur = counts[half_idx-1],counts[half_idx]
        # Fraction of the day between the crossing and position half_idx.
        frac_day = 1-(half-prev)/(cur-prev)
        r.iloc[x] = 1/(x-half_idx+frac_day)

    d=r
    # The x-axis runs from `startdate` to tomorrow.
    xlim=(startdate,dt.datetime.now().date()+dt.timedelta(1))
    title='{} ({:,} Deaths)'.format(name,s.iloc[-1])
    opts = {'title':title,'xlim':xlim,'ylim':(0,1),'legend': False, 'logy': logy, 'grid': True, 'width': 320, 'height': 200,'padding':0.1,'size':10}
    d.name = name
    lines = d.hvplot(**opts)
    dots = d.hvplot.scatter(**opts)
    # Reference lines at 1/2, 1/3, 1/5 and 1/10, each with a label just above it.
    hline2=hv.HLine(0.5).opts(color='red', line_width=1,line_dash='solid')
    hline3=hv.HLine(0.33).opts(color='red', line_width=1,line_dash='dashed')
    hline5=hv.HLine(0.2).opts(color='red', line_width=1,line_dash='dashdot')
    hline10=hv.HLine(0.1).opts(color='red', line_width=1,line_dash='dotted')
    text2d = hv.Text(startdate+dt.timedelta(2),0.53,'2 days',fontsize=8)
    text3d = hv.Text(startdate+dt.timedelta(2),0.36,'3 days',fontsize=8)
    text5d = hv.Text(startdate+dt.timedelta(2),0.23,'5 days',fontsize=8)
    text10d = hv.Text(startdate+dt.timedelta(2),0.13,'10 days',fontsize=8)
    hstate = (lines * dots  *hline2 * hline3 * hline5 *hline10 * text2d * text3d * text5d * text10d).opts(show_legend=True)
    return hstate

# Two-column grid with one doubling-rate plot per entry of `states`.
ylim=(0,1)
ylabel='Inverse Number of days for U.S. COVID-19 Deaths to Double'
title_format='U.S. COVID-19 Days for Deaths (>10) to Double: {}'.format(df.columns[-1].date())

ol=[double_rate(*i) for i in states]

overlay=hv.Layout(ol)
overlay.opts(merge_tools=True,title=title_format)
overlay.cols(2)


<a id='SelectedCountiesCases'></a>
### Selected counties compared to state and country
[back to top](#top)
#### Confirmed Cases

In [58]:
# Helper functions
def uplim(states,df,normalize=False,extend=1.2,multiplier=100000):
    """Return an upper axis limit, and its natural log, over a set of states.

    states     -- iterable of (name, code) pairs; rows of `df` whose `state`
                  column equals either value belong to that state
    df         -- frame with a `state` column, a 'Population' column and
                  integer-typed count columns
    normalize  -- if True, divide each total by the summed 'Population' of the
                  rows and multiply by `multiplier`
    extend     -- head-room factor applied to the largest total
    multiplier -- scale used when normalizing (100000 -> per 100,000)

    Returns (limit, log(limit)). The log is -inf when the limit is 0 (for
    example when every total is still zero) instead of raising ValueError.
    """
    smax=0
    for name,code in states:
        rows = df[(df.state==name) | (df.state==code)]
        # Builtin `int` selects the same columns the removed `np.int` alias did;
        # `.iloc[-1]` takes the last summed column by position.
        total = rows.select_dtypes(int).sum().iloc[-1]
        if normalize:
            total = total/rows['Population'].sum()*multiplier
        if total > smax:
            smax = total
    upper = smax*extend
    return upper,(log(upper) if upper > 0 else float('-inf'))

def hvcounty(df,county,state,normalize_by_population=False,logy=False,multiplier=100000):
    """Return (overlay, last value) for the summed integer columns of one county.

    df      -- frame with `county`, `state` and 'Population' columns
    county  -- value matched against `df.county`; also used as the series name
    state   -- value matched against `df.state`
    normalize_by_population -- divide by the summed 'Population' of the
               selected rows and multiply by `multiplier`
    logy    -- passed to hvplot as `logy`

    The second element is the last value of the plotted series.
    """
    rows = df[(df.county==county) & (df.state==state)]
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/rows['Population'].sum()*multiplier
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10, 'line_width':2}
    s.name = county
    hstate = s.hvplot(**opts) * s.hvplot.scatter(**opts)
    return hstate,s.values[-1]

def hvstate(df,name,normalize_by_population=False,logy=False,multiplier=100000):
    """Return (overlay, last value) for the summed integer columns of one state.

    df   -- frame with `state` and 'Population' columns
    name -- value matched against `df.state`
    normalize_by_population -- divide by the summed 'Population' of the
            selected rows and multiply by `multiplier`
    logy -- passed to hvplot as `logy`

    The series is named `name` plus a trailing space. The second element is
    the last value of the plotted series.
    """
    rows = df[df.state==name]
    # Builtin `int` selects the same columns the removed `np.int` alias did.
    s = rows.select_dtypes(int).sum()
    if normalize_by_population:
        s = s/rows['Population'].sum()*multiplier
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10, 'line_width':1,'line_dash':'dotted'}
    s.name = name+' '
    hstate = s.hvplot(**opts) * s.hvplot.scatter(**opts)
    return hstate,s.values[-1]

def hvcountry(df,name='US',normalize_by_population=False,logy=False,multiplier=100000):
    """Plot the summed integer columns of one country as a dotted line with markers.

    Rows of ``df`` with ``df.country == name`` are selected, except those whose
    ``county`` column equals ``'Territory'``. Their integer columns are summed
    column-wise into one series (presumably one value per date -- confirm
    against the cell that builds ``df``), which is named ``name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``country``, ``county`` and ``Population``.
    name : str, default 'US'
        Value matched against the ``country`` column.
    normalize_by_population : bool, default False
        If true, the series is divided by the summed ``Population`` of the
        selected rows and multiplied by ``multiplier``.
    logy : bool, default False
        Passed to hvplot as the ``logy`` option.
    multiplier : number, default 100000
        Scale used together with ``normalize_by_population``.

    Returns
    -------
    tuple
        ``(plot, last_value)``: the line plot overlaid with the scatter plot,
        and the last value of the (possibly normalized) series.
    """
    ss = df[(df.country==name) & (df.county != 'Territory')]
    # `np.int` was only an alias of the builtin `int` and no longer exists in
    # NumPy >= 1.24; passing `int` selects the same columns.
    s = ss.select_dtypes(int).sum()
    population = ss['Population'].sum()
    if normalize_by_population:
        s = s/population*multiplier
    opts = {'legend': True, 'logy': logy, 'grid': True, 'width': 700, 'height': 400,'padding':0.1,'size':10,'line_width':1,'line_dash':'dotted'}
    s.name = name
    lines = s.hvplot(**opts)
    dots = s.hvplot.scatter(**opts)
    return lines * dots, s.values[-1]


In [59]:
# Per-capita confirmed cases for every selected county, drawn together with its
# state and the U.S. as a whole: one linear and one log-scale panel per state.
ylabel='COVID-19 Cases [per 100,000]'
title0='Cases: '
multiplier=100000
df = df_cu

# The x-range does not depend on the state, so it is computed once: from the
# fixed start date to one day after the last column label of df.
sd='2020-03-10'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.Timestamp(sd),pd.Timestamp(ed))

hvstate_list=[]
for state in selected_counties:
    ol=[]
    # sorted() returns a new list, so the entry stored in selected_counties is
    # not reordered in place.
    scounties=sorted(selected_counties[state])
    maxs=0  # largest latest value among state, country and counties; sets the y-limit
    for logscale in [False,True]:
        sline,s=hvstate(df,state,True,logscale,multiplier)
        maxs=max(maxs,s)
        uline,s=hvcountry(df,'US',True,logscale,multiplier)
        maxs=max(maxs,s)
        clines=[]
        for i in scounties:
            cline,s=hvcounty(df,i,state,True,logscale,multiplier)
            clines.append(cline)
            maxs=max(maxs,s)

        if logscale:
            # A log axis cannot start at 0; the legend is only shown on the
            # linear panel.
            title=title0+state+' Log. Scale'
            ylim=(0.01,maxs*1.05)
            legend_position='bottom_right'
            show_legend=False
        else:
            title=title0+state
            ylim=(0,maxs*1.05)
            legend_position='top_left'
            show_legend=True

        lines=[sline,uline]+clines
        # `title` is the current plot option; `title_format` is deprecated.
        overlay = hv.Overlay(lines).opts(height=300,width=360,title=title,xlim=xlim,ylim=ylim,
                                         ylabel=ylabel,xlabel='Date',xrotation=45,active_tools=['box_zoom'],
                                         legend_position=legend_position,show_legend=show_legend)

        ol.append(overlay)
    layout=hv.Layout(ol).opts(shared_axes=False,toolbar='above')
    hvstate_list.append(layout)
lo=hv.Layout(hvstate_list).opts(shared_axes=False,title='COVID-19 Confirmed Cases in Selected Counties ')
lo.cols(2)

<a id='SelectedCountiesDeaths'></a>
#### Deaths
[back to top](#top)

In [60]:
# Per-capita deaths for every selected county, drawn together with its state
# and the U.S. as a whole: one linear and one log-scale panel per state.
ylabel='COVID-19 Deaths [per 100,000]'
title0='Deaths: '
multiplier=100000
df = df_du

# The x-range does not depend on the state, so it is computed once: from the
# fixed start date to one day after the last column label of df.
sd='2020-03-10'
ed=(df.columns[-1]+dt.timedelta(1)).strftime('%Y-%m-%d')
xlim=(pd.Timestamp(sd),pd.Timestamp(ed))

hvstate_list=[]
for state in selected_counties:
    ol=[]
    # sorted() returns a new list, so the entry stored in selected_counties is
    # not reordered in place.
    scounties=sorted(selected_counties[state])
    maxs=0  # largest latest value among state, country and counties; sets the y-limit
    for logscale in [False,True]:
        sline,s=hvstate(df,state,True,logscale,multiplier)
        maxs=max(maxs,s)
        uline,s=hvcountry(df,'US',True,logscale,multiplier)
        maxs=max(maxs,s)
        clines=[]
        for i in scounties:
            cline,s=hvcounty(df,i,state,True,logscale,multiplier)
            clines.append(cline)
            maxs=max(maxs,s)

        if logscale:
            # A log axis cannot start at 0; the legend is only shown on the
            # linear panel.
            title=title0+state+' Log. Scale'
            ylim=(0.01,maxs*1.05)
            legend_position='bottom_right'
            show_legend=False
        else:
            title=title0+state
            ylim=(0,maxs*1.05)
            legend_position='top_left'
            show_legend=True

        lines=[sline,uline]+clines
        # `title` is the current plot option; `title_format` is deprecated.
        overlay = hv.Overlay(lines).opts(height=300,width=360,title=title,xlim=xlim,ylim=ylim,
                                         ylabel=ylabel,xlabel='Date',xrotation=45,active_tools=['box_zoom'],
                                         legend_position=legend_position,show_legend=show_legend)

        ol.append(overlay)
    layout=hv.Layout(ol).opts(shared_axes=False)
    hvstate_list.append(layout)
lo=hv.Layout(hvstate_list).opts(shared_axes=False,title='COVID-19 Deaths in Selected Counties ')
lo.cols(2)

<a id='Reading'></a>

# We hope these data are informative and convey how seriously we have to take the COVID-19 pandemic. Stay safe.


## Suggested Reading:
[back to top](#top)


[Coronavirus Interview with Larry Brilliant](https://www.wired.com/story/coronavirus-interview-larry-brilliant-smallpox-epidemiologist/?utm_source=pocket-newtab)

<a id='DataSources'></a>
## Data Sources
[back to top](#top)
### Johns Hopkins University daily updated COVID-19 data
COVID-19 confirmed cases, deaths and recovered cases data are streamed from [The Center for Systems Science and Engineering (CSSE)](https://systems.jhu.edu) at Johns Hopkins University. The CSSE COVID-19 [GitHub Repo](https://github.com/CSSEGISandData/COVID-19) has more information about these data and their sources.

We acknowledge Sooth Sawyer, who helped out by compiling the data set in the old format at:
[https://www.soothsawyer.com](https://www.soothsawyer.com/john-hopkins-time-series-data-confirmed-case-csv-after-march-22-2020/?github=1)

### UN Population Data
We obtain the Population data from UN statistics.  [UN Population Data Sets](https://population.un.org/wpp/Download/Standard/Population/) have more information about these data and their sources.

### US Population Data
US population data are obtained from US Census statistics.  [US Population Data Sets](http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv?#) have more information about these data and their sources.