# 1. Country Level Information

In [27]:
import pandas as pd
import numpy as np
from datetime import datetime
import re
import csv
from IPython.display import display

In [28]:
# Two data source
# 1. Cases Data: JHU CSSE (https://github.com/CSSEGISandData/COVID-19)
# 2. Population data: (https://storage.guidotti.dev/covid19/data/) + Worldometer(https://www.worldometers.info/population/) + Wikipedia

In [29]:
# this github repo contains timeseries data for all coronavirus cases: https://github.com/CSSEGISandData/COVID-19
confirmed_cases_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/" \
                      "csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/" \
             "csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovered_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/" \
             "csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
# This repo contains population data for coronavirus cases
population_url = "https://storage.guidotti.dev/covid19/data/world-1.csv"

In [30]:
date_pattern = re.compile(r"\d{1,2}/\d{1,2}/\d{2}")
def reformat_dates(col_name: str) -> str:
    # for columns which are dates, I'd much rather they were in day/month/year format
    try:
        return date_pattern.sub(datetime.strptime(col_name, "%m/%d/%y").strftime("%d/%m/%Y"), col_name, count=1)
    except ValueError:
        return col_name

In [31]:
renamed_columns_map = {
   "Country/Region": "country",
   "Province/State": "Province_State",
   "Lat": "Latitude",
   "Long": "Longitude"
}

cols_to_drop = ["Province_State", "Latitude", "Longitude"]

confirmed_cases_df = (
   pd.read_csv(confirmed_cases_url)
   .rename(columns=renamed_columns_map)
   .rename(columns=reformat_dates)
   .drop(columns=cols_to_drop)
)

deaths_df = (
   pd.read_csv(deaths_url)
   .rename(columns=renamed_columns_map)
   .rename(columns=reformat_dates)
   .drop(columns=cols_to_drop)
)
recovered_df = (
   pd.read_csv(recovered_url)
   .rename(columns=renamed_columns_map)
   .rename(columns=reformat_dates)
   .drop(columns=cols_to_drop)
)

display(confirmed_cases_df.head())
display(deaths_df.head())
display(recovered_df.head())

Unnamed: 0,country,22/01/2020,23/01/2020,24/01/2020,25/01/2020,26/01/2020,...,19/04/2020,20/04/2020,21/04/2020,22/04/2020,23/04/2020,24/04/2020
0,Afghanistan,0,0,0,0,0,...,996,1026,1092,1176,1279,1351
1,Albania,0,0,0,0,0,...,562,584,609,634,663,678
2,Algeria,0,0,0,0,0,...,2629,2718,2811,2910,3007,3127
3,Andorra,0,0,0,0,0,...,713,717,717,723,723,731
4,Angola,0,0,0,0,0,...,24,24,24,25,25,25


Unnamed: 0,country,22/01/2020,23/01/2020,24/01/2020,25/01/2020,26/01/2020,...,19/04/2020,20/04/2020,21/04/2020,22/04/2020,23/04/2020,24/04/2020
0,Afghanistan,0,0,0,0,0,...,33,36,36,40,42,43
1,Albania,0,0,0,0,0,...,26,26,26,27,27,27
2,Algeria,0,0,0,0,0,...,375,384,392,402,407,415
3,Andorra,0,0,0,0,0,...,36,37,37,37,37,40
4,Angola,0,0,0,0,0,...,2,2,2,2,2,2


Unnamed: 0,country,22/01/2020,23/01/2020,24/01/2020,25/01/2020,26/01/2020,...,19/04/2020,20/04/2020,21/04/2020,22/04/2020,23/04/2020,24/04/2020
0,Afghanistan,0,0,0,0,0,...,131,135,150,166,179,188
1,Albania,0,0,0,0,0,...,314,327,345,356,385,394
2,Algeria,0,0,0,0,0,...,1047,1099,1152,1204,1355,1408
3,Andorra,0,0,0,0,0,...,235,248,282,309,333,344
4,Angola,0,0,0,0,0,...,6,6,6,6,6,6


In [32]:
# extract out just the relevant geographical data and join it to another .csv which has the country codes.
# The country codes are required for the plotting function to identify countries on the map
geo_data_df = confirmed_cases_df[["country"]].drop_duplicates()
country_codes_df = (
    pd.read_csv(
        "country_code_mapping.csv",
        usecols=["country", "alpha-3_code"],
        index_col="country")
)
geo_data_df = geo_data_df.join(country_codes_df, how="left", on="country").set_index("country")

In [33]:
dates_list = (
    deaths_df.filter(regex=r"(\d{2}/\d{2}/\d{4})", axis=1)
    .columns
    .to_list()
)

# create a mapping of date -> dataframe, where each df holds the daily counts of cases and deaths per country
cases_by_date = {}
cases = []
for date in dates_list:
    confirmed_cases_day_df = (
        confirmed_cases_df
        .filter(like=date, axis=1)
        .rename(columns=lambda col: "Total_Confirmed_Cases")
    )
    deaths_day_df = deaths_df.filter(like=date, axis=1).rename(columns=lambda col: "Total_Fatalities")
    recovered_day_df = recovered_df.filter(like=date, axis=1).rename(columns=lambda col: "Total_Recovered_Cases")
    cases_df = confirmed_cases_day_df.join(deaths_day_df).join(recovered_day_df)
    cases_df['Date'] = date
    cases_df = cases_df.set_index(confirmed_cases_df["country"])

    date_df = (
        geo_data_df.join(cases_df)
        .groupby("country")
        .agg({"Total_Confirmed_Cases": "sum", "Total_Fatalities": "sum", "Total_Recovered_Cases": "sum", 'Date': "first", "alpha-3_code": "first"})
    )
    date_df = date_df.reset_index()
    date_df = date_df.rename(columns={"country":"Country_Region"})
    date_df.Total_Recovered_Cases=date_df.Total_Recovered_Cases.astype(int)
    
    cases_by_date[date] = date_df

cases = pd.DataFrame()
for _, data in cases_by_date.items():
    cases = pd.concat([cases,data])

cases = cases.reset_index(drop=True)

    
# The dataframe for each day stored in dictionary looks something like this:
display(cases_by_date[dates_list[0]].head())
# The whole dataframe after concatenation
display(cases)

Unnamed: 0,Country_Region,Total_Confirmed_Cases,Total_Fatalities,Total_Recovered_Cases,Date,alpha-3_code
0,Afghanistan,0,0,0,22/01/2020,AFG
1,Albania,0,0,0,22/01/2020,ALB
2,Algeria,0,0,0,22/01/2020,DZA
3,Andorra,0,0,0,22/01/2020,AND
4,Angola,0,0,0,22/01/2020,AGO


Unnamed: 0,Country_Region,Total_Confirmed_Cases,Total_Fatalities,Total_Recovered_Cases,Date,alpha-3_code
0,Afghanistan,0,0,0,22/01/2020,AFG
1,Albania,0,0,0,22/01/2020,ALB
2,Algeria,0,0,0,22/01/2020,DZA
3,Andorra,0,0,0,22/01/2020,AND
4,Angola,0,0,0,22/01/2020,AGO
...,...,...,...,...,...,...
17385,West Bank and Gaza,484,4,10,24/04/2020,
17386,Western Sahara,6,0,0,24/04/2020,ESH
17387,Yemen,1,0,0,24/04/2020,YEM
17388,Zambia,84,3,37,24/04/2020,ZMB


In [34]:
# Save for map development
with open('cases_by_date.csv', 'w') as f:  # Just use 'w' mode in 3.x
    w = csv.DictWriter(f, cases_by_date.keys())
    w.writeheader()
    w.writerow(cases_by_date)

In [35]:
# Get the information of country population from https://storage.guidotti.dev/covid19/data/
population = pd.read_csv('world-1.csv', index_col=0)
population

Unnamed: 0_level_0,date,country,state,city,lat,lng,...,pop_14,pop_15_64,pop_65,pop_age,pop_density,pop_death_rate
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Afghanistan|NA|NA,2020-01-22,Afghanistan,,,33.000000,65.000000,...,0.430902,0.543249,0.025849,19.5,56.937760,0.006575
Albania|NA|NA,2020-01-22,Albania,,,41.153300,20.168300,...,0.176729,0.685824,0.137447,34.3,104.612263,0.007714
Algeria|NA|NA,2020-01-22,Algeria,,,28.033900,1.659600,...,0.301487,0.634888,0.063625,28.9,17.730075,0.004717
Andorra|NA|NA,2020-01-22,Andorra,,,42.506300,1.521800,...,0.156000,0.714000,0.130000,46.2,163.842553,0.004300
Angola|NA|NA,2020-01-22,Angola,,,-11.202700,17.873900,...,0.468089,0.509747,0.022164,15.9,24.713052,0.008432
...,...,...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza|NA|NA,2020-04-10,West Bank and Gaza,,,31.952200,35.233200,...,0.389474,0.579193,0.031333,21.9,758.984551,0.003469
Western Sahara|NA|NA,2020-04-10,Western Sahara,,,24.215500,-12.885800,...,,,,,,
Yemen|NA|NA,2020-04-10,Yemen,,,15.552727,48.516388,...,,,,,,
Zambia|NA|NA,2020-04-10,Zambia,,,-15.416700,28.283300,...,0.449361,0.529642,0.020997,16.9,23.341479,0.006633


In [36]:
countries_1 = cases.Country_Region.unique()
countries_1

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guinea', 'Guine

In [37]:
countries_2 = population.country.unique()

In [38]:
set(countries_1) - set(countries_2) 

{'Holy See', 'Taiwan*'}

In [39]:
set(countries_2) - set(countries_1) 

set()

In [40]:
country_pop_dict = {}
for country in countries_2:
    country_pop_dict[country] = population.loc[population.country == country, 'pop'].unique()
# suppliment population information from worldometer: https://www.worldometers.info/population/
country_pop_dict['Diamond Princess'] = 3711 
country_pop_dict['Holy See'] = 801
country_pop_dict['Taiwan*'] = 23816775
country_pop_dict['Botswana'] = 2340244 
country_pop_dict['Burma'] = 54409800
country_pop_dict['Burundi'] = 11890784 
country_pop_dict['Kosovo'] = 1810366 
country_pop_dict['MS Zaandam'] = 1829 
country_pop_dict['Malawi'] = 19129952
country_pop_dict['Sierra Leone'] = 7976983
country_pop_dict['South Sudan'] = 11193725
country_pop_dict['Western Sahara'] = 597339
country_pop_dict['Sao Tome and Principe'] = 219159
country_pop_dict['Yemen'] = 29825964

country_pop_dict  

{'Afghanistan': array([37172386.]),
 'Albania': array([2866376.]),
 'Algeria': array([42228429.]),
 'Andorra': array([77006.]),
 'Angola': array([30809762.]),
 'Antigua and Barbuda': array([96286.]),
 'Argentina': array([44494502.]),
 'Armenia': array([2951776.]),
 'Australia': array([24992369.]),
 'Austria': array([8847037.]),
 'Azerbaijan': array([9942334.]),
 'Bahamas': array([385640.]),
 'Bahrain': array([1569439.]),
 'Bangladesh': array([1.61356039e+08]),
 'Barbados': array([286641.]),
 'Belarus': array([9485386.]),
 'Belgium': array([11422068.]),
 'Belize': array([383071.]),
 'Benin': array([11485048.]),
 'Bhutan': array([754394.]),
 'Bolivia': array([11353142.]),
 'Bosnia and Herzegovina': array([3323929.]),
 'Botswana': 2340244,
 'Brazil': array([2.09469333e+08]),
 'Brunei': array([428962.]),
 'Bulgaria': array([7024216.]),
 'Burkina Faso': array([19751535.]),
 'Burma': 54409800,
 'Burundi': 11890784,
 'Cabo Verde': array([543767.]),
 'Cambodia': array([16249798.]),
 'Cameroon'

In [41]:
for country in countries_1:
    cases.loc[cases.Country_Region == country, 'Population'] = country_pop_dict[country]

In [42]:
cases[(cases.Population.isnull())&(cases.Date=='24/04/2020')]

Unnamed: 0,Country_Region,Total_Confirmed_Cases,Total_Fatalities,Total_Recovered_Cases,Date,alpha-3_code,Population


In [43]:
# New cases each day could be calculated by subtracting the number of cases of the present day by the number of the 
# previous day if it's not the first day. For the first day '2020-01-22' the total cases would be equal to the new cases.

# Calculate new confirmed cases
cases.loc[cases.Date == dates_list[0], 'New_Confirmed_Cases'] = cases.loc[cases.Date == dates_list[0], 'Total_Confirmed_Cases']
cases.loc[cases.Date != dates_list[0], 'New_Confirmed_Cases'] = cases.loc[cases.Date != dates_list[0], 'Total_Confirmed_Cases'].values - cases.loc[cases.Date != dates_list[-1], 'Total_Confirmed_Cases'].values
# Calculate new recovered cases
cases.loc[cases.Date == dates_list[0], 'New_Recovered_Cases'] = cases.loc[cases.Date == dates_list[0], 'Total_Recovered_Cases']
cases.loc[cases.Date != dates_list[0], 'New_Recovered_Cases'] = cases.loc[cases.Date != dates_list[0], 'Total_Recovered_Cases'].values - cases.loc[cases.Date != dates_list[-1], 'Total_Recovered_Cases'].values
# Calculate new fatalities
cases.loc[cases.Date == dates_list[0], 'New_Fatalities'] = cases.loc[cases.Date == dates_list[0], 'Total_Fatalities']
cases.loc[cases.Date != dates_list[0], 'New_Fatalities'] = cases.loc[cases.Date != dates_list[0], 'Total_Fatalities'].values - cases.loc[cases.Date != dates_list[-1], 'Total_Fatalities'].values
# Calculate active confirmed cases (= total_confirmed - total_recovered - total_fatalities)
cases['Remaining_Confirmed_Cases'] = cases.Total_Confirmed_Cases - cases.Total_Recovered_Cases - cases.Total_Fatalities

cases


Unnamed: 0,Country_Region,Total_Confirmed_Cases,Total_Fatalities,Total_Recovered_Cases,Date,alpha-3_code,Population,New_Confirmed_Cases,New_Recovered_Cases,New_Fatalities,Remaining_Confirmed_Cases
0,Afghanistan,0,0,0,22/01/2020,AFG,37172386.0,0.0,0.0,0.0,0
1,Albania,0,0,0,22/01/2020,ALB,2866376.0,0.0,0.0,0.0,0
2,Algeria,0,0,0,22/01/2020,DZA,42228429.0,0.0,0.0,0.0,0
3,Andorra,0,0,0,22/01/2020,AND,77006.0,0.0,0.0,0.0,0
4,Angola,0,0,0,22/01/2020,AGO,30809762.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...
17385,West Bank and Gaza,484,4,10,24/04/2020,,4569087.0,4.0,0.0,0.0,470
17386,Western Sahara,6,0,0,24/04/2020,ESH,597339.0,0.0,0.0,0.0,6
17387,Yemen,1,0,0,24/04/2020,YEM,29825964.0,0.0,0.0,0.0,1
17388,Zambia,84,3,37,24/04/2020,ZMB,17351822.0,8.0,0.0,0.0,44


In [44]:
# Modify the datatype and column order
cases.Total_Recovered_Cases = cases.Total_Recovered_Cases.astype(int)
cases.Population = cases.Population.astype(int)
cases.New_Confirmed_Cases = cases.New_Confirmed_Cases.astype(int)
cases.New_Recovered_Cases = cases.New_Recovered_Cases.astype(int)
cases.New_Fatalities = cases.New_Fatalities.astype(int)
cases.Remaining_Confirmed_Cases = cases.Remaining_Confirmed_Cases.astype(int)
cases = cases[['Date', 'Country_Region', 'alpha-3_code', 'Population', 'Total_Confirmed_Cases', 'Total_Fatalities',
              'Total_Recovered_Cases', 'New_Confirmed_Cases', 'New_Fatalities', 'New_Recovered_Cases',
              'Remaining_Confirmed_Cases']]
cases['Date'] =pd.to_datetime(cases.Date, format="%d/%m/%Y")
cases

Unnamed: 0,Date,Country_Region,alpha-3_code,Population,Total_Confirmed_Cases,Total_Fatalities,Total_Recovered_Cases,New_Confirmed_Cases,New_Fatalities,New_Recovered_Cases,Remaining_Confirmed_Cases
0,2020-01-22,Afghanistan,AFG,37172386,0,0,0,0,0,0,0
1,2020-01-22,Albania,ALB,2866376,0,0,0,0,0,0,0
2,2020-01-22,Algeria,DZA,42228429,0,0,0,0,0,0,0
3,2020-01-22,Andorra,AND,77006,0,0,0,0,0,0,0
4,2020-01-22,Angola,AGO,30809762,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
17385,2020-04-24,West Bank and Gaza,,4569087,484,4,10,4,0,0,470
17386,2020-04-24,Western Sahara,ESH,597339,6,0,0,0,0,0,6
17387,2020-04-24,Yemen,YEM,29825964,1,0,0,0,0,0,1
17388,2020-04-24,Zambia,ZMB,17351822,84,3,37,8,0,0,44


In [45]:
# Save as csv for Kaggle uploading
cases.to_csv('Country_Level_Info.csv', index=False)

In [46]:
# Prepare df for UI development
cases_2 = cases.groupby(['Date'], as_index=True, sort=False).sum()
cases_2['Country_Region'] = 'Globe'
cases_2.reset_index(inplace=True)
cases_3 = pd.concat([cases, cases_2], ignore_index=True, join='outer', sort=False)
cases_3.loc[cases_3.Country_Region == 'Globe', 'Population'] = 7.8 * 10**9 # From Wikipedia
cases_3['Date'] =pd.to_datetime(cases_3.Date, format="%d/%m/%Y")
#cases_3 = cases_3.sort_values(['Date','Country_Region'])
#[cases_3.Country_Region == 'Globe'].sort_values(['Date', ascending=1, inplace=True)
# Save
cases_3.to_csv('Country_Level_Info_2.csv', index=False)

In [47]:
# Next step would be to add more information like the number of probable individuals and total population to this dataset

## For each specific country, province/state level or even city level case numbers could be added to provide more specific information, which might require more colloboration from kagglers from different country.

## COVID-19 Map

In [48]:
from _plotly_future_ import v4_subplots
from plotly import graph_objs as go
from plotly.subplots import make_subplots
%matplotlib inline

pd.options.display.max_columns = 12

In [49]:
# helper function for when we produce the frames for the map animation
def frame_args(duration):
    return {
        "frame": {"duration": duration},
        "mode": "immediate",
        "fromcurrent": True,
        "transition": {"duration": duration, "easing": "linear"},
    }

In [50]:
fig = make_subplots(rows=2, cols=1, specs=[[{"type": "scattergeo"}], [{"type": "xy"}]], row_heights=[0.8, 0.2])

# set up the geo data, the slider, the play and pause buttons, and the title
fig.layout.geo = {"showcountries": True}
fig.layout.sliders = [{"active": 0, "steps": []}]
fig.layout.updatemenus = [
    {
        "type": "buttons",
        "buttons": [
            {
                "label": "&#9654;",  # play symbol
                "method": "animate",
                "args": [None, frame_args(250)],
            },
            {
                "label": "&#9724;",
                "method": "animate",  # stop symbol
                "args": [[None], frame_args(0)],
            },
        ],
        "showactive": False,
        "direction": "left",
    }
]
#fig.layout.title = {"text": "COVID-19 Global Situation", "x": 0.5}

In [51]:
frames = []
steps = []
# set up colourbar tick values, ranging from 1 to the highest num. of confirmed cases for any country thus far
max_country_confirmed_cases = cases_by_date[dates_list[-1]]["Total_Confirmed_Cases"].max()

# to account for the significant variance in number of cases, we want the scale to be logarithmic...
high_tick = np.log1p(max_country_confirmed_cases)
low_tick = np.log1p(1)
log_tick_values = np.geomspace(low_tick, high_tick, num=6)

# ...however, we want the /labels/ on the scale to be the actual number of cases (i.e. not log(n_cases))
visual_tick_values = np.expm1(log_tick_values).astype(int)
# explicitly set max cbar value, otherwise it might be max - 1 due to a rounding error
visual_tick_values[-1] = max_country_confirmed_cases  
visual_tick_values = [f"{val:,}" for val in visual_tick_values]

# generate line chart data
# list of tuples: [(confirmed_cases, deaths), ...]
cases_deaths_totals = [(df.filter(like="Total_Confirmed_Cases").astype("uint32").agg("sum")[0], 
                        df.filter(like="Total_Fatalities").astype("uint32").agg("sum")[0],
                        df.filter(like="Total_Recovered_Cases").astype("uint32").agg("sum")[0])
                          for df in cases_by_date.values()]

confirmed_cases_totals = [daily_total[0] for daily_total in cases_deaths_totals]
deaths_totals =[daily_total[1] for daily_total in cases_deaths_totals]
recovered_cases_totals = [daily_total[2] for daily_total in cases_deaths_totals]

# this loop generates the data for each frame
for i, (date, data) in enumerate(cases_by_date.items(), start=1):
    df = data

    # the z-scale (for calculating the colour for each country) needs to be logarithmic
    df["confirmed_cases_log"] = np.log1p(df["Total_Confirmed_Cases"])

    df["text"] = (
        date
        + "<br>"
        + df["Country_Region"]
        + "<br>Confirmed cases: "
        + df["Total_Confirmed_Cases"].apply(lambda x: "{:,}".format(x))
        + "<br>Deaths: "
        + df["Total_Fatalities"].apply(lambda x: "{:,}".format(x))
        + "<br>Recovered cases: "
        + df["Total_Recovered_Cases"].apply(lambda x: "{:,}".format(x))
    )
    
    confirmed_cases = df["Total_Confirmed_Cases"].sum()
    deaths = df["Total_Fatalities"].sum()
    mortality_rate = deaths / confirmed_cases
    recovered_cases = df["Total_Recovered_Cases"].sum()
    recovery_rate = recovered_cases / confirmed_cases
    
    # create the choropleth chart
    choro_trace = go.Choropleth(
        **{
            "locations": df["alpha-3_code"],
            "z": df["confirmed_cases_log"],
            "zmax": high_tick,
            "zmin": low_tick,
            "colorscale": "reds",
            "colorbar": {
                "ticks": "outside",
                "ticktext": visual_tick_values,
                "tickmode": "array",
                "tickvals": log_tick_values,
                "title": {"text": "<b>Confirmed Cases</b>"},
                "len": 0.8,
                "y": 1,
                "yanchor": "top"
            },
            "hovertemplate": df["text"],
            "name": "",
            "showlegend": False
        }
    )
    
    # create the confirmed cases trace
    confirmed_cases_trace = go.Scatter(
        x=dates_list,
        y=confirmed_cases_totals[:i],
        mode="markers" if i == 1 else "lines",
        name="Total Confirmed Cases",
        line={"color": "Red"},
        hovertemplate="%{x}<br>Total Confirmed Cases: %{y:,}<extra></extra>"
    )
    
    mortalities = [a/b for a,b in zip(deaths_totals[:i], confirmed_cases_totals[:i])]
    # create the deaths trace
    deaths_trace = go.Scatter(
        x=dates_list,
        y=deaths_totals[:i],
        mode="markers" if i == 1 else "lines",
        name="Total Fatalities",
        line={"color": "Black"},
        hovertemplate="%{x}<br>Total Fatalities: %{y:,}<extra></extra>"+'<br>%{text}</b>', 
        text = ["Mortality: {:.1%}".format(mortality) for mortality in mortalities] 
    )
    
    recovery_rates = [a/b for a,b in zip(recovered_cases_totals[:i], confirmed_cases_totals[:i])]
    # create the recovered cases trace
    recovered_cases_trace = go.Scatter(
        x=dates_list,
        y=recovered_cases_totals[:i],
        mode="markers" if i == 1 else "lines",
        name="Total Recovered Cases",
        line={"color": "Green"},
        hovertemplate="%{x}<br>Total Recovered Cases: %{y:,}<extra></extra>"+'<br>%{text}</b>',
        text = ["Recovery Rate: {:.1%}".format(recovery_rate) for recovery_rate in recovery_rates]               
    )

    if i == 1:
        # the first frame is what the figure initially shows...
        fig.add_trace(choro_trace, row=1, col=1)
        fig.add_traces([confirmed_cases_trace, deaths_trace, recovered_cases_trace], rows=[2, 2, 2], cols=[1, 1, 1])
    # ...and all the other frames are appended to the `frames` list and slider
    frames.append(dict(data=[choro_trace, confirmed_cases_trace, deaths_trace, recovered_cases_trace], name=date))
    
    step = {
        "method": "animate", #"update",
        "args": [
            [date],
            frame_args(0)
        ],
        "label": date,
    }
    
    steps.append(step) 
    
# tidy up the axes and finalise the chart ready for display
fig.update_xaxes(range=[0, len(dates_list)-1], visible=False)
fig.update_yaxes(range=[0, max(confirmed_cases_totals)])
fig.frames = frames
fig.layout.sliders[0].steps = steps
fig.layout.sliders[0].currentvalue= {"prefix": "Date: "}
fig.layout.geo.domain = {"x": [0,1], "y": [0.2, 1]}
fig.update_layout(height=650, 
                  legend={"x": 0.05, "y": 0.175, "yanchor": "top", "bgcolor": "rgba(0, 0, 0, 0)"}
                 )

fig

In [52]:
# save the figure locally as an interactive HTML page
fig.update_layout(height=1000)
fig.write_html("COVID19_map.html")

# 2. Patient Level Information

In [53]:
# (https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset#covid_19_data.csv)

In [None]:
# NZ patient level information (MOH)