# COVID-19 by Country

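The data originates from https://data.gov.hk; this notebook loads it from CSV snapshots stored in the HIL-HK/lets-plot-examples GitHub repository.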

In [1]:
import requests
import datetime
from itertools import product

from ipywidgets import widgets
from IPython.display import display

import numpy as np
import pandas as pd
import geopandas as gpd

from lets_plot import *

In [2]:
# Set up lets-plot for HTML output; must run before any plot is displayed.
LetsPlot.setup_html()

In [3]:
# First day covered by the animation.
start_date = pd.Timestamp(2020, 1, 14)
# Yesterday at midnight. `normalize()` truncates to the start of the current
# day; rounding to the nearest day would jump to tomorrow once it is past noon
# and make this value "today" instead of "yesterday".
end_date = pd.Timestamp.today().normalize() - pd.Timedelta(days=1)

In [4]:
def get_naturalearth_data(data_type="admin_0_countries", columns=("NAME", "geometry")):
    """Download a Natural Earth shapefile and return it as a GeoDataFrame.

    The shapefile is read from the JetBrains/lets-plot-docs GitHub repository.

    Parameters
    ----------
    data_type : str
        Name of the dataset directory under ``data/naturalearth/``.
    columns : iterable of str
        Columns to keep, spelled as in the shapefile (plus ``"geometry"``).

    Returns
    -------
    geopandas.GeoDataFrame
        The selected columns, with all column names lower-cased.
    """
    import shapefile
    from shapely.geometry import shape

    naturalearth_url = (
        "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/"
        "data/naturalearth/{0}/data.shp?raw=true".format(data_type)
    )
    sf = shapefile.Reader(naturalearth_url)

    # The first entry of sf.fields is skipped (presumably pyshp's deletion-flag
    # pseudo-field, which has no counterpart in the records -- TODO confirm).
    # The names are computed once instead of once per record.
    field_names = [field[0] for field in sf.fields[1:]]

    gdf = gpd.GeoDataFrame(
        [dict(zip(field_names, record)) for record in sf.records()],
        geometry=[shape(s) for s in sf.shapes()],
    )[list(columns)]  # list() so that any iterable of names selects columns
    gdf.columns = [col.lower() for col in gdf.columns]

    return gdf

In [5]:
def player_widget(plots, *, fps=1):
    """Display a play button and a slider that step through ``plots``.

    Parameters
    ----------
    plots : sequence
        Objects to show, one per frame; frame ``n`` is ``plots[n]``.
    fps : number, keyword-only
        Frames per second; converted to a Play interval of at least 1 ms.

    Returns
    -------
    Whatever ``display`` returns for the control box and the output area.
    """
    last_frame = len(plots) - 1
    frame_ms = max(1, int(1000 / fps))

    play_button = widgets.Play(min=0, max=last_frame, step=1, value=0, interval=frame_ms)
    frame_slider = widgets.IntSlider(min=0, max=last_frame, step=1, value=0)
    # Keep both controls on the same frame number.
    widgets.jslink((play_button, 'value'), (frame_slider, 'value'))

    def show_frame(n, m):
        # `m` (the Play value) is not used for rendering; it is accepted only
        # because the Play widget is registered as a control below.
        return display(plots[n])

    controls = widgets.HBox([play_button, frame_slider])
    frame_output = widgets.interactive_output(show_frame, {'n': frame_slider, 'm': play_button})
    return display(controls, frame_output)

In [6]:
def ffill_columns(df, *, columns=()):
    """Replace each listed column with its running maximum, floored at zero.

    Missing values (NaN) and values lower than an earlier one are replaced by
    the largest value seen so far in row order; rows before the first positive
    value become 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to fix. It is modified in place.
    columns : iterable of str, keyword-only
        Names of the numeric columns to fix. Defaults to none.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for chaining.
    """
    for column in columns:
        # fillna(0) lets NaN rows take part in the running max without ever
        # raising it; clip(lower=0) keeps the result from starting below zero.
        df[column] = df[column].fillna(0).cummax().clip(lower=0)
    return df

In [7]:
def simplify_geoms(world_gdf, *, tolerance=.5):
    """Return a copy of ``world_gdf`` with simplified geometries.

    Rows whose ``name`` is listed in ``DANGEROUS_GEOMS`` keep their original
    geometry (presumably because simplification damages them -- the constant's
    name suggests so); every other row's geometry goes through ``simplify``.

    Parameters
    ----------
    world_gdf : GeoDataFrame with a ``name`` column
    tolerance : number, keyword-only
        Passed straight to ``geometry.simplify``.

    Returns
    -------
    The untouched rows followed by the simplified rows.
    """
    DANGEROUS_GEOMS = ['South Africa']
    is_dangerous = world_gdf.name.isin(DANGEROUS_GEOMS)

    untouched = world_gdf[is_dangerous].copy()
    simplified = world_gdf[~is_dangerous].copy()
    simplified.geometry = simplified.geometry.simplify(tolerance)

    return pd.concat([untouched, simplified])

In [8]:
# Download the country polygons and simplify them in one step
world_gdf = simplify_geoms(get_naturalearth_data().copy())

In [9]:
# Build the disease DataFrame by stacking two tables: one for mainland China
# and one for all other countries/areas.

# --- Mainland China: has no country column, so one is added by hand ---
columns_sub = {
    'As of date': 'date',
    'Number of cases': 'cases',
    'Number of death cases': 'deaths',
}
china_data = pd.read_csv(
    'https://raw.githubusercontent.com'
    '/HIL-HK/lets-plot-examples/master/data/covid-19/reported_cases_mainland_china.csv'
)
china_df = pd.DataFrame(china_data).rename(columns=columns_sub)
china_df = china_df[columns_sub.values()]
china_df['country'] = 'China'
# Day-first dates; values that cannot be parsed become NaT instead of raising
china_df['date'] = pd.to_datetime(china_df['date'], dayfirst=True, errors='coerce')

# --- Everything outside mainland China ---
columns_sub = {
    'As of date': 'date',
    'Other countries/areas': 'country',
    'Number of cases/confirmed cases': 'cases',
    'Number of deaths among confirmed cases': 'deaths',
}
world_data = pd.read_csv(
    'https://raw.githubusercontent.com'
    '/HIL-HK/lets-plot-examples/master/data/covid-19/reported_cases_outside_mainland_china.csv'
)
world_df = pd.DataFrame(world_data).rename(columns=columns_sub)
world_df = world_df[columns_sub.values()]
world_df['date'] = pd.to_datetime(world_df['date'], dayfirst=True, errors='coerce')

df = pd.concat([china_df, world_df], sort=False)

In [10]:
# Stop at the last date that both source tables reach
end_date = min(china_df['date'].max(), world_df['date'].max())
df = df.loc[df['date'] <= end_date]

In [11]:
# Keep only the disease rows whose country has a polygon in world_gdf
df = df.loc[df['country'].isin(world_gdf['name'].unique())]

In [12]:
# Add missing (date, country) pairs to the dataframe so that every country has
# a row for every day of the time scale; the added rows hold NaN.
# from_product builds the full grid directly, in the same date-major order as
# iterating itertools.product over the two sequences.
multi_index = pd.MultiIndex.from_product(
    [pd.date_range(start_date, end_date), world_gdf.name.unique()],
    names=['date', 'country'],
)
# Collapse duplicate (date, country) rows to their maximum before reindexing.
df = df.groupby(['date', 'country']).max().sort_index().reindex(multi_index).reset_index()

In [13]:
# Repair missing and decreasing 'cases' / 'deaths' values, one country at a time.
# sort=False keeps the countries in order of first appearance.
df = pd.concat([
    ffill_columns(country_df.copy(), columns=['cases', 'deaths'])
    for _, country_df in df.groupby('country', sort=False)
]).reset_index(drop=True)
df = df.astype({'cases': int, 'deaths': int})

In [14]:
# Base plot shared by every frame: no legend, no axis decorations, fixed size.
p = (
    ggplot()
    + theme(legend_position='none', axis_title='blank', axis_text='blank',
            axis_ticks='blank', axis_line='blank')
    + ggsize(600, 450)
)

# Loop-invariant pieces, built once instead of once per frame.
# The colour limits span the whole period so that colours are comparable
# between frames.
fill_scale = scale_fill_gradient(low='white', high='red',
                                 limits=[0, df.cases.max()], trans='sqrt')
# 'cases' holds integer counts, so format it as an integer rather than with
# two decimals.
case_tooltips = layer_tooltips().line('@name') \
                                .format('@cases', 'd') \
                                .line('cases number|@cases')

# One choropleth per day between start_date and end_date.
plots = []
for current_date in pd.date_range(start_date, end_date):
    # Left merge keeps every polygon, whether or not it has a data row.
    current_gdf = world_gdf.merge(df[df.date == current_date],
                                  left_on='name', right_on='country', how='left')
    plots.append(
        p
        + geom_polygon(aes(fill='cases'), data=current_gdf, size=.5, color='black',
                       tooltips=case_tooltips)
        + fill_scale
        + ggtitle('COVID-19 on %s' % current_date.strftime('%m/%d/%Y'))
    )

In [15]:
# Show the play/slider controls and the frame they currently select (default 1 fps).
player_widget(plots)

HBox(children=(Play(value=0, interval=1000, max=69), IntSlider(value=0, max=69)))

Output()