# Spread of Covid-19 in the WHOLE World 
## Xiaohan Wang

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mpl_colors
import ipywidgets
import bqplot
import seaborn as sns
import datetime as dt

import geopandas
import bokeh
import json

  import pandas.util.testing as tm


In [2]:
# Load the daily per-location COVID-19 table (one row per date and location) from data.world;
# the data set page is linked under Citations at the end of the notebook.
full_df = pd.read_csv('https://query.data.world/s/5hjfxbix4nbqdumjr2tc7kizqenqca')
full_df

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths
0,2019-12-31,Afghanistan,0,0,0,0
1,2020-01-01,Afghanistan,0,0,0,0
2,2020-01-02,Afghanistan,0,0,0,0
3,2020-01-03,Afghanistan,0,0,0,0
4,2020-01-04,Afghanistan,0,0,0,0
...,...,...,...,...,...,...
14152,2020-04-26,Zimbabwe,2,0,31,4
14153,2020-04-27,Zimbabwe,0,0,31,4
14154,2020-04-28,Zimbabwe,1,0,32,4
14155,2020-04-29,Zimbabwe,0,0,32,4


In [3]:
# Country lookup table: maps each country name to its continent and ISO codes (e.g. code_3).
# NOTE(review): this is an absolute path on the author's machine; the notebook cannot be
# re-run elsewhere until the CSV location is made relative or configurable.
continent_df = pd.read_csv('/Users/W/Downloads/countryContinent.csv')
continent_df

Unnamed: 0,country,code_2,code_3,country_code,iso_3166_2,continent,sub_region,region_code,sub_region_code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,142.0,34.0
1,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,150.0,39.0
2,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,2.0,15.0
3,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,9.0,61.0
4,Andorra,AD,AND,20,ISO 3166-2:AD,Europe,Southern Europe,150.0,39.0
...,...,...,...,...,...,...,...,...,...
243,Wallis and Futuna,WF,WLF,876,ISO 3166-2:WF,Oceania,Polynesia,9.0,61.0
244,Western Sahara,EH,ESH,732,ISO 3166-2:EH,Africa,Northern Africa,2.0,15.0
245,Yemen,YE,YEM,887,ISO 3166-2:YE,Asia,Western Asia,142.0,145.0
246,Zambia,ZM,ZMB,894,ISO 3166-2:ZM,Africa,Eastern Africa,2.0,14.0


In [4]:
# Attach the continent and the three-letter country code to every daily row by matching the
# data set's 'location' against the lookup table's 'country'. A left join keeps locations
# without a match; their country/continent/code_3 values are then missing (NaN).
full_df_cont = full_df.merge(continent_df[['country','continent','code_3']],
                             how='left',left_on='location',right_on='country')
full_df_cont

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,country,continent,code_3
0,2019-12-31,Afghanistan,0,0,0,0,Afghanistan,Asia,AFG
1,2020-01-01,Afghanistan,0,0,0,0,Afghanistan,Asia,AFG
2,2020-01-02,Afghanistan,0,0,0,0,Afghanistan,Asia,AFG
3,2020-01-03,Afghanistan,0,0,0,0,Afghanistan,Asia,AFG
4,2020-01-04,Afghanistan,0,0,0,0,Afghanistan,Asia,AFG
...,...,...,...,...,...,...,...,...,...
14152,2020-04-26,Zimbabwe,2,0,31,4,Zimbabwe,Africa,ZWE
14153,2020-04-27,Zimbabwe,0,0,31,4,Zimbabwe,Africa,ZWE
14154,2020-04-28,Zimbabwe,1,0,32,4,Zimbabwe,Africa,ZWE
14155,2020-04-29,Zimbabwe,0,0,32,4,Zimbabwe,Africa,ZWE


In [5]:
# Some rows have a null continent, which indicates that their location names did not match
# any country name in the lookup table. Show each such location once.
full_df_cont[full_df_cont['continent'].isnull()].drop_duplicates(['location'])

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,country,continent,code_3
6062,2019-12-31,International,0,0,0,0,,,
7127,2020-03-16,Kosovo,2,0,2,0,,,
12541,2020-03-23,Syria,1,0,1,0,,,
13713,2020-03-07,Vatican,1,0,1,0,,,
13930,2019-12-31,World,27,0,27,0,,,


## Fig1. Project death data onto a world map

In [6]:
# Country outlines come from the Natural Earth shapefile downloaded from
# https://www.naturalearthdata.com/downloads/110m-cultural-vectors/
shapefile = '/Users/W/Downloads/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'
# Keep only the country name, its three-letter code and the outline, under shorter names.
geo_df = geopandas.read_file(shapefile)[['ADMIN','ADM0_A3','geometry']].rename(
    columns={'ADMIN': 'country', 'ADM0_A3': 'country_code'}
)
geo_df

Unnamed: 0,country,country_code,geometry
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,Western Sahara,SAH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
...,...,...,...
172,Republic of Serbia,SRB,"POLYGON ((18.82982 45.90887, 18.82984 45.90888..."
173,Montenegro,MNE,"POLYGON ((20.07070 42.58863, 19.80161 42.50009..."
174,Kosovo,KOS,"POLYGON ((20.59025 41.85541, 20.52295 42.21787..."
175,Trinidad and Tobago,TTO,"POLYGON ((-61.68000 10.76000, -61.10500 10.890..."


In [7]:
# Antarctica takes up a large share of the map, so it is going to be dropped.
# This cell only looks up the index label of its row.
geo_df[geo_df['country']=='Antarctica'].index

Int64Index([159], dtype='int64')

In [8]:
# Remove Antarctica by name instead of by a hard-coded row position (159): a positional
# drop silently removes a different country if the shapefile's row order changes or if
# this cell is executed a second time. Filtering by name is idempotent and keeps the
# remaining index labels unchanged.
geo_df = geo_df[geo_df['country'] != 'Antarctica']
geo_df

Unnamed: 0,country,country_code,geometry
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,Western Sahara,SAH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
...,...,...,...
172,Republic of Serbia,SRB,"POLYGON ((18.82982 45.90887, 18.82984 45.90888..."
173,Montenegro,MNE,"POLYGON ((20.07070 42.58863, 19.80161 42.50009..."
174,Kosovo,KOS,"POLYGON ((20.59025 41.85541, 20.52295 42.21787..."
175,Trinidad and Tobago,TTO,"POLYGON ((-61.68000 10.76000, -61.10500 10.890..."


In [9]:
# Snapshot of the cumulative totals of every location on one reporting date.
# Rows and columns are picked in a single .loc call (instead of slicing a slice through a
# throwaway `temp` frame) and copied, so the snapshot is an independent frame.
# Aggregate rows such as 'World' are still included here; they carry no code_3.
SNAPSHOT_DATE = '2020-04-24'
today_country_df = full_df_cont.loc[
    full_df_cont['date'] == SNAPSHOT_DATE,
    ['date','location','total_deaths','total_cases','code_3'],
].copy()
today_country_df

Unnamed: 0,date,location,total_deaths,total_cases,code_3
105,2020-04-24,Afghanistan,42,1281,AFG
158,2020-04-24,Albania,27,663,ALB
275,2020-04-24,Algeria,407,3007,DZA
323,2020-04-24,Andorra,37,724,AND
363,2020-04-24,Angola,2,25,AGO
...,...,...,...,...,...
13918,2020-04-24,Vietnam,0,270,VNM
14045,2020-04-24,World,190599,2657910,
14066,2020-04-24,Yemen,0,1,YEM
14109,2020-04-24,Zambia,3,76,ZMB


In [10]:
# Range of cumulative deaths on the snapshot date. The maximum is the aggregate 'World'
# row, which is still part of today_country_df at this point.
today_country_df['total_deaths'].min(), today_country_df['total_deaths'].max()

(0, 190599)

In [11]:
# Join the snapshot onto the country outlines via the three-letter country code.
# merge() performs an inner join by default, so outlines without data and data rows
# without a matching code (e.g. 'World') do not appear in merged_df.
merged_df = geo_df.merge(today_country_df, left_on='country_code',right_on='code_3')

In [12]:
# Natural-log scale of the death counts, used to colour the map.
# Counts are clipped to a minimum of 1 before the log is taken, so zero deaths map to
# log(1) == 0. This gives the same values as computing log(0) == -inf and resetting the
# negative results to 0 afterwards, but avoids the "divide by zero encountered in log"
# RuntimeWarning and the row-by-row apply() of a helper function.
merged_df['total_deaths_log'] = np.log(merged_df['total_deaths'].clip(lower=1))

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [13]:
# Inspect the merged frame, which now also holds the total_deaths_log column.
merged_df

Unnamed: 0,country,country_code,geometry,date,location,total_deaths,total_cases,code_3,total_deaths_log
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000...",2020-04-24,Fiji,0,18,FJI,0.000000
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...",2020-04-24,Tanzania,10,285,TZA,2.302585
2,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742...",2020-04-24,Canada,2146,42099,CAN,7.671361
3,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",2020-04-24,United States,49963,869172,USA,10.819038
4,Kazakhstan,KAZ,"POLYGON ((87.35997 49.21498, 86.59878 48.54918...",2020-04-24,Kazakhstan,22,2334,KAZ,3.091042
...,...,...,...,...,...,...,...,...,...
157,Bosnia and Herzegovina,BIH,"POLYGON ((18.56000 42.65000, 17.67492 43.02856...",2020-04-24,Bosnia and Herzegovina,53,1413,BIH,3.970292
158,Macedonia,MKD,"POLYGON ((22.38053 42.32026, 22.88137 41.99930...",2020-04-24,Macedonia,56,1300,MKD,4.025352
159,Republic of Serbia,SRB,"POLYGON ((18.82982 45.90887, 18.82984 45.90888...",2020-04-24,Serbia,139,7276,SRB,4.934474
160,Montenegro,MNE,"POLYGON ((20.07070 42.58863, 19.80161 42.50009...",2020-04-24,Montenegro,5,319,MNE,1.609438


In [14]:
# Range of the log-scaled death counts (the colour mapper for the map spans 0 to 12).
merged_df['total_deaths_log'].max(),merged_df['total_deaths_log'].min()

(10.819038010475133, 0.0)

In [15]:
# Serialise the merged GeoDataFrame to GeoJSON text and parse it into a Python dict.
# NOTE(review): displaying merged_json prints every coordinate of every country outline;
# consider showing only a small part of it.
merged_json = json.loads(merged_df.to_json())
merged_json

{'type': 'FeatureCollection',
 'features': [{'id': '0',
   'type': 'Feature',
   'properties': {'code_3': 'FJI',
    'country': 'Fiji',
    'country_code': 'FJI',
    'date': '2020-04-24',
    'location': 'Fiji',
    'total_cases': 18,
    'total_deaths': 0,
    'total_deaths_log': 0.0},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[180.0, -16.067132663642447],
       [180.0, -16.555216566639196],
       [179.36414266196414, -16.801354076946883],
       [178.72505936299711, -17.01204167436804],
       [178.59683859511713, -16.639150000000004],
       [179.0966093629971, -16.433984277547403],
       [179.4135093629971, -16.379054277547404],
       [180.0, -16.067132663642447]]],
     [[[178.12557, -17.50481],
       [178.3736, -17.33992],
       [178.71806, -17.62846],
       [178.55271, -18.15059],
       [177.93266000000003, -18.28799],
       [177.38146, -18.16432],
       [177.28504, -17.72465],
       [177.67087, -17.381140000000002],
       [178.12557, -17.50481]]

In [16]:
# Convert the dict back into a JSON string; the string is what gets passed to
# GeoJSONDataSource when the map is built.
json_data = json.dumps(merged_json)

In [17]:
from bokeh.io import output_notebook, show, output_file, save
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool
from bokeh.layouts import widgetbox, row, column

In [18]:
# Build the world choropleth of total deaths (Fig1).

# GeoJSON data source that holds the country outlines and their properties for plotting
geosource = GeoJSONDataSource(geojson = json_data)
# multi-hue colour palette with 6 classes, used in reversed order
# (presumably so that higher death counts get darker shades; confirm against the palette order)
palette = brewer['YlGnBu'][6]
palette = palette[::-1]
# map the log-scaled values 0..12 linearly onto the palette; missing values are drawn grey
color_mapper = LinearColorMapper(palette=palette,low=0, high=12, nan_color = '#d9d9d9')
# The colour bar ticks are on the natural-log scale; the labels translate them back into
# approximate death counts (e^2 ~ 7, e^4 ~ 55, e^6 ~ 400, e^8 ~ 2,980, e^10 ~ 22,000, e^12 ~ 163,000).
tick_labels = {'0':'0', '2':'0-7','4':'7-55','6':'55-400','8':'400-2,980','10':'2,980-22,000','12':'22,000-163,000'}

# create color bar
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=7, width=500, height=20,
                    border_line_color=None, location=(0,0), orientation='horizontal',
                    major_label_overrides = tick_labels)
# add hover tool: shows the country name and the raw (not log-scaled) death count
hover = HoverTool(tooltips=[('Country/region','@country'),('Total Deaths','@total_deaths')])

# create figure object; the hover tool is the only tool and the toolbar is hidden
fig = figure(plot_height=600, plot_width=950,
             toolbar_location=None, tools=[hover])
fig.xgrid.grid_line_color = None
fig.ygrid.grid_line_color = None

fig.title.text ='Total Deaths in Each Country'
fig.title.text_font_size = '15pt'

# add the patch renderer to the figure: one polygon per country, filled by log-scaled deaths
fig.patches('xs','ys',source=geosource, 
            fill_color={'field':'total_deaths_log','transform':color_mapper},
            line_color='black', line_width=0.25, fill_alpha=1)

# place the colour bar below the map
fig.add_layout(color_bar,'below')

In [19]:
# Configure Bokeh to render its output inline in the notebook.
output_notebook()

In [20]:
# NOTE(review): with output_file() commented out, save() has no target filename; in the
# recorded run it wrote the map to a temporary HTML file. Pass an explicit filename if the
# exported map should be kept.
#output_file("/Users/W/InteractiveDV/fig2.html")
save(fig)

'/var/folders/rx/8k198g3510d2v93ct_r5z9200000gn/T/tmpl7utj_v8.html'

In [21]:
# NOTE(review): in the recorded run this call failed with "RuntimeError: Models must be
# owned by only a single document". It ran right after save(fig) on the same figure, which
# is presumably what attached the figure's models to another document. Confirm the cause
# and fix it (e.g. rebuild the figure before showing it) so the notebook runs top to bottom.
show(fig)

RuntimeError: Models must be owned by only a single document, BasicTickFormatter(id='1037', ...) is already in a doc

## Fig2. Linked Dashboard 

In [None]:
# Monthly totals of new cases and new deaths per continent (input for the dashboard heat map).
# Work on a copy: `dash_df = full_df_cont` only created a second name for the same frame,
# so adding the 'month' column also modified full_df_cont, which earlier cells display.
dash_df = full_df_cont.copy()
# 'date' holds 'YYYY-MM-DD' strings, so characters 5-6 are the two-digit month.
dash_df['month'] = dash_df['date'].str[5:7]
temp_df = (
    dash_df.groupby(['continent','month'])[['new_cases','new_deaths']]
    .sum()
    .reset_index()
)
# The month alone carries no year: 12 is December 2019, 1-4 are January-April 2020.
temp_df['month'] = temp_df['month'].astype('int64')

In [None]:
# let's first have a look at what this heatmap will look like
# Static preview: continents as rows, months as columns, monthly new cases as cell values.
# pivot_table aggregates with its default (the mean); temp_df has one row per
# continent/month pair, so the values are unchanged by that aggregation.
heatmap = temp_df.pivot_table(values='new_cases', index='continent', columns='month')
sns.heatmap(heatmap, annot=True)
plt.show()

In [None]:
def _monthly_new_cases(continent_name):
    """Return the monthly new-case totals of one continent as a list, in temp_df row order."""
    rows_of_continent = temp_df['continent'] == continent_name
    return list(temp_df[rows_of_continent]['new_cases'])


# One list of monthly totals per continent, bound to a variable named after the continent.
Africa, Americas, Asia, Europe, Oceania = (
    _monthly_new_cases(name)
    for name in ('Africa', 'Americas', 'Asia', 'Europe', 'Oceania')
)

In [None]:
# Category labels for the two axes of the dashboard heat map.
# Months are two-character strings because they are compared with daily_cases['month'];
# '12' comes last, which is also where the December rows sit within each continent in temp_df.
# NOTE(review): the continents are listed in the reverse of the order used to build
# monthly_cases; get_data_value compensates with the index 4-j. Confirm that this matches
# the way the heat map draws its rows.
months = ['01', '02', '03', '04','12']
continents = ['Oceania','Europe','Asia','Americas','Africa']
months, continents

In [None]:
# One row per continent (alphabetical order), each holding that continent's monthly
# new-case totals; this nested list is the colour data of the heat map.
monthly_cases = [Africa,Americas,Asia,Europe,Oceania]
monthly_cases

In [None]:
# Daily totals per continent; this is the data behind the bar chart of the dashboard.
# numeric_only=True restricts the sum to the numeric count columns. Without it, recent
# pandas versions also try to "sum" the text columns (location, country, code_3, ...).
daily_cases = dash_df.groupby(['continent','date']).sum(numeric_only=True).reset_index()
# 'date' is still a 'YYYY-MM-DD' string at this point, so characters 5-6 are the month.
daily_cases['month'] = daily_cases['date'].str[5:7]
# Parse the dates only after the month has been extracted from the text form.
daily_cases['date'] = pd.to_datetime(daily_cases['date'])
daily_cases

In [None]:
# Create the label widget that displays the total of the selected heat-map cell.
mySelectedLabel = ipywidgets.Label()

In [None]:
# heatmap: monthly new cases per continent, with clickable cells
# scales: colour for the cell values, ordinal scales for the month and continent labels
col_sc = bqplot.ColorScale(scheme='RdPu')
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# axes: colour legend on the right, months along x, continents along y
c_ax = bqplot.ColorAxis(scale=col_sc,orientation='vertical',side='right')
x_ax = bqplot.Axis(scale=x_sc,label='Month')
y_ax = bqplot.Axis(scale=y_sc,label='Continent',orientation='vertical',side='left')

# the mark itself; a click selects a cell, which updates the mark's `selected` attribute
heat_map = bqplot.GridHeatMap(color=monthly_cases, scales={'color':col_sc,'row':y_sc,'column':x_sc},
                              column=months, row=continents,
                              interactions = {'click':'select'},
                              anchor_style = {'fill':'blue'})
fig_heatmap = bqplot.Figure(marks=[heat_map],axes=[c_ax,y_ax,x_ax])

In [None]:
# bar plot: daily new confirmed cases for one continent and one month
# NOTE: x_sc, y_sc, x_ax and y_ax are rebound to new objects here; the heat map keeps the
# scale and axis objects it was created with.
x_sc = bqplot.DateScale()
y_sc = bqplot.LinearScale()

x_ax = bqplot.Axis(scale=x_sc, label='Date')
y_ax = bqplot.Axis(scale=y_sc, label='Daily New Confirmed Cases', orientation='vertical')

# initial selection shown before any click: months[3] is '04' and continents[2] is 'Asia'
i, j = 3,2

# rows of daily_cases that belong to the chosen month and continent
region_mask = ((daily_cases['month']==months[i]) & (daily_cases['continent']==continents[j]))
bars = bqplot.Bars(x=daily_cases[region_mask]['date'], y=daily_cases[region_mask]['new_cases'],
                    scales={'x':x_sc,'y':y_sc})

fig_bars = bqplot.Figure(marks=[bars], axes=[x_ax,y_ax])

In [None]:
# link together 
def get_data_value(change):
    """Update the label and the bar chart when exactly one heat-map cell is selected.

    Parameters
    ----------
    change : dict
        Change notification passed by ``observe``. ``change['owner']`` is the object
        whose ``selected`` attribute holds the selected (row, column) index pairs.

    Returns
    -------
    None
        The function only has side effects (label text, bar chart data).

    Nothing happens unless exactly one cell is selected. A selection may be reported as
    ``None``; that case is treated like "no cell selected" instead of raising
    ``TypeError`` from ``len(None)``.
    """
    selected = change['owner'].selected
    if selected is None or len(selected) != 1:
        return
    j, i = selected[0]
    # monthly_cases is ordered opposite to `continents`, hence the mirrored row index
    # (equal to 4-j for the five continents).
    v = monthly_cases[len(monthly_cases) - 1 - j][i]
    mySelectedLabel.value = 'Total number of Confirmed Cases : ' + str(v)
    # update the bar plot too
    region_mask = ((daily_cases['month']==months[i]) & (daily_cases['continent']==continents[j]))
    bars.x = daily_cases[region_mask]['date']
    bars.y = daily_cases[region_mask]['new_cases']

# Call get_data_value whenever the heat map's `selected` attribute changes.
heat_map.observe(get_data_value, 'selected')

In [None]:
# put it all together as a dashboard
# give both figures a minimum width so they stay readable when placed side by side
fig_heatmap.layout.min_width = '500px'
fig_bars.layout.min_width = '500px'

# heat map and bar chart next to each other, with the label above them
plots = ipywidgets.HBox([fig_heatmap, fig_bars])
myDashBoard = ipywidgets.VBox([mySelectedLabel, plots])
myDashBoard

## Fig3. Global Cases & Deaths Added Per Day

In [None]:
# Global time series: the rows whose 'location' is the aggregate 'World' entry,
# re-indexed from 0 and with 'date' converted from text to datetime64 values.
is_world = full_df['location'] == 'World'
world_df = full_df.loc[is_world].reset_index(drop=True)
world_df['date'] = world_df['date'].astype('datetime64[ns]')
world_df

In [None]:
# Daily new cases and daily new deaths worldwide, drawn as two lines on one axis.
fig, ax = plt.subplots(figsize=(14,6))
ax.plot(world_df['date'],world_df['new_cases'], label='daily_new_cases')
ax.plot(world_df['date'], world_df['new_deaths'], label='daily_new_deaths')
ax.set_title('Global Cases & Deaths Added Per Day', fontsize=20)
# Label both axes so the figure can be read on its own.
ax.set_xlabel('Date')
ax.set_ylabel('Number of People')
ax.legend()
plt.show()

## Fig4. Bar plot of Top 50 Countries

In [None]:
# Cumulative totals per location on one reporting date, largest case counts first.
# NOTE(review): this uses 2020-04-14, whereas the world map snapshot uses 2020-04-24;
# confirm which date is intended.
full_df_today = full_df[full_df.date == '2020-04-14']
# Drop the aggregate rows: besides 'World', the data set also has an 'International'
# entry that is not a country, so neither belongs in a per-country ranking.
full_df_today = (
    full_df_today[~full_df_today.location.isin(['World', 'International'])]
    .sort_values(by=['total_cases'],ascending=False)
)

In [None]:
# Overlaid bars for the 50 locations with the most confirmed cases: total cases first,
# then deaths drawn over them at the same x positions.
top50 = full_df_today.iloc[:50]  # full_df_today is already sorted by total_cases, descending
fig, ax = plt.subplots(figsize=(15,6))

ax.bar(top50['location'], top50['total_cases'], label='Confirmed Cases')
ax.set_title('The Number of Cases and Deaths of Top 50 Countries', fontsize=15)
ax.set_xlabel('Country')
ax.set_ylabel('Total Number of Confirmed Cases')
plt.xticks(rotation=90)

ax.bar(top50['location'], top50['total_deaths'], label='Deaths')
plt.legend()
plt.show()

## Fig5. Interactive Scatter plots 

In [None]:
# Interactive scatter plot over all daily records, ordered by date.
temporal_df = full_df.sort_values(by='date')
features = ['new_cases','new_deaths','total_cases','total_deaths']

@ipywidgets.interact(col_name1=temporal_df.columns, col_name2=features, col_name3=features)
def scatter_plot(col_name1,col_name2,col_name3):
    """Scatter two columns of ``temporal_df`` against each other, coloured by a third.

    col_name1 : column drawn on the x axis (any column of the frame)
    col_name2 : column drawn on the y axis (one of ``features``)
    col_name3 : column that sets the marker colour through the 'RdBu' colormap
    """
    x_values = temporal_df[col_name1]
    y_values = temporal_df[col_name2]
    colour_values = temporal_df[col_name3]

    fig, ax = plt.subplots(figsize=(15,8))
    ax.scatter(x_values, y_values, c=colour_values, cmap='RdBu')
    ax.set_xlabel(col_name1)
    ax.set_ylabel(col_name2)
    ax.set_title(f'Scatter Plot between {col_name1} and {col_name2}', fontsize=15)
    # rotate the x tick labels so that date labels do not overlap
    fig.autofmt_xdate(rotation=90)
    plt.show()
    

## Citations  
https://www.naturalearthdata.com/downloads/110m-cultural-vectors/    
https://data.world/markmarkoh/coronavirus-data 