In [88]:
# import our usual things
%matplotlib inline
import cartopy
import pandas as pd
import matplotlib.pyplot as plt
import geopandas 
import ipyleaflet
import numpy as np
import bqplot
import ipywidgets

## Exploring the data



In [325]:
# Load the daily per-country figures; the relative path means full_data.csv
# must sit in the notebook's working directory.
full = pd.read_csv('full_data.csv')

In [90]:
full.head()

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths
0,2019-12-31,Afghanistan,0,0,0,0
1,2020-01-01,Afghanistan,0,0,0,0
2,2020-01-02,Afghanistan,0,0,0,0
3,2020-01-03,Afghanistan,0,0,0,0
4,2020-01-04,Afghanistan,0,0,0,0


In [342]:
len(full)

10230

In [91]:
# Keep only the rows whose location is the United States.
country_all = full.loc[full['location'] == 'United States']

In [92]:
# Keep only the days on which the cumulative case count is above 100.
country_time = country_all.loc[country_all['total_cases'] > 100]

In [93]:
country_time

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths
9811,2020-03-03,United States,14,4,103,6
9812,2020-03-04,United States,22,3,125,9
9813,2020-03-05,United States,34,2,159,11
9814,2020-03-06,United States,74,1,233,12
9815,2020-03-07,United States,105,2,338,14
9816,2020-03-08,United States,95,3,433,17
9817,2020-03-09,United States,121,4,554,21
9818,2020-03-10,United States,200,5,754,26
9819,2020-03-11,United States,271,2,1025,28
9820,2020-03-12,United States,287,2,1312,30


In [340]:
# data fetching and cleaning
def get_country_data(country, min_cases=100):
    """Return the rows of ``full`` for one country above a case threshold.

    Parameters
    ----------
    country : str
        Value matched exactly against the ``location`` column.
    min_cases : int, optional
        Rows are kept only where ``total_cases`` is strictly greater than
        this value. Defaults to 100.

    Returns
    -------
    pandas.DataFrame
        The matching rows of the module-level ``full`` frame, in their
        original order and with their original index. Empty when no row
        matches (unknown country, or the threshold is never exceeded).
    """
    # One combined mask gives the same rows as filtering twice in a row.
    in_country = full['location'] == country
    past_threshold = full['total_cases'] > min_cases
    return full[in_country & past_threshold]

In [249]:
np.array(get_country_data('China')['total_cases'])

array([  216,   235,   386,   526,   623,   882,  1323,  1988,  2775,
        4528,  5994,  7734,  9714, 11809, 14399, 17211, 20448, 24320,
       28047, 31207, 34625, 37232, 40206, 42696, 44724, 59865, 64021,
       66559, 68566, 70618, 72508, 74258, 74652, 75543, 76369, 77016,
       77234, 77749, 78159, 78598, 78927, 79355, 79929, 80134, 80261,
       80380, 80497, 80667, 80768, 80814, 80859, 80879, 80908, 80932,
       80954, 80973, 80995, 81020, 81063, 81086, 81130, 81229, 81281,
       81346, 81484, 81553, 81631, 81733, 81827, 81946, 82059, 82157,
       82241, 82295, 82395, 82465, 82527, 82575, 82642, 82698, 82784,
       82870, 82925, 83004])

In [271]:
# Scratch data: China's total_cases column (rows above the 100-case cut-off)
# as a 1-D integer array.
n1 = np.fromiter(get_country_data('China')['total_cases'], dtype="int")

In [272]:
# Scratch data: Italy's total_cases column (rows above the 100-case cut-off)
# as a 1-D integer array.
n2 = np.fromiter(get_country_data('Italy')['total_cases'], dtype="int")

In [273]:
# Gather the two series into one list.
n = [n1, n2]

In [296]:
np.full((2, 2), np.inf)

array([[inf, inf],
       [inf, inf]])

In [330]:
np.concatenate((n2,np.full((2, ), np.nan)))

array([1.32000e+02, 2.29000e+02, 3.22000e+02, 4.00000e+02, 6.50000e+02,
       8.88000e+02, 1.12800e+03, 1.68900e+03, 2.03600e+03, 2.50200e+03,
       3.08900e+03, 3.85800e+03, 4.63600e+03, 5.88300e+03, 7.37500e+03,
       9.17200e+03, 1.01490e+04, 1.24620e+04, 1.51130e+04, 1.76600e+04,
       2.11570e+04, 2.39800e+04, 2.79800e+04, 3.15060e+04, 3.57130e+04,
       4.10350e+04, 4.70210e+04, 5.35780e+04, 5.91380e+04, 6.39270e+04,
       6.91760e+04, 7.43860e+04, 8.05390e+04, 8.64980e+04, 9.24720e+04,
       9.76890e+04, 1.01739e+05, 1.05792e+05, 1.10574e+05, 1.15242e+05,
       1.19827e+05, 1.24632e+05, 1.28948e+05, 1.32547e+05, 1.35586e+05,
       1.39422e+05, 1.43626e+05, 1.47577e+05,         nan,         nan])

#### Fill the array to be the same length

In [341]:
# bqplot draws several lines from a single 2-D array, so every series must be
# padded to the same length before plotting.
def transformToNp(arrays):
    """Stack variable-length sequences into one NaN-padded 2-D float array.

    Parameters
    ----------
    arrays : iterable of 1-D array-likes
        One numeric sequence per line to plot; lengths may differ.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number_of_sequences, longest_length)``.
        Row ``i`` holds the ``i``-th sequence followed by ``np.nan`` up to
        the common length. An empty input yields shape ``(0, 0)`` instead
        of raising.
    """
    arrays = list(arrays)
    # default=0 keeps an empty selection from raising ValueError in max().
    max_len = max((len(a) for a in arrays), default=0)
    padded = np.full((len(arrays), max_len), np.nan)
    for row, values in zip(padded, arrays):
        # Each row is a view into `padded`, so this fills the result in place.
        row[:len(values)] = values
    return padded

In [333]:
# NOTE(review): this rebinds `plt`, which the first cell bound to
# matplotlib.pyplot; after this cell `plt` refers to bqplot's pyplot module.
from bqplot import pyplot as plt

In [334]:
# Hex colours passed to bqplot.Lines as the `colors` of the plotted lines.
colors = [
    "#F70020",
    "#191A1A",
    "#FB9701",
    "#1A7D00",
    "#072C8F",
    "#9E9E9E",
]

In [335]:
def make_line_chart(countries, log, confirm, max_days=50):
    """Build a bqplot line chart comparing countries day by day.

    Each country's series is whatever ``get_country_data`` returns for it,
    plotted against a running day index starting at 1 (not calendar dates).

    Parameters
    ----------
    countries : sequence of str
        Country names as they appear in the ``location`` column.
    log : bool
        Use a logarithmic y scale when true, a linear one otherwise.
    confirm : bool
        Plot ``total_cases`` when true, ``total_deaths`` otherwise.
    max_days : int or None, optional
        Number of day positions on the x axis. Defaults to 50, the value
        that used to be hard-coded; ``None`` uses the longest series.

    Returns
    -------
    tuple
        ``(fig, label)``: the ``bqplot.Figure`` and an ``ipywidgets.Label``
        linked to the ``selected`` trait of the figure's index selector.
    """
    column = 'total_cases' if confirm else 'total_deaths'
    # Label the axis after the quantity actually plotted.
    y_label = 'Cases' if confirm else 'Deaths'

    x_sc = bqplot.LinearScale(min=1)
    if log:
        y_sc = bqplot.LogScale()
    else:
        # Case series start above 100 because of the filter in
        # get_country_data; death series can start lower, so a floor of 100
        # would hide their early values.
        y_sc = bqplot.LinearScale(min=100 if confirm else 0)

    series = [np.fromiter(get_country_data(name)[column], dtype="int")
              for name in countries]
    # Guard the empty selection here: padding zero series has no common length.
    ys = transformToNp(series) if series else np.array([])

    if max_days is None:
        n_days = ys.shape[1] if ys.ndim == 2 else 0
    else:
        n_days = max_days
    indexes = np.arange(1, n_days + 1)

    lines = [bqplot.Lines(x=indexes, y=ys,
                          scales={'x': x_sc, 'y': y_sc},
                          marker_size=32, marker='circle', colors=colors)]

    ax_x = bqplot.Axis(scale=x_sc, label='Day')
    ax_y = bqplot.Axis(scale=y_sc, label=y_label, orientation='vertical')

    # Index selector on the x scale; its `selected` trait is mirrored into
    # the label below.
    selector = bqplot.interacts.IndexSelector(line_width=1, scale=x_sc)
    fig = bqplot.Figure(marks=lines, axes=[ax_x, ax_y], interaction=selector)

    label = ipywidgets.Label()
    ipywidgets.jslink((fig.interaction, 'selected'), (label, 'value'))
    return fig, label

In [336]:
# Example chart: confirmed cases on a log scale for three countries.
f, l = make_line_chart(['China', 'United States', 'Italy'], log=True, confirm=True)


In [337]:
display(l)

Label(value='')

In [338]:
display(f)

Figure(axes=[Axis(label='Day', scale=LinearScale(min=1.0), side='bottom'), Axis(label='Cases', orientation='ve…

In [339]:
from IPython.display import display, clear_output
# f = make_line_chart(['China','United States','Italy'],True)
%config InlineBackend.close_figures=False

# plt.ioff()
# plt.close('all') # if you get a "too many figures open" warning
# Countries plotted when the dashboard is first displayed.
default_countries = ['China', 'United States', 'Italy']

# Multi-select list of the countries that can be compared.
country_select = ipywidgets.SelectMultiple(
    options=['China', 'United States', 'Italy', 'United Kingdom', 'France', 'India'],
    value=default_countries,
    description='Countries',
    disabled=False
)

# Switch between cumulative confirmed cases and cumulative deaths.
confirm_select = ipywidgets.ToggleButtons(
    options=['Confirmed', 'Death'],
    disabled=False,
    button_style='',  # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Confirmed Cases', 'Death Cases'],
)

# Logarithmic y axis when ticked, linear otherwise.
log_select = ipywidgets.Checkbox(
    value=True,
    description='Log',
    disabled=False,
    indent=True
)

# Button whose click handler rebuilds the chart from the current selections.
check = ipywidgets.Button(
    description='Click me to update',
    disabled=False,
    button_style='',
    tooltip='Change'
)

# Layout: country list on top, the remaining controls in a row underneath.
selects = ipywidgets.HBox([confirm_select, log_select, check])
controls = ipywidgets.VBox([country_select, selects])

# Output area that holds the current figure and its label.
out = ipywidgets.Output()

# print('1')


def update(change):
    """Rebuild the chart from the current widget values and show it in ``out``.

    Parameters
    ----------
    change : object
        Whatever the caller passes (the button's click callback argument,
        or ``None`` for the initial draw). It is not used.
    """
    countries = country_select.value
    if not countries:
        # Nothing selected: empty the chart area instead of failing on
        # an empty data set.
        with out:
            clear_output()
        return

    show_confirmed = confirm_select.value == 'Confirmed'
    fig, label = make_line_chart(countries, log_select.value, show_confirmed)
    with out:
        # wait=True delays clearing until the new output is ready, so the
        # old chart does not flash away before the new one appears.
        clear_output(wait=True)
        display(fig)
        display(label)


vbox = ipywidgets.VBox([controls, out])
display(vbox)
check.on_click(update)
update(None)  # draw the initial chart with the default selections

VBox(children=(VBox(children=(SelectMultiple(description='Fruits', index=(0, 1, 2), options=('China', 'United …

## Summarize

This dataset contains the daily totals of confirmed coronavirus cases and deaths. It has 6 columns:
* 1. date: Date yyyy-mm-dd.
* 2. location: String. Country Name
* 3. new_cases: Int. New confirmed case number in that day.
* 4. new_deaths: Int. New death number in that day.
* 5. total_cases: Int. Total confirmed cases until that day.
* 6. total_deaths: Int. Total death cases until that day.
Each row holds one country's data for a specific day.

The dataset is called "Coronavirus daily data summary". It comes from https://data.world/markmarkoh/coronavirus-data/workspace/project-summary?agentid=markmarkoh&datasetid=coronavirus-data

According to the data summary, the author synchronizes the data every day from the official CDC and WHO websites. https://ourworldindata.org/coronavirus-source-data

The dataset project states that it is shared with Everyone. According to the data.world help page: "The work has been dedicated to the public domain by waiving all rights to the work worldwide under copyright law, including all related and neighboring rights, to the extent allowed by law." https://data.world/license-help

It is a small dataset because the pandemic only began this year: the total size is 311 KB, with 10230 rows and 6 columns.

A simple data visualization dashboard is included above.