# CoronaVirus Data Analysis

In [1]:
import pandas as pd
import numpy as np
import io
import requests

import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.tile_providers import Vendors, get_provider
from bokeh.models import ColumnDataSource, HoverTool

output_notebook()

In [3]:
confirmed_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
recovered_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"

confirmed_request = requests.get(confirmed_url).content
deaths_url = requests.get(deaths_url).content
recovered_url = requests.get(recovered_url).content

confirmed_df = pd.read_csv(io.StringIO(confirmed_request.decode('utf-8')))
death_df = pd.read_csv(io.StringIO(deaths_url.decode('utf-8')))
recovered_df = pd.read_csv(io.StringIO(recovered_url.decode('utf-8')))

## Confirmed Cases Globally

In [4]:
confirmed_df = confirmed_df.iloc[:,1:]
confirmed_df.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20
0,Thailand,15.0,101.0,2,3,5,7,8,8,14,...,50,53,59,70,75,82,114,147,177,212
1,Japan,36.0,138.0,2,1,2,2,4,4,7,...,511,581,639,639,701,773,839,825,878,889
2,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,...,150,160,178,178,200,212,226,243,266,313
3,Nepal,28.1667,84.25,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Malaysia,2.5,112.5,0,0,0,3,4,4,4,...,117,129,149,149,197,238,428,566,673,790


In [5]:
#list(confirmed_df['Country/Region'].unique())

In [6]:
confirmed_df[confirmed_df['Country/Region'] == 'South Africa'].iloc[:-1]


Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20


In [7]:
#confirmed_df.iloc[:,-1:]

confirmed_df[confirmed_df['Country/Region'] == 'Mainland China'].iloc[:,-1:].sum()

3/18/20    0
dtype: int64

In [8]:
trimmed = confirmed_df[confirmed_df['Country/Region'] == 'Mainland China'].iloc[:,4:]
dates = list(trimmed.columns)

In [9]:
# How many cases were recorded on the first day
dates[0]
trimmed[dates[0]].sum()

0

In [10]:
confirmed_df.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20
0,Thailand,15.0,101.0,2,3,5,7,8,8,14,...,50,53,59,70,75,82,114,147,177,212
1,Japan,36.0,138.0,2,1,2,2,4,4,7,...,511,581,639,639,701,773,839,825,878,889
2,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,...,150,160,178,178,200,212,226,243,266,313
3,Nepal,28.1667,84.25,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Malaysia,2.5,112.5,0,0,0,3,4,4,4,...,117,129,149,149,197,238,428,566,673,790


In [11]:
from datetime import datetime

_object = datetime.strptime('1/22/2020', '%M/%d/%Y')
_object

pd.to_datetime('1/22/2020')

Timestamp('2020-01-22 00:00:00')

In [12]:
tile_provider = get_provider(Vendors.CARTODBPOSITRON)

# range bounds supplied in web mercator coordinates
p = figure(x_range=(-3000000, 12000000), y_range=(-3000000, 7000000),
           x_axis_type="mercator", y_axis_type="mercator")

lat = list(confirmed_df['Lat'])
long = list(confirmed_df['Long'])

def wgs84_to_web_mercator(df, lon="Long", lat="Lat"):
    """Converts decimal longitude/latitude to Web Mercator format"""
    k = 6378137
    df["x"] = df[lon] * (k * np.pi/180.0)
    df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k
    return df

wgs84_to_web_mercator(confirmed_df)

p.add_tile(tile_provider)
p.circle(x=confirmed_df['x'], y=confirmed_df['y'], size=10, fill_color="blue", fill_alpha=0.6)

show(p)

##  Impact By Country/Region

In [13]:
# User must select the region they want to see

from bokeh.models import FactorRange
from bokeh.transform import dodge

region_list = ['US', 'China', 'Italy']
c_totals = []
d_totals = []
r_totals = []

for region in region_list:
    c_region_df = confirmed_df[confirmed_df['Country/Region'] == region]
    d_region_df = death_df[death_df['Country/Region'] == region]
    r_region_df = recovered_df[recovered_df['Country/Region'] == region]
    
    d_total = d_region_df.iloc[:,-3:-2].sum()
    c_total = c_region_df.iloc[:,-3:-2].sum()
    r_total = r_region_df.iloc[:,-3:-2].sum()
    
    c_totals.append(c_total[0])
    d_totals.append(d_total[0])
    r_totals.append(r_total[0])

data = {'region_list' : region_list,
        'c_totals'   : c_totals,
        'd_totals'   : d_totals,
        'r_totals'  : r_totals
       }

source = ColumnDataSource(data=data)

p = figure(x_range=region_list, plot_height=250, title="Covid19 By Country/Region")

p.vbar(x=dodge('region_list', -0.25, range=p.x_range), top='c_totals', width=0.2, source=source,
       color="#FFC300", legend_label="Confirmed")

p.vbar(x=dodge('region_list',  0.0,  range=p.x_range), top='d_totals', width=0.2, source=source,
       color="#FF5733", legend_label="Deceased")

p.vbar(x=dodge('region_list',  0.25,  range=p.x_range), top='r_totals', width=0.2, source=source,
       color="#82E0AA", legend_label="Recovered")

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

show(p)

## Growth of the Virus over time

In [14]:
dates

['1/23/20',
 '1/24/20',
 '1/25/20',
 '1/26/20',
 '1/27/20',
 '1/28/20',
 '1/29/20',
 '1/30/20',
 '1/31/20',
 '2/1/20',
 '2/2/20',
 '2/3/20',
 '2/4/20',
 '2/5/20',
 '2/6/20',
 '2/7/20',
 '2/8/20',
 '2/9/20',
 '2/10/20',
 '2/11/20',
 '2/12/20',
 '2/13/20',
 '2/14/20',
 '2/15/20',
 '2/16/20',
 '2/17/20',
 '2/18/20',
 '2/19/20',
 '2/20/20',
 '2/21/20',
 '2/22/20',
 '2/23/20',
 '2/24/20',
 '2/25/20',
 '2/26/20',
 '2/27/20',
 '2/28/20',
 '2/29/20',
 '3/1/20',
 '3/2/20',
 '3/3/20',
 '3/4/20',
 '3/5/20',
 '3/6/20',
 '3/7/20',
 '3/8/20',
 '3/9/20',
 '3/10/20',
 '3/11/20',
 '3/12/20',
 '3/13/20',
 '3/14/20',
 '3/15/20',
 '3/16/20',
 '3/17/20',
 '3/18/20']

In [15]:
from bokeh.models import NumeralTickFormatter
dates_df = confirmed_df.iloc[:,3:]
dates = list(dates_df.columns)

confirmed_sums_list = []
death_sums_list = []
recovered_sums_list = []

if 'x' in dates and 'y' in dates:
    dates.remove('x')
    dates.remove('y')
        
for date in dates:  
    confirmed_sum = confirmed_df[date].sum()
    confirmed_sums_list.append(confirmed_sum)
    death_sum = death_df[date].sum()
    death_sums_list.append(death_sum)
    recovered_sum = recovered_df[date].sum()
    recovered_sums_list.append(recovered_sum)

# stack death -> recovered -> confirmed

source = ColumnDataSource(data=dict(
    x=[pd.to_datetime(date) for date in dates],
    y1=death_sums_list,
    y2=recovered_sums_list,
    y3=confirmed_sums_list,
))

TOOLTIPS  = [
    ("Deceased", "@y1{0,0}"),
    ("Recovered", "@y2{0,0}"),
    ("Confirmed", "@y3{0,0}")
]

p = figure(x_axis_type='datetime', plot_width=600, plot_height=400, toolbar_location="right")

p.varea_stack(['y1', 'y2', 'y3'], x='x', color=("#82E0AA", "#FF5733", "#FFC300"), source=source)

p.yaxis.formatter=NumeralTickFormatter(format="00")

show(p)

In [35]:
from bokeh.models import NumeralTickFormatter
dates_df = confirmed_df.iloc[:,3:]
dates = list(dates_df.columns)

confirmed_sums_list = []
death_sums_list = []
recovered_sums_list = []

if 'x' in dates and 'y' in dates:
    dates.remove('x')
    dates.remove('y')
        
for date in dates:  
    confirmed_sum = confirmed_df[date].sum()
    confirmed_sums_list.append(confirmed_sum)
    death_sum = death_df[date].sum()
    death_sums_list.append(death_sum)
    recovered_sum = recovered_df[date].sum()
    recovered_sums_list.append(recovered_sum)

# stack death -> recovered -> confirmed

source = ColumnDataSource(data=dict(
    x=[pd.to_datetime(date) for date in dates],
    y1=death_sums_list,
    y2=recovered_sums_list,
    y3=confirmed_sums_list,
))

TOOLTIPS  = [
    ("Confirmed", "@y3{0,0}"),
    ("Recovered", "@y2{0,0}"),
    ("Deceased", "@y1{0,0}")    
]

p = figure(x_axis_type='datetime', plot_width=600, plot_height=400, toolbar_location="right", tooltips=TOOLTIPS)

p.line(x='x', y='y1', color="#FF5733", line_width=2, source=source)
p.line(x='x', y='y2', color="#82E0AA", line_width=2, source=source)
p.line(x='x', y='y3', color="#FFC300", line_width=2, source=source)

p.circle(x='x', y='y1', color="#FF5733", line_width=2, source=source)
p.circle(x='x', y='y2', color="#82E0AA", line_width=2, source=source)
p.circle(x='x', y='y3', color="#FFC300", line_width=2, source=source)

p.yaxis.formatter=NumeralTickFormatter(format="00")
p.xaxis.

show(p)

In [38]:
from bokeh.models import NumeralTickFormatter
dates_df = confirmed_df.iloc[:,3:]
dates = list(dates_df.columns)

confirmed_sums_list = []
death_sums_list = []
recovered_sums_list = []

if 'x' in dates and 'y' in dates:
    dates.remove('x')
    dates.remove('y')
        
for date in dates:  
    confirmed_sum = confirmed_df[date].sum()
    confirmed_sums_list.append(confirmed_sum)
    death_sum = death_df[date].sum()
    death_sums_list.append(death_sum)
    recovered_sum = recovered_df[date].sum()
    recovered_sums_list.append(recovered_sum)

# stack death -> recovered -> confirmed

source = ColumnDataSource(data=dict(
    x=[pd.to_datetime(date) for date in dates],
    y1=death_sums_list,
    y2=recovered_sums_list,
    y3=confirmed_sums_list,
))

TOOLTIPS  = [
    ("Confirmed", "@y3{0,0}"),
    ("Recovered", "@y2{0,0}"),
    ("Deceased", "@y1{0,0}")    
]

p = figure(x_axis_type='datetime', plot_width=600, plot_height=400, toolbar_location="right", tooltips=TOOLTIPS)

p.varea_stack(['y2'], x='x', color="#82E0AA", source=source)

p.yaxis.formatter=NumeralTickFormatter(format="00")

show(p)

## Confirmed vs Deceased vs Recovered

In [16]:
confirmed_cases = confirmed_df.iloc[:,-3:-2].sum()
deceased_cases = death_df.iloc[:,-1:].sum()
recovered_cases = recovered_df.iloc[:,-1:].sum()

confirmed_cases

3/18/20    214910
dtype: int64

In [17]:
from math import pi
import pandas as pd

from bokeh.io import show
from bokeh.plotting import figure
from bokeh.transform import cumsum
x = {
    'Confirmed': confirmed_cases[0],
    'Deceased': deceased_cases[0],
    'Recovered': recovered_cases[0]
}

data = pd.Series(x).reset_index(name='value').rename(columns={'index':'Cases'})
data['angle'] = data['value']/data['value'].sum() * 2*pi
data['color'] = ["#FFC300", "#FF5733", "#82E0AA"]

p = figure(plot_height=350, title="Confirmed vs Deceased vs Recovered", toolbar_location=None,
           tools="hover", tooltips="@Cases: @value", x_range=(-0.5, 1.0))

p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='Cases', source=data)

p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)

In [18]:
from math import pi
import pandas as pd

from bokeh.io import show
from bokeh.plotting import figure
from bokeh.transform import cumsum
x = {
    'Confirmed': confirmed_cases[0],
    'Deceased': deceased_cases[0],
    'Recovered': recovered_cases[0]
}

data = pd.Series(x).reset_index(name='value').rename(columns={'index':'Cases'})
data['angle'] = data['value']/data['value'].sum() * 2*pi
data['color'] = ["#FFC300", "#FF5733", "#82E0AA"]

p = figure(plot_height=350, title="Confirmed vs Deceased vs Recovered", toolbar_location=None, match_aspect=True,
           tools="hover", tooltips="@Cases: @value", x_range=(-0.5, 1.0))

p.annular_wedge(x=0, y=1, inner_radius=0.4, outer_radius=0.25, direction="anticlock",
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='Cases', source=data)

hover = HoverTool(tooltips=[("@Cases", "@value{0,0}")], mode="mouse", point_policy="follow_mouse")
p.add_tools(hover)

p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)

In [19]:
from bokeh.plotting import figure, output_file, show
from bokeh.tile_providers import CARTODBPOSITRON, get_provider

tile_provider = get_provider(CARTODBPOSITRON)

# range bounds supplied in web mercator coordinates
p = figure(x_range=(-2000000, 6000000), y_range=(-1000000, 7000000),
           x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(tile_provider)

show(p)



In [92]:
def test(df, *args):
    print(df.head(1))
    print(args[0])
        
test(confirmed_df, 'color')

  Country/Region   Lat   Long  1/22/20  1/23/20  1/24/20  1/25/20  1/26/20  \
0       Thailand  15.0  101.0        2        3        5        7        8   

   1/27/20  1/28/20  ...  3/11/20  3/12/20  3/13/20  3/14/20  3/15/20  \
0        8       14  ...       59       70       75       82      114   

   3/16/20  3/17/20  3/18/20             x             y  
0      147      177      212  1.124327e+07  1.689200e+06  

[1 rows x 62 columns]
color


In [97]:
def create_chart(df, *args, **kwargs):
    if (kwargs):
        death_df = kwargs['death_df']
        recovered_df = kwargs['recovered_df']
        return create_stacked_chart(df, death_df, recovered_df)
    
    if (args):
        colour = args[0]
        return create_area_chart(df, colour)

def create_area_chart(df, colour):
    dates_df = df.iloc[:,3:]
    dates = list(dates_df.columns)

    sums_list = []

    if 'x' in dates and 'y' in dates:
        dates.remove('x')
        dates.remove('y')
            
    for date in dates:  
        sum_ = df[date].sum()
        sums_list.append(sum_)

    # stack death -> recovered -> confirmed

    source = ColumnDataSource(data=dict(
        x=[pd.to_datetime(date) for date in dates],
        y1=sums_list
    ))

    TOOLTIPS  = [
        ("Confirmed", "@y1{0,0}"),   
    ]

    p = figure(x_axis_type='datetime', plot_width=600, plot_height=400, toolbar_location="right", tooltips=TOOLTIPS)

    p.varea_stack(['y1'], x='x', color=colour, source=source)

    p.yaxis.formatter=NumeralTickFormatter(format="00")

    return p

show(create_chart(confirmed_df, 'red'))