# CoronaVirus Data Analysis

In [1]:
import pandas as pd
import numpy as np
import io
import requests

import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.tile_providers import Vendors, get_provider
from bokeh.models import ColumnDataSource

output_notebook()

In [2]:
confirmed_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
recovered_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"

confirmed_request = requests.get(confirmed_url).content
deaths_url = requests.get(deaths_url).content
recovered_url = requests.get(recovered_url).content

confirmed_df = pd.read_csv(io.StringIO(confirmed_request.decode('utf-8')))
death_df = pd.read_csv(io.StringIO(deaths_url.decode('utf-8')))
recovered_df = pd.read_csv(io.StringIO(recovered_url.decode('utf-8')))

## Confirmed Cases Globally

In [3]:
confirmed_df = confirmed_df.iloc[:,1:]
confirmed_df.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20
0,Thailand,15.0,101.0,2,3,5,7,8,8,14,...,43,47,48,50,50,50,53,59,70,75
1,Japan,36.0,138.0,2,1,2,2,4,4,7,...,331,360,420,461,502,511,581,639,639,701
2,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,...,110,117,130,138,150,150,160,178,178,200
3,Nepal,28.1667,84.25,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Malaysia,2.5,112.5,0,0,0,3,4,4,4,...,50,50,83,93,99,117,129,149,149,197


In [4]:
#list(confirmed_df['Country/Region'].unique())

In [5]:
confirmed_df[confirmed_df['Country/Region'] == 'South Africa'].iloc[:-1]


Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20


In [6]:
#confirmed_df.iloc[:,-1:]

confirmed_df[confirmed_df['Country/Region'] == 'Mainland China'].iloc[:,-1:].sum()

3/13/20    0
dtype: int64

In [7]:
trimmed = confirmed_df[confirmed_df['Country/Region'] == 'Mainland China'].iloc[:,4:]
dates = list(trimmed.columns)

In [8]:
# How many cases were recorded on the first day
dates[0]
trimmed[dates[0]].sum()

0

In [9]:
confirmed_df.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20
0,Thailand,15.0,101.0,2,3,5,7,8,8,14,...,43,47,48,50,50,50,53,59,70,75
1,Japan,36.0,138.0,2,1,2,2,4,4,7,...,331,360,420,461,502,511,581,639,639,701
2,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,...,110,117,130,138,150,150,160,178,178,200
3,Nepal,28.1667,84.25,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Malaysia,2.5,112.5,0,0,0,3,4,4,4,...,50,50,83,93,99,117,129,149,149,197


In [10]:
from datetime import datetime

_object = datetime.strptime('1/22/2020', '%M/%d/%Y')
_object

pd.to_datetime('1/22/2020')

Timestamp('2020-01-22 00:00:00')

In [11]:
tile_provider = get_provider(Vendors.CARTODBPOSITRON)

# range bounds supplied in web mercator coordinates
p = figure(x_range=(-3000000, 12000000), y_range=(-3000000, 7000000),
           x_axis_type="mercator", y_axis_type="mercator")

lat = list(confirmed_df['Lat'])
long = list(confirmed_df['Long'])

def wgs84_to_web_mercator(df, lon="Long", lat="Lat"):
    """Converts decimal longitude/latitude to Web Mercator format"""
    k = 6378137
    df["x"] = df[lon] * (k * np.pi/180.0)
    df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k
    return df

wgs84_to_web_mercator(confirmed_df)

p.add_tile(tile_provider)
p.circle(x=confirmed_df['x'], y=confirmed_df['y'], size=10, fill_color="blue", fill_alpha=0.6)

show(p)

##  Impact By Country/Region

In [30]:
# User must select the region they want to see

from bokeh.models import FactorRange
from bokeh.transform import dodge

region_list = ['US', 'China', 'Italy']
c_totals = []
d_totals = []
r_totals = []

for region in region_list:
    c_region_df = confirmed_df[confirmed_df['Country/Region'] == region]
    d_region_df = death_df[death_df['Country/Region'] == region]
    r_region_df = recovered_df[recovered_df['Country/Region'] == region]
    
    d_total = d_region_df.iloc[:,-3:-2].sum()
    c_total = c_region_df.iloc[:,-3:-2].sum()
    r_total = r_region_df.iloc[:,-3:-2].sum()
    
    c_totals.append(c_total[0])
    d_totals.append(d_total[0])
    r_totals.append(d_total[0])

data = {'region_list' : region_list,
        'c_totals'   : c_totals,
        'd_totals'   : d_totals,
        'r_totals'  : r_totals
       }

source = ColumnDataSource(data=data)

p = figure(x_range=region_list, plot_height=250, title="Covid19 By Country/Region")

p.vbar(x=dodge('region_list', -0.25, range=p.x_range), top='c_totals', width=0.2, source=source,
       color="#FFC300", legend_label="Confirmed")

p.vbar(x=dodge('region_list',  0.0,  range=p.x_range), top='d_totals', width=0.2, source=source,
       color="#FF5733", legend_label="Deceased")

p.vbar(x=dodge('region_list',  0.25,  range=p.x_range), top='r_totals', width=0.2, source=source,
       color="#82E0AA", legend_label="Recovered")

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

show(p)

## Growth of the Virus over time

In [33]:
from bokeh.models import NumeralTickFormatter
dates_df = confirmed_df.iloc[:,3:]
dates = list(dates_df.columns)

confirmed_sums_list = []
death_sums_list = []
recovered_sums_list = []

if 'x' in dates and 'y' in dates:
    dates.remove('x')
    dates.remove('y')
        
for date in dates:  
    confirmed_sum = confirmed_df[date].sum()
    confirmed_sums_list.append(confirmed_sum)
    death_sum = death_df[date].sum()
    death_sums_list.append(death_sum)
    recovered_sum = recovered_df[date].sum()
    recovered_sums_list.append(recovered_sum)

# stack death -> recovered -> confirmed

source = ColumnDataSource(data=dict(
    x=[pd.to_datetime(date) for date in dates],
    y1=death_sums_list,
    y2=recovered_sums_list,
    y3=confirmed_sums_list,
))

TOOLTIPS  = [
    ("Deceased", "@y1{0,0}"),
    ("Recovered", "@y2{0,0}"),
    ("Confirmed", "@y3{0,0}")
]

p = figure(x_axis_type='datetime', plot_width=800, plot_height=400, toolbar_location="right")

p.varea_stack(['y1', 'y2', 'y3'], x='x', color=("#82E0AA", "#FF5733", "#FFC300"), source=source)

p.yaxis.formatter=NumeralTickFormatter(format="00")

show(p)