# CoronaVirus Data Analysis

In [132]:
import pandas as pd
import io
import requests

In [2]:
confirmed_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
recovered_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"

confirmed_request = requests.get(confirmed_url).content
deaths_url = requests.get(deaths_url).content
recovered_url = requests.get(recovered_url).content

confirmed_df = pd.read_csv(io.StringIO(confirmed_request.decode('utf-8')))
death_df = pd.read_csv(io.StringIO(deaths_url.decode('utf-8')))
recovered_df = pd.read_csv(io.StringIO(recovered_url.decode('utf-8')))

## Confirmed Cases Dataset

In [27]:
confirmed_df = confirmed_df.iloc[:,1:]
confirmed_df.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20
0,Thailand,15.0,101.0,2,3,5,7,8,8,14,...,43,43,43,47,48,50,50,50,53,59
1,Japan,36.0,138.0,2,1,2,2,4,4,7,...,274,293,331,360,420,461,502,511,581,639
2,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,...,108,110,110,117,130,138,150,150,160,178
3,Nepal,28.1667,84.25,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Malaysia,2.5,112.5,0,0,0,3,4,4,4,...,29,36,50,50,83,93,99,117,129,149


In [28]:
#list(confirmed_df['Country/Region'].unique())

In [29]:
confirmed_df[confirmed_df['Country/Region'] == 'South Africa'].iloc[:-1]


Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20


In [30]:
#confirmed_df.iloc[:,-1:]

confirmed_df[confirmed_df['Country/Region'] == 'Mainland China'].iloc[:,-1:].sum()

3/11/20    0
dtype: int64

In [31]:
trimmed = confirmed_df[confirmed_df['Country/Region'] == 'Mainland China'].iloc[:,4:]
dates = list(trimmed.columns)

In [32]:
# How many cases were recorded on the first day
dates[0]
trimmed[dates[0]].sum()

0

In [33]:
confirmed_df.head()

Unnamed: 0,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20
0,Thailand,15.0,101.0,2,3,5,7,8,8,14,...,43,43,43,47,48,50,50,50,53,59
1,Japan,36.0,138.0,2,1,2,2,4,4,7,...,274,293,331,360,420,461,502,511,581,639
2,Singapore,1.2833,103.8333,0,1,3,3,4,5,7,...,108,110,110,117,130,138,150,150,160,178
3,Nepal,28.1667,84.25,0,0,0,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Malaysia,2.5,112.5,0,0,0,3,4,4,4,...,29,36,50,50,83,93,99,117,129,149


In [34]:
from datetime import datetime

_object = datetime.strptime('1/22/2020', '%M/%d/%Y')
_object

pd.to_datetime('1/22/2020')

Timestamp('2020-01-22 00:00:00')

In [35]:
import bokeh
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.tile_providers import Vendors, get_provider
from bokeh.models import ColumnDataSource

output_notebook()

In [118]:
tile_provider = get_provider(Vendors.CARTODBPOSITRON)

# range bounds supplied in web mercator coordinates
p = figure(x_range=(-3000000, 12000000), y_range=(-3000000, 7000000),
           x_axis_type="mercator", y_axis_type="mercator")

lat = list(confirmed_df['Lat'])
long = list(confirmed_df['Long'])

def wgs84_to_web_mercator(df, lon="Long", lat="Lat"):
    """Converts decimal longitude/latitude to Web Mercator format"""
    k = 6378137
    df["x"] = df[lon] * (k * np.pi/180.0)
    df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k
    return df

wgs84_to_web_mercator(confirmed_df)

p.add_tile(tile_provider)
p.circle(x=confirmed_df['x'], y=confirmed_df['y'], size=10, fill_color="blue", fill_alpha=0.6)

show(p)

In [129]:
confirmed_df['Country/Region'].unique()

array(['Thailand', 'Japan', 'Singapore', 'Nepal', 'Malaysia', 'Canada',
       'Australia', 'Cambodia', 'Sri Lanka', 'Germany', 'Finland',
       'United Arab Emirates', 'Philippines', 'India', 'Italy', 'Sweden',
       'Spain', 'Belgium', 'Egypt', 'Lebanon', 'Iraq', 'Oman',
       'Afghanistan', 'Bahrain', 'Kuwait', 'Algeria', 'Croatia',
       'Switzerland', 'Austria', 'Israel', 'Pakistan', 'Brazil',
       'Georgia', 'Greece', 'North Macedonia', 'Norway', 'Romania',
       'Estonia', 'Netherlands', 'San Marino', 'Belarus', 'Iceland',
       'Lithuania', 'Mexico', 'New Zealand', 'Nigeria', 'Ireland',
       'Luxembourg', 'Monaco', 'Qatar', 'Ecuador', 'Azerbaijan',
       'Armenia', 'Dominican Republic', 'Indonesia', 'Portugal',
       'Andorra', 'Latvia', 'Morocco', 'Saudi Arabia', 'Senegal',
       'Argentina', 'Chile', 'Jordan', 'Ukraine', 'Hungary',
       'Liechtenstein', 'Poland', 'Tunisia', 'Bosnia and Herzegovina',
       'Slovenia', 'South Africa', 'Bhutan', 'Cameroon', 'Colo

In [183]:
# User must select the region they want to see


# Confirmed Cases By Region
region_list = ['US', 'China', 'Italy']
c_totals = []
d_totals = []

for region in region_list:
    c_region_df = confirmed_df[confirmed_df['Country/Region'] == region]
    d_region_df = death_df[death_df['Country/Region'] == region]
    d_total = d_region_df.iloc[:,-3:-2].sum()
    c_total = c_region_df.iloc[:,-3:-2].sum()
    c_totals.append(c_total[0])
    d_totals.append(d_total[0])

p = figure(x_range = region_list, plot_height=350, title = 'Corona Virus By Country/Region', tools="hover")
p.vbar(x=region_list, top=c_totals, width=0.9, color = 'blue')
p.vbar(x=region_list, top=d_totals, width=0.9, color='red')

show(p)

In [181]:
death_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20
0,,Thailand,15.0,101.0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
1,,Japan,36.0,138.0,0,0,0,0,0,0,...,6,6,6,6,6,6,6,10,10,15
2,,Singapore,1.2833,103.8333,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,,Nepal,28.1667,84.25,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,,Malaysia,2.5,112.5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [184]:
[(c, d) for c in c_totals for d in d_totals]

[(2384, 22),
 (2384, 3123),
 (2384, 463),
 (80921, 22),
 (80921, 3123),
 (80921, 463),
 (12462, 22),
 (12462, 3123),
 (12462, 463)]