 # Corona Analysis

### Import required modules and extract data

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import matplotlib.pyplot as plt
import seaborn as sns

from bokeh.plotting import figure
from bokeh.io import show, output_notebook

url = 'https://www.worldometers.info/coronavirus/'

#### Extracting through beautifulsoup4

In [2]:
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')


##### Reading & Formating tables

In [3]:
# Read todays data
list_today = []
table = soup.find('table', id = 'main_table_countries_today')
table_rows = table.find_all('tr')

In [4]:
for tr in table_rows:
    td = tr.find_all('td')
    row = [i.text for i in td]
    if row:
        list_today.append(row)

_columns = ['Country','Total_Cases','New_Cases','Total_Deaths','New_Deaths','Total_recovered','Active_Cases','Serious_Cases',
            'Total_Cases/M','Total_Death/M','Total_test','Total_test/m', 'continent']
df_today = pd.DataFrame(list_today, columns=_columns)

for _colm in _columns[1:-1]:
    df_today[_colm] = df_today[_colm].apply(lambda x: (str(x).replace(' ','').replace('+','').replace(',','').replace('N/A','') if x else 0))
    df_today[_colm] = pd.to_numeric(df_today[_colm])
    df_today[_colm].fillna(0, inplace=True)
df_today = df_today[~((df_today['Country'].str.contains('Total')) & (~df_today['continent'].str.contains('All')))]
df_today.drop('continent', axis=1, inplace=True)
    

# Read Yesterdays data
list_yest = []
table = soup.find('table', id = 'main_table_countries_yesterday')
table_rows = table.find_all('tr')

for tr in table_rows:
    td = tr.find_all('td')
    row = [i.text for i in td]
    if row:
        list_yest.append(row)

_columns_y = ['Country','Total_Cases_y','New_Cases_y','Total_Deaths_y','New_Deaths_y','Total_recovered_y','Active_Cases_y','Serious_Cases_y','Total_Cases/M_y','Total_Death/M_y', 'Total_test_y','Total_test/m_y', 'continent_y']
df_yest = pd.DataFrame(list_yest, columns=_columns_y)

for _colm in _columns_y[1:-1]:
    df_yest[_colm] = df_yest[_colm].apply(lambda x: (str(x).replace(' ','').replace('+','').replace(',','').replace("N/A",'') if x else 0))   
    df_yest[_colm] = pd.to_numeric(df_yest[_colm])
    df_yest[_colm].fillna(0, inplace=True)
df_yest = df_yest[~((df_yest['Country'].str.contains('Total')) & (~df_yest['continent_y'].str.contains('All')))]
df_yest.drop('continent_y', axis=1, inplace=True)

df = df_today.merge(df_yest, how='outer')

In [5]:
df[df['Country'].str.contains('Total')]

Unnamed: 0,Country,Total_Cases,New_Cases,Total_Deaths,New_Deaths,Total_recovered,Active_Cases,Serious_Cases,Total_Cases/M,Total_Death/M,...,New_Cases_y,Total_Deaths_y,New_Deaths_y,Total_recovered_y,Active_Cases_y,Serious_Cases_y,Total_Cases/M_y,Total_Death/M_y,Total_test_y,Total_test/m_y
220,Total:,1983018,59170,125121.0,5503,466375.0,1391522,50954,254.4,16.1,...,71591,119618.0,5423,444636.0,1359594,51747,246.8,15.3,0,0


In [6]:
df = df[~df['Country'].str.contains('World|North America|Europe|Asia|South America|Oceania|Africa')]

#### Extracting directy using Pandas (Alternate Options)

In [7]:
# dfs = pd.read_html(url, header=0)

# cases_today = dfs[0].copy()
# cases_yst = dfs[-1].copy()

# cases_today['NewCases'] = cases_today['NewCases'].apply(lambda x: int(str(x).replace('+','').replace(',','').replace('nan','0')))
# cases_yst['NewCases'] = cases_yst['NewCases'].apply(lambda x: int(str(x).replace('+','').replace(',','').replace('nan','0')))

# for colm in cases_yst.columns:
#     if colm != 'Country,Other':
#         new = colm + '_yst'
#         cases_yst.rename(columns = {colm:new}, inplace=True)

# cases = cases_today.merge(cases_yst, how='outer')

##### Create Data for Plots

In [8]:
df_plt = df[['Country','Total_recovered', 'Total_Deaths', 'Total_Cases']].sort_values(by=['Total_Deaths'], ascending= False).head(11)
df_plt = df_plt[~df_plt['Country'].str.contains('China|Total')]

In [9]:
# Pie for New cases
# df_pie = df[['Country','New_Cases']]

# df_pie = df_pie[~df_pie['Country'].str.contains('Total')].sort_values(by=['New_Cases'], ascending=False)

# df_pie_10 = df_pie[:10]

# df_pie_10 = df_pie_10.append({'Country':'Others', 'New_Cases':df_pie[10:]['New_Cases'].sum()}, ignore_index=True)

# from math import pi
# from bokeh.palettes import Set3

# df_pie_10['angle'] = df_pie_10['New_Cases']/df_pie_10['New_Cases'].sum() * 2*pi
# df_pie_10['color'] = Set3[len(df_pie_10)]

In [10]:
# Total cases
df_new = df[['Country','Total_Cases','Active_Cases', 'Total_Deaths','Serious_Cases']].copy()
df_new = df_new[~df_new['Country'].str.contains('Total')].sort_values(by=['Total_Cases'], ascending=False).head(10)

In [11]:
df_txt = df_ratio = df[['Country','Total_Cases','New_Cases','Total_Deaths','New_Deaths','Total_recovered','Active_Cases','Serious_Cases']].copy()
df_txt = df_txt[~df_txt['Country'].str.contains('Total')]

In [12]:
# # Including China
# df_ratio = df[['Country','Total_recovered', 'Total_Deaths', 'Total_Cases']].copy()
# df_ratio = df_ratio[~df_ratio['Country'].str.contains('Total')]
# df_ratio['death_ratio'] = (df_ratio['Total_Deaths']/ (df_ratio['Total_recovered'] + df_ratio['Total_Deaths']))*100

# df_ratio['Wt_Cases'] = df_ratio['Total_Cases']/df_ratio['Total_Cases'].sum()

# df_ratio['ratio']=df_ratio['death_ratio']*df_ratio['Wt_Cases']

# df_ratio.sort_values(by=['ratio'], ascending=False).head(10)

In [13]:
# Excluding China
df_ratio = df[['Country','Total_Cases','New_Cases','Total_Deaths','New_Deaths','Total_recovered','Active_Cases','Serious_Cases']].copy()
df_ratio = df_ratio[~df_ratio['Country'].str.contains('China|Total')]
df_ratio['Death_Ratio'] = (df_ratio['Total_Deaths']/ (df_ratio['Total_recovered'] + df_ratio['Total_Deaths']))*100

df_ratio['Wt_Cases'] = df_ratio['Total_Cases']/df_ratio['Total_Cases'].sum()

df_ratio['Ratio']=df_ratio['Death_Ratio']*df_ratio['Wt_Cases']

df_total = df[df['Country'].str.contains('Total')][['Country','Total_recovered', 'Total_Deaths', 'Total_Cases']]

df_ratio_table = df_ratio[df_ratio['Total_Deaths']>0].sort_values(by=['Ratio'], ascending=False)[['Country','Total_Cases','Total_Deaths','Total_recovered','Death_Ratio','Ratio','New_Cases','New_Deaths','Active_Cases','Serious_Cases']]
df_ratio_table.rename(columns={'Total_Cases': 'Total Cases', 'Total_Deaths': 'Total Deaths', 
                                'New_Cases':'New Cases', 'New_Deaths':'New Deaths', 'Death_Ratio': 'Death Ratio', 
                                'Ratio':'Wt Ratio', 'Total_recovered': 'Total Reco', 
                               'Active_Cases': 'Active', 'Serious_Cases': 'Serious'}, inplace=True)
df_ratio_table[['Death Ratio', 'Wt Ratio']] = df_ratio_table[['Death Ratio', 'Wt Ratio']].round(2)

# Process India Dat
df_ind = df_ratio[df_ratio['Country'].str.contains('India')]

In [14]:
# World
df_world = df[['Country','Total_Cases','New_Cases','Total_Deaths','New_Deaths','Total_recovered','Active_Cases','Serious_Cases']].copy()
df_world = df_world[df_world['Country'].str.contains('Total')]
df_world['Closed_Cases'] = df_world['Total_Cases'] - df_world['Active_Cases']

In [15]:
# Process Yesterdays Data

df_y= df[['Country','Total_Cases_y','New_Cases_y','Total_Deaths_y','New_Deaths_y','Total_recovered_y','Active_Cases_y','Serious_Cases_y']].copy()
df_y = df_y[~df_y['Country'].str.contains('Total')]
df_y_plt = df_y[(df_y['New_Cases_y']>0)|(df_y['New_Deaths_y']>0)].sort_values(by=['New_Cases_y'], ascending=False)
df_y_plt_10 = df_y_plt.sort_values(by=['New_Cases_y'], ascending=False).head(10).copy()
df_y_plt.rename(columns={'Total_Cases_y': 'Total Cases', 'Total_Deaths_y': 'Total Deaths', 
                                'New_Cases_y':'New Cases', 'New_Deaths_y':'New Deaths', 'Total_recovered_y': 'Total Reco', 
                               'Active_Cases_y': 'Active', 'Serious_Cases_y': 'Serious'}, inplace=True)

In [16]:
df_y_plt.head()

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Reco,Active,Serious
8,USA,586941,26641,23640.0,1535,36948.0,526353,12772
13,UK,88621,4342,11329.0,717,0.0,76948,1559
11,France,136779,4188,14967.0,574,27718.0,94094,6821
15,Turkey,61049,4093,1296.0,98,3957.0,55796,1786
9,Spain,170099,3268,17756.0,547,64727.0,87616,7371


In [17]:
_s = "Death Ratio excluding China is <b>" + str(round(df_ratio['Total_Deaths'].sum()*100/(df_ratio['Total_Deaths'].sum() + df_ratio['Total_recovered'].sum()),2)) + "% </b>"

_s1 = "Death Ratio of World is <b>" + str(round(df_total['Total_Deaths'].sum()*100/(df_total['Total_Deaths'].sum() + df_total['Total_recovered'].sum()),2))+ "% </b>"

_s_ind_1 = "<i>New Cases: </i><b>" + str('{:,}'.format(df_ind['New_Cases'].sum())) + " </b><i> &nbsp;&nbsp;&nbsp;&nbsp; New Deaths: </i><b>" + str('{:,}'.format(df_ind['New_Deaths'].sum())) + "</b>"
_s_ind_2 = " &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <i>Total Cases: </i><b>" + str('{:,}'.format(df_ind['Total_Cases'].sum())) + " </b><i> &nbsp;&nbsp;&nbsp;&nbsp; Total Deaths: </i><b>" + str('{:,}'.format(int(df_ind['Total_Deaths'].sum()))) + "</b>"
_s_ind_3 = " &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <b> <i>India's total recovery is " + str(df_ind['Total_recovered'].sum()) + " with " + str(round(df_ind['Death_Ratio'].sum(),2)) + "% deaths ratio</i></b>"
_s_dt_title = "<b>Today: Analysis Table <b>"
_s_dt_title_y = "<b>Yesterday: Analysis Table <b>"
_s_dt_cal = " ** &nbsp; <i> Death Ratio = Death / (Death + Recovered) </i>" + " &nbsp;&nbsp;&nbsp;&nbsp; **<i> Weighted Ratio = Death Ratio * Total Country Cases / Total World Cases </i>"
_t_c = "<i>Cases: </i>" + str('{:,}'.format(df_total['Total_Cases'].sum()))
_t_d = " &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <i>Deaths: </i>" + str('{:,}'.format(int(df_total['Total_Deaths'].sum())))
_t_r = "<i> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Recovered: </i>" + str('{:,}'.format(df_total['Total_recovered'].sum()))

#### Plots

In [20]:
"""Bokeh Visualization Template

This template is a general outline for turning your data into a 
visualization using Bokeh.
"""
# Data handling
import pandas as pd
import numpy as np

# Bokeh libraries
from bokeh.io import output_file, output_notebook
from bokeh.plotting import figure, show, reset_output
from bokeh.models import ColumnDataSource, HoverTool, LabelSet, Div, Legend, LegendItem, NumeralTickFormatter
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Tabs, Panel, DataTable, DateFormatter, TableColumn
from bokeh.transform import cumsum, dodge

# Prepare the data

# Determine where the visualization will be rendered
output_file('filename.html', title='Corona Analysis')  # Render to static HTML, or 
output_notebook()  # Render inline in a Jupyter Notebook

# Set up the figure(s)

source = ColumnDataSource(df_plt)

TOOLTIPS = [
    ("Country" , "@Country"),
    ("Total Deaths", "@Total_Deaths{0,0}"),
    ("Total Recovered", "@Total_recovered{0,0}"),
    ("Total Cases", "@Total_Cases{0,0}")
    ]

_world = Div(text="<b><u>World</u> :</b>", style={'font-size': '150%', 'color': 'darkgreen'})
_perc_closed = round((df_world['Closed_Cases']/df_world['Total_Cases']).sum()*100,2)
_Closed_cases = Div(text="<b>Closed Cases : " + str(_perc_closed) + "% </b>", width=275, style={'font-size': '125%', 'color': 'grey'})
_blank = Div(text="<b></b>", width=275, style={'font-size': '150%', 'color': 'grey'})
_india = Div(text="<b><u>India</u> :</b>", style={'font-size': '150%', 'color': 'darkblue'})
_total_cases = Div(text=_t_c, width=175, style={'font-size': '150%', 'color': 'grey'})
_total_death = Div(text=_t_d, width=275, height=10,  style={'font-size': '150%', 'color': ' chocolate'})
_total_rec = Div(text=_t_r, width=375, height=40, style={'font-size': '150%', 'color': ' goldenrod'})

source_w = ColumnDataSource(df_world)

w = figure(y_range=df_world['Country'].to_list(), plot_height = 160, plot_width = 600)
labels = LabelSet(x='Active_Cases', y='Country', text='Active_Cases', level='glyph', source=source_w, render_mode='canvas',
                 y_offset = 20, x_offset = -150)
w.add_layout(labels)
labels = LabelSet(x='Closed_Cases', y='Country', text='Closed_Cases', level='glyph', source=source_w, render_mode='canvas',
                 y_offset = 20, x_offset = 200 + 100)
w.add_layout(labels)
w.hbar_stack(['Active_Cases', 'Closed_Cases'], y='Country', height=0.20, source=source_w, color=("darkturquoise", "cadetblue"))
li1 = LegendItem(label='Active Cases', renderers=[w.renderers[0]])
li2 = LegendItem(label='Closed Cases', renderers=[w.renderers[1]])
legend1 = Legend(items=[li1, li2], location='bottom_right', orientation = 'horizontal')
w.add_layout(legend1)
w.xgrid.visible = False
w.ygrid.visible = False
w.xaxis.formatter=NumeralTickFormatter(format="0,0")

_arr_world = column(row(_world, _total_cases), w, _blank,_Closed_cases, row(_total_death, _total_rec))

dr_ex_ch = Div(text=_s, width=675, height=30, style={'font-size': '150%', 'color': 'red'}, background = 'floralwhite')
dr_w = Div(text=_s1,width=675, height=30, style = {'font-size': '150%', 'color': 'grey'}, background = 'floralwhite')

ind_1 = Div(text=_s_ind_1, width=400, height=20, style={'font-size': '150%', 'color': 'darkblue'})
ind_2 = Div(text=_s_ind_2,width=450, height=30, style = {'font-size': '150%', 'color': 'chocolate'})
ind_3 = Div(text=_s_ind_3,width=450, height=20, style = {'font-size': '115%', 'color': 'darkgreen'})

_arr_ind = column(_blank, row(_india, ind_1), ind_2, ind_3)

table_title_today = Div(text=_s_dt_title, width=200, height=10, style={'font-size': '100%', 'color': 'black'})
table_title_yest = Div(text=_s_dt_title_y, width=200, height=20, style={'font-size': '100%', 'color': 'black'})

_cal = Div(text=_s_dt_cal,width=675, height=10, style = {'font-size': '75%', 'color': 'grey'})

# Total Deaths (Excluding China)
s1 = figure(title='Total Deaths by Country (Top 10 - Excluding China)',
             plot_height=100, plot_width=700,
             x_axis_label='Countries', y_axis_label='Total Deaths/Recovered',
             x_range = df_plt['Country'].to_list(),y_range=(0, df_plt['Total_recovered'].values.max()+ df_plt['Total_Deaths'].values.max()),
             toolbar_location=None, tooltips = TOOLTIPS)  # Instantiate a figure() object

s1.vbar_stack(['Total_Deaths','Total_recovered'] ,x='Country', width=0.6, color=("darkblue", "lightblue"), source=source)
s1.xgrid.visible = False
s1.ygrid.visible = False

li1 = LegendItem(label='Total Deaths', renderers=[s1.renderers[0]])
li2 = LegendItem(label='Total Recovered', renderers=[s1.renderers[1]])
legend1 = Legend(items=[li1, li2], location='top_right')
s1.add_layout(legend1)

# New Cases & Deaths 
_source = ColumnDataSource(df_y_plt_10)
_tool_tips = [
    ("Country" , "@Country"),
    ("New Cases", "@New_Cases_y{0,0}"),
    ("New Deaths", "@New_Deaths_y{0,0}"),
    ("Total Cases","@Total_Cases_y{0,0}")]
s5 = figure(title='New Cases by Country (Top 10) as on Yesterday',
             plot_height=500, plot_width=700,
             x_axis_label='Countries', y_axis_label='New Cases & Deaths',
             x_range = df_y_plt_10['Country'].to_list(),y_range=(0, df_y_plt_10['New_Cases_y'].values.max()+ 500),
             toolbar_location=None, tooltips = _tool_tips)  
# s5.vbar_stack(['New_Deaths_y','New_Cases_y'] ,x='Country', width=0.8, color=("brown", "pink"), source=_source)
s5.vbar(x=dodge('Country', -0.33,  range=s5.x_range), top='New_Cases_y', width=0.3, source=_source,
       color="wheat", legend_label="New Cases")
s5.vbar(x=dodge('Country', 0.0, range=s5.x_range), top='New_Deaths_y', width=0.3, source=_source,
       color="brown", legend_label="New Deaths")
s5.xgrid.visible = False
s5.ygrid.visible = False
# li1 = LegendItem(label='New Deaths', renderers=[s5.renderers[0]])
# li2 = LegendItem(label='New Cases', renderers=[s5.renderers[1]])
# legend1 = Legend(items=[li1, li2], location='top_right')
# s5.add_layout(legend1)
s5.x_range.range_padding = 0.1
s5.legend.location = "top_right"
s5.legend.orientation = "vertical"
s5.yaxis.formatter=NumeralTickFormatter(format="0,0")

# Total Chart

_source_s6 = ColumnDataSource(df_new)
_tool_tips_p = [
    ("Country" , "@Country"),
    ("Total Cases", "@Total_Cases{0,0}"),
    ("Active Cases", "@Active_Cases{0,0}"),
    ("Serious Cases","@Serious_Cases{0,0}")]

s6 = figure(x_range=_source_s6.data['Country'], y_range=(0, _source_s6.data['Total_Cases'].max()+5000), plot_height=450, plot_width=700, 
            title="Total Cases by Country (Top 10)", toolbar_location=None, tools="", tooltips = _tool_tips_p)

s6.vbar(x=dodge('Country', -0.33, range=s6.x_range), top='Total_Cases', width=0.3, source=_source_s6,
       color="lightpink", legend_label="Total Cases")

s6.vbar(x=dodge('Country',  0.0,  range=s6.x_range), top='Active_Cases', width=0.3, source=_source_s6,
       color="lightsalmon", legend_label="Active Cases")

s6.x_range.range_padding = 0.1
s6.xgrid.grid_line_color = None
s6.legend.location = "top_right"
s6.legend.orientation = "vertical"
s6.xgrid.visible = False
s6.ygrid.visible = False
s6.yaxis.formatter=NumeralTickFormatter(format="0,0")

# # Pie Chart
# s2 = figure(plot_height=80, title="New Cases (Top 10 Countries)", toolbar_location=None,
#            tools="hover", tooltips="@Country: @New_Cases", x_range=(-0.5, 1.0))

# s2.wedge(x=0, y=1, radius=0.4,
#         start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
#         line_color="white", fill_color='color', legend= 'Country', source=df_pie_10)
# df_pie_10["value"] = df_pie_10['New_Cases'].astype(str)
# df_pie_10["value"] = df_pie_10["value"].str.pad(25, side = "left")
# labels = LabelSet(x=0, y=1, text='value', angle=cumsum('angle', include_zero=True), source=ColumnDataSource(df_pie_10), render_mode='canvas')
# s2.add_layout(labels)

# s2.axis.axis_label=None
# s2.axis.visible=False
# s2.grid.grid_line_color = None

Columns = [TableColumn(field=Ci, title=Ci) for Ci in df_ratio_table.columns] # bokeh columns
data_table = DataTable(columns=Columns, source=ColumnDataSource(df_ratio_table), fit_columns=True, width = 675, height=475
                      , index_position=None) # bokeh table

_Columns = [TableColumn(field=Ci, title=Ci) for Ci in df_y_plt.columns] # bokeh columns
data_table_y = DataTable(columns=_Columns, source=ColumnDataSource(df_y_plt), fit_columns=True, width = 675, height=475
                      , index_position=None) # bokeh table

_txt = column(_arr_world,_arr_ind)

# Organize the layout
grid = gridplot([[_txt,s6],[dr_ex_ch, dr_w],[s5, s1],[table_title_today, table_title_yest,],[_cal,None],[data_table, data_table_y]], plot_width=600, plot_height=500)
# Preview and save 
show(grid)  # See what I made, and save if I like it

In [19]:
df_plt

Unnamed: 0,Country,Total_recovered,Total_Deaths,Total_Cases
8,USA,38144.0,25195.0,603496
10,Italy,37130.0,21067.0,162488
9,Spain,67504.0,18056.0,172541
11,France,28805.0,15729.0,143303
13,UK,0.0,12107.0,93873
14,Iran,48129.0,4683.0,74877
16,Belgium,6868.0,4157.0,31119
12,Germany,68200.0,3272.0,131170
17,Netherlands,250.0,2945.0,27419
