# Importing Libararies

In [87]:
import gc
import os
from pathlib import Path
import random
import sys

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import scipy as sp


import matplotlib.pyplot as plt
import seaborn as sns

from IPython.core.display import display, HTML

# --- plotly ---
from plotly import tools, subplots
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
import plotly.io as pio
pio.templates.default = "plotly_white"

# Loading Dataset

In [88]:
df = pd.read_csv("train.csv")
df.head()

Unnamed: 0,Id,Province_State,Country_Region,Date,ConfirmedCases,Fatalities
0,1,,Afghanistan,2020-01-22,0.0,0.0
1,2,,Afghanistan,2020-01-23,0.0,0.0
2,3,,Afghanistan,2020-01-24,0.0,0.0
3,4,,Afghanistan,2020-01-25,0.0,0.0
4,5,,Afghanistan,2020-01-26,0.0,0.0


### Removing rows who have no Confirmed Cases and Fatality

In [89]:
df = df.drop(df[(df['ConfirmedCases'] == 0.0) & (df['Fatalities'] == 0.0)].index)
df.reset_index(drop=True).head()

Unnamed: 0,Id,Province_State,Country_Region,Date,ConfirmedCases,Fatalities
0,34,,Afghanistan,2020-02-24,1.0,0.0
1,35,,Afghanistan,2020-02-25,1.0,0.0
2,36,,Afghanistan,2020-02-26,1.0,0.0
3,37,,Afghanistan,2020-02-27,1.0,0.0
4,38,,Afghanistan,2020-02-28,1.0,0.0


## Top 10 countries with Highest Confirmed Cases 

In [90]:
top10_countries = df.groupby("Country_Region")["ConfirmedCases"].sum().sort_values(ascending = False).head(10)
print(top10_countries)

Country_Region
US                5135445.0
China             5096274.0
Italy             2661341.0
Spain             2237252.0
Germany           1728391.0
France            1476739.0
Iran              1184893.0
United Kingdom     749434.0
Turkey             408544.0
Switzerland        393842.0
Name: ConfirmedCases, dtype: float64


In [91]:
top10 = pd.DataFrame(top10_countries)
fig = px.bar(top10, x=top10.index, y='ConfirmedCases', labels={'x':'Country_Region'},
             color="ConfirmedCases", color_continuous_scale=px.colors.sequential.Plasma)
fig.update_layout(title_text=' Top 10 Confirmed COVID-19 cases by country')
fig.show()

## 10 Countries with Least Number of Covid-19 Cases

In [92]:
last10_countries = df.groupby("Country_Region")["ConfirmedCases"].sum().sort_values(ascending = True).head(10)
print(last10_countries)

Country_Region
South Sudan              17.0
Timor-Leste              23.0
Sao Tome and Principe    24.0
Western Sahara           28.0
Papua New Guinea         29.0
Burundi                  36.0
Sierra Leone             59.0
Malawi                   64.0
Gambia                   84.0
Botswana                 86.0
Name: ConfirmedCases, dtype: float64


In [93]:
last10 = pd.DataFrame(last10_countries)
fig = px.bar(last10, x=last10.index, y='ConfirmedCases', labels={'x':'Country_Region'},
             color="ConfirmedCases", color_continuous_scale=px.colors.sequential.Plasma)
fig.update_layout(title_text='Least Confirmed COVID-19 cases by country')
fig.show()

## Top 10 countries with Highest Death Cases

In [94]:
top10_deathcases = df.groupby("Country_Region")["Fatalities"].sum().sort_values(ascending = False).head(10)
print(top10_deathcases)

Country_Region
Italy             297444.0
Spain             200412.0
China             182450.0
US                147545.0
France            122427.0
Iran               75219.0
United Kingdom     72805.0
Netherlands        27083.0
Germany            24491.0
Belgium            23652.0
Name: Fatalities, dtype: float64


In [95]:
top10_death = pd.DataFrame(top10_deathcases)
fig = px.bar(top10_death, x=top10_death.index, y='Fatalities', labels={'x':'Country_Region'},
             color="Fatalities", color_continuous_scale=px.colors.sequential.Plasma)
fig.update_layout(title_text=' Top 10 Death cases by country')
fig.show()

## Covid-19 Cases in USA over Days

In [76]:
df_by_date_USA = pd.DataFrame(df.fillna('NA').groupby(['Country_Region','Date'])['ConfirmedCases'].sum().sort_values().reset_index())

fig = px.bar(df_by_date_USA.loc[(df_by_date_USA['Country_Region'] == 'US') &(df_by_date_USA.Date >= '2020-03-02')].sort_values('ConfirmedCases',ascending = False), 
             x='Date', y='ConfirmedCases', color="ConfirmedCases", color_continuous_scale=px.colors.sequential.BuGn)
fig.update_layout(title_text='Confirmed COVID-19 cases per day in USA')
fig.show()

## Covid-19 Death Cases in USA over Days

In [77]:
df_death_by_date_USA = pd.DataFrame(df.fillna('NA').groupby(['Country_Region','Date'])['Fatalities'].sum().sort_values().reset_index())

fig = px.bar(df_death_by_date_USA.loc[(df_death_by_date_USA['Country_Region'] == 'US') &(df_death_by_date_USA.Date >= '2020-03-08')].sort_values('Fatalities',ascending = False), 
             x='Date', y='Fatalities', color="Fatalities", color_continuous_scale=px.colors.sequential.Cividis_r)
fig.update_layout(title_text='Fatalities COVID-19 cases per day in USA')
fig.show()


## Covid-19 Cases in China over Days

In [78]:
df_by_date_China = pd.DataFrame(df.fillna('NA').groupby(['Country_Region','Date'])['ConfirmedCases'].sum().sort_values().reset_index())

fig = px.bar(df_by_date_China.loc[(df_by_date_China['Country_Region'] == 'China') &(df_by_date_China.Date >= '2019-01-22')].sort_values('ConfirmedCases',ascending = False), 
             x='Date', y='ConfirmedCases', color="ConfirmedCases", color_continuous_scale=px.colors.sequential.BuGn)
fig.update_layout(title_text='Confirmed COVID-19 cases per day in China')
fig.show()

## Covid-19 Death Cases in China over Days

In [79]:
df_death_by_date_China = pd.DataFrame(df.fillna('NA').groupby(['Country_Region','Date'])['Fatalities'].sum().sort_values().reset_index())

fig = px.bar(df_death_by_date_China.loc[(df_death_by_date_China['Country_Region'] == 'China') &(df_death_by_date_China.Date >= '2020-01-01')].sort_values('Fatalities',ascending = False), 
             x='Date', y='Fatalities', color="Fatalities", color_continuous_scale=px.colors.sequential.Cividis_r)
fig.update_layout(title_text='Fatalities COVID-19 cases per day in China')
fig.show()

## Covid-19 Cases in India over Days

In [80]:
df_by_date_India = pd.DataFrame(df.fillna('NA').groupby(['Country_Region','Date'])['ConfirmedCases'].sum().sort_values().reset_index())

fig = px.bar(df_by_date_India.loc[(df_by_date_India['Country_Region'] == 'India') &(df_by_date_India.Date >= '2020-03-04')].sort_values('ConfirmedCases',ascending = False), 
             x='Date', y='ConfirmedCases', color="ConfirmedCases", color_continuous_scale=px.colors.sequential.BuGn)
fig.update_layout(title_text='Confirmed COVID-19 cases per day in IND')
fig.show()

## Covid-19 Death Cases in India over Days

In [96]:
df_death_by_date_India = pd.DataFrame(df.fillna('NA').groupby(['Country_Region','Date'])['Fatalities'].sum().sort_values().reset_index())

fig = px.bar(df_death_by_date_India.loc[(df_death_by_date_India['Country_Region'] == 'India') &(df_death_by_date_India.Date >= '2020-03-08')].sort_values('Fatalities',ascending = False), 
             x='Date', y='Fatalities', color="Fatalities", color_continuous_scale=px.colors.sequential.Cividis_r)
fig.update_layout(title_text='Fatalities COVID-19 cases per day in IND')
fig.show()

## Top 10 countries with Highest Active Cases

In [85]:
df['Active_Cases']= df['ConfirmedCases'] - df['Fatalities']
top10_Active = df.groupby("Country_Region")["Active_Cases"].sum().sort_values(ascending = False).head(10)
print(top10_Active)

Country_Region
US                4987900.0
China             4913824.0
Italy             2363897.0
Spain             2036840.0
Germany           1703900.0
France            1354312.0
Iran              1109674.0
United Kingdom     676629.0
Turkey             400234.0
Switzerland        382813.0
Name: Active_Cases, dtype: float64


In [86]:
top10_ActiveCases = pd.DataFrame(top10_Active)
fig = px.bar(top10_ActiveCases, x=top10_ActiveCases.index, y='Active_Cases', labels={'x':'Country_Region'},
             color="Active_Cases", color_continuous_scale=px.colors.sequential.Plasma)
fig.update_layout(title_text=' Top 10 Active COVID-19 cases by country')
fig.show()