# Importing Library

In [1]:
import datetime
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



/kaggle/input/covid19/train_data.csv


# Loading Dataset

In [2]:
df = pd.read_csv('../input/covid19/train_data.csv')
df.head()

Unnamed: 0,Id,Province_State,Country_Region,Date,ConfirmedCases,Fatalities
0,1,,Afghanistan,2020-01-22,0.0,0.0
1,2,,Afghanistan,2020-01-23,0.0,0.0
2,3,,Afghanistan,2020-01-24,0.0,0.0
3,4,,Afghanistan,2020-01-25,0.0,0.0
4,5,,Afghanistan,2020-01-26,0.0,0.0


In [3]:
print('Data information')
print(df.info(), end='\n\n')
print('Checking for null values')
print(df.isnull().sum(), end='\n\n')
print('Necessary information from the dataset')
print('Total affected countries ', len(df['Country_Region'].unique()))
print('Total confirmed cases ', df['ConfirmedCases'].sum())
print('Total fatalities cases ', df['Fatalities'].sum())

Data information
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25353 entries, 0 to 25352
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Id              25353 non-null  int64  
 1   Province_State  10773 non-null  object 
 2   Country_Region  25353 non-null  object 
 3   Date            25353 non-null  object 
 4   ConfirmedCases  25353 non-null  float64
 5   Fatalities      25353 non-null  float64
dtypes: float64(2), int64(1), object(3)
memory usage: 1.2+ MB
None

Checking for null values
Id                    0
Province_State    14580
Country_Region        0
Date                  0
ConfirmedCases        0
Fatalities            0
dtype: int64

Necessary information from the dataset
Total affected countries  184
Total confirmed cases  25449963.0
Total fatalities cases  1277610.0


# Global Confirmed Cases from 2020/01/22 to 2020/04/11

In [4]:
d = df['Date'].unique()
date = {}
for i in d:
    date.update({i:0})
    
for i in date:
    date.update({i:df[df['Date']==i]['ConfirmedCases'].sum()})

In [5]:
x_values = [datetime.datetime.strptime(d,"%Y-%m-%d").date() for d in date.keys()]
y_values = date.values()

In [6]:
x_values = [i for i in range(1,82)]
y_values = [i for i in y_values]

fig = px.line(x=x_values, y=y_values, title='Global Confirmed Cases', labels={'x':'Days', 'y':'Confirmed Case'},height=600,)
fig.show()

# Global Fatalities Cases from 2020/01/22 to 2020/04/11

In [7]:
d = df['Date'].unique()
date = {}
for i in d:
    date.update({i:0})
    
for i in date:
    date.update({i:df[df['Date']==i]['Fatalities'].sum()})

In [8]:
x_values = [datetime.datetime.strptime(d,"%Y-%m-%d").date() for d in date.keys()]
y_values = date.values()

In [9]:
x_values = [i for i in range(1,82)]
y_values = [i for i in y_values]

fig = px.line(x=x_values, y=y_values, title='Global Fatality Cases', labels={'x':'Days', 'y':'Fatality Case'},height=600,)
fig.show()

# Countrywise Analysis

In [10]:
data = {'Country':[], 'ConfirmedCases':[], 'Fatalities':[]}
data.update({'Country':df['Country_Region'].unique()})

confirm_case = []
for i in data['Country']:
    confirm_case.append(df[df['Country_Region'] == i]['ConfirmedCases'].sum())

fatalities_case = []
for i in data['Country']:
    fatalities_case.append(df[df['Country_Region'] == i]['Fatalities'].sum())
    
data.update({'ConfirmedCases':confirm_case})
data.update({'Fatalities':fatalities_case})

In [11]:
data = pd.DataFrame(data)
data.head()

Unnamed: 0,Country,ConfirmedCases,Fatalities
0,Afghanistan,5458.0,146.0
1,Albania,6184.0,313.0
2,Algeria,20118.0,2192.0
3,Andorra,8430.0,262.0
4,Angola,205.0,28.0


In [12]:
df_confirm_asc = data.sort_values(by=['ConfirmedCases'], ascending=False)

In [13]:
df_confirm_asc = df_confirm_asc.reset_index(drop=True)
df_confirm_asc.style.background_gradient(cmap="Reds")

Unnamed: 0,Country,ConfirmedCases,Fatalities
0,US,5135445.0,147545.0
1,China,5096274.0,182450.0
2,Italy,2661341.0,297444.0
3,Spain,2237252.0,200412.0
4,Germany,1728391.0,24491.0
5,France,1476739.0,122427.0
6,Iran,1184893.0,75219.0
7,United Kingdom,749434.0,72805.0
8,Turkey,408544.0,8310.0
9,Switzerland,393842.0,11029.0


In [14]:
x_values = [i for i in df_confirm_asc.loc[0:9,'Country']]
y_values = [i for i in df_confirm_asc.loc[0:9,'ConfirmedCases']]
y_values = y_values[::-1]
x_values = x_values[::-1]
df1 = {'Country':x_values, 'ConfirmedCases':y_values}
df1 = pd.DataFrame(df1)

In [15]:
fig = px.bar(df1, x='ConfirmedCases', y='Country',  color_discrete_sequence=["red"]*10, title='Top 10 Highest Confirmed Cases Country', barmode="group")
fig.show()

In [16]:
df_fatality_asc = data.sort_values(by=['Fatalities'], ascending=False)
df_fatality_asc = df_fatality_asc.reset_index(drop=True)
df_fatality_asc.style.background_gradient(cmap="Reds")

Unnamed: 0,Country,ConfirmedCases,Fatalities
0,Italy,2661341.0,297444.0
1,Spain,2237252.0,200412.0
2,China,5096274.0,182450.0
3,US,5135445.0,147545.0
4,France,1476739.0,122427.0
5,Iran,1184893.0,75219.0
6,United Kingdom,749434.0,72805.0
7,Netherlands,310043.0,27083.0
8,Germany,1728391.0,24491.0
9,Belgium,319318.0,23652.0


In [17]:
x_values = [i for i in df_fatality_asc.loc[0:9,'Country']]
y_values = [i for i in df_fatality_asc.loc[0:9,'Fatalities']]
x_values = x_values[::-1]
y_values = y_values[::-1]
df1 = {'Country':x_values, 'Fatality Cases':y_values}
df1 = pd.DataFrame(df1)

In [18]:
fig = px.bar(df1, x='Fatality Cases', y='Country',  color_discrete_sequence=["red"]*10, title='Top 10 Highest Fatality Cases Country', barmode="group")
fig.show()

In [19]:
df_confirm_asc = data.sort_values(by=['ConfirmedCases'], ascending=True)
df_confirm_asc = df_confirm_asc.reset_index(drop=True)
df_confirm_asc.style.background_gradient(cmap="Reds")

Unnamed: 0,Country,ConfirmedCases,Fatalities
0,South Sudan,17.0,0.0
1,Timor-Leste,23.0,0.0
2,Sao Tome and Principe,24.0,0.0
3,Western Sahara,28.0,0.0
4,Papua New Guinea,29.0,0.0
5,Burundi,36.0,0.0
6,Sierra Leone,59.0,0.0
7,Malawi,64.0,6.0
8,Gambia,84.0,20.0
9,Botswana,86.0,12.0


In [20]:
x_values = [i for i in df_confirm_asc.loc[0:9,'Country']]
y_values = [i for i in df_confirm_asc.loc[0:9,'ConfirmedCases']]
x_values = x_values[::-1]
y_values = y_values[::-1]
df1 = {'Country':x_values, 'ConfirmedCases':y_values}
df1 = pd.DataFrame(df1)

In [21]:
fig = px.bar(df1, x='ConfirmedCases', y='Country',  color_discrete_sequence=["red"]*10, title='10 Lowest Confirmed Cases Country', barmode="group")
fig.show()

In [22]:
df_fatality_asc = data.sort_values(by=['Fatalities'], ascending=True)
df_fatality_asc = df_fatality_asc.reset_index(drop=True)
df_fatality_asc.style.background_gradient(cmap="Reds")

Unnamed: 0,Country,ConfirmedCases,Fatalities
0,Saint Kitts and Nevis,137.0,0.0
1,Equatorial Guinea,312.0,0.0
2,Madagascar,1150.0,0.0
3,Sao Tome and Principe,24.0,0.0
4,Saint Vincent and the Grenadines,97.0,0.0
5,Burundi,36.0,0.0
6,Maldives,498.0,0.0
7,Cambodia,2524.0,0.0
8,Saint Lucia,219.0,0.0
9,Laos,192.0,0.0


In [23]:
x_values = [i for i in df_fatality_asc.loc[34:43,'Country']]
y_values = [i for i in df_fatality_asc.loc[34:43,'Fatalities']]
x_values = x_values[::-1]
y_values = y_values[::-1]
df1 = {'Country':x_values, 'Fatalities':y_values}
df1 = pd.DataFrame(df1)

In [24]:
fig = px.bar(df1, x='Fatalities', y='Country',  color_discrete_sequence=["red"]*10, title='10 Lowest Fatality Cases Country', barmode="group")
fig.show()

In [25]:
df_confirm_asc = data.sort_values(by=['ConfirmedCases'], ascending=False)
df_confirm_asc = df_confirm_asc.reset_index(drop=True)
x_values = [i for i in df_confirm_asc.loc[0:9,'Country']]
y_values = [i for i in df_confirm_asc.loc[0:9,'ConfirmedCases']]
df1 = {'Country':x_values, 'ConfirmedCases':y_values}
df1 = pd.DataFrame(df1)

fig = px.pie(df1, values='ConfirmedCases', names='Country', title='Top 10 Highest Confirmed Cases Country', color_discrete_sequence=px.colors.sequential.RdBu)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [26]:
df_fatality_asc = data.sort_values(by=['Fatalities'], ascending=False)
df_fatality_asc = df_fatality_asc.reset_index(drop=True)
x_values = [i for i in df_fatality_asc.loc[0:9,'Country']]
y_values = [i for i in df_fatality_asc.loc[0:9,'Fatalities']]
df1 = {'Country':x_values, 'Fatalities':y_values}
df1 = pd.DataFrame(df1)

fig = px.pie(df1, values='Fatalities', names='Country', title='Top 10 Highest Fatality Cases Country', color_discrete_sequence=px.colors.sequential.RdBu)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [27]:
data = {'Date':[], 'US':[], 'China':[], 'Italy':[], 'Spain':[], 'Germany':[], 'France':[], 'Iran':[]}
data.update({'Date': df['Date'].unique()})

for i in data['Date']:
    data['US'].append(df[(df['Date']==i) & (df['Country_Region']=='US')]['ConfirmedCases'].sum())
    data['China'].append(df[(df['Date']==i) & (df['Country_Region']=='China')]['ConfirmedCases'].sum())
    data['Italy'].append(df[(df['Date']==i) & (df['Country_Region']=='Italy')]['ConfirmedCases'].sum())
    data['Spain'].append(df[(df['Date']==i) & (df['Country_Region']=='Spain')]['ConfirmedCases'].sum())
    data['Germany'].append(df[(df['Date']==i) & (df['Country_Region']=='Germany')]['ConfirmedCases'].sum())
    data['France'].append(df[(df['Date']==i) & (df['Country_Region']=='France')]['ConfirmedCases'].sum())
    data['Iran'].append(df[(df['Date']==i) & (df['Country_Region']=='Iran')]['ConfirmedCases'].sum())
    
data = pd.DataFrame(data)
data.head() 

Unnamed: 0,Date,US,China,Italy,Spain,Germany,France,Iran
0,2020-01-22,0.0,548.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,643.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,0.0,920.0,0.0,0.0,0.0,2.0,0.0
3,2020-01-25,0.0,1406.0,0.0,0.0,0.0,3.0,0.0
4,2020-01-26,0.0,2075.0,0.0,0.0,0.0,3.0,0.0


In [28]:
df_long=pd.melt(data, id_vars=['Date'], value_vars=['US', 'China', 'Italy', 'Spain', 'Germany','France','Iran'])
fig = px.line(df_long, x='Date', y='value', color='variable', labels={'Date':'Date', 'value':'Confirmed Case'} ,title = 'US, Chian, Italy, Spain, Germany, France, Iran Confirmed Cases \nFrom 2020/01/22 to 2020/04/11')
fig.show()

In [29]:
data = {'Date':[], 'US':[], 'China':[], 'Italy':[], 'Spain':[], 'Germany':[], 'France':[], 'Iran':[]}
data.update({'Date': df['Date'].unique()})

for i in data['Date']:
    data['US'].append(df[(df['Date']==i) & (df['Country_Region']=='US')]['Fatalities'].sum())
    data['China'].append(df[(df['Date']==i) & (df['Country_Region']=='China')]['Fatalities'].sum())
    data['Italy'].append(df[(df['Date']==i) & (df['Country_Region']=='Italy')]['Fatalities'].sum())
    data['Spain'].append(df[(df['Date']==i) & (df['Country_Region']=='Spain')]['Fatalities'].sum())
    data['Germany'].append(df[(df['Date']==i) & (df['Country_Region']=='Germany')]['Fatalities'].sum())
    data['France'].append(df[(df['Date']==i) & (df['Country_Region']=='France')]['Fatalities'].sum())
    data['Iran'].append(df[(df['Date']==i) & (df['Country_Region']=='Iran')]['Fatalities'].sum())
    
data = pd.DataFrame(data)
data.head()

Unnamed: 0,Date,US,China,Italy,Spain,Germany,France,Iran
0,2020-01-22,0.0,17.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-23,0.0,18.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-24,0.0,26.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-25,0.0,42.0,0.0,0.0,0.0,0.0,0.0
4,2020-01-26,0.0,56.0,0.0,0.0,0.0,0.0,0.0


In [30]:
df_long=pd.melt(data, id_vars=['Date'], value_vars=['US', 'China', 'Italy', 'Spain', 'Germany','France','Iran'])
fig = px.line(df_long, x='Date', y='value', color='variable', labels={'Date':'Date', 'value':'Fatality Case'} ,title = 'US, Chian, Italy, Spain, Germany, France, Iran Confirmed Cases \nFrom 2020/01/22 to 2020/04/11')
fig.show()