In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# Any results you write to the current directory are saved as output.

## Content
2019 Novel Coronavirus (2019-nCoV) is a virus (more specifically, a coronavirus) identified as the cause of an outbreak of respiratory illness first detected in Wuhan, China. Early on, many of the patients in the outbreak in Wuhan, China reportedly had some link to a large seafood and animal market, suggesting animal-to-person spread. However, a growing number of patients reportedly have not had exposure to animal markets, indicating person-to-person spread is occurring. At this time, it’s unclear how easily or sustainably this virus is spreading between people. - CDC

This dataset has daily-level information on the number of affected cases, deaths and recoveries from the 2019 novel coronavirus. Please note that this is time series data, so the number of cases on any given day is the cumulative number.

- The data is available from 22 Jan, 2020.



## Column Description

Main file in this dataset is covid_19_data.csv and the detailed descriptions are below.

covid_19_data.csv

- Sno - Serial number

- ObservationDate - Date of the observation in MM/DD/YYYY

- Province/State - Province or state of the observation (Could be empty when missing)

- Country/Region - Country of observation

- Last Update - Time in UTC at which the row is updated for the given province or country. (Not standardised and so please clean before using it)

- Confirmed - Cumulative number of confirmed cases till that date

- Deaths - Cumulative number of deaths till that date

- Recovered - Cumulative number of recovered cases till that date

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read the main observations file of the dataset and preview its first rows.
csv_path = '../input/novel-corona-virus-2019-dataset/covid_19_data.csv'
data_x = pd.read_csv(csv_path)
data_x.head()

In [None]:
# Print the frame's summary (columns, dtypes, non-null counts).
data_x.info()

In [None]:
# Count the missing values in each column.
data_x.isna().sum()

In [None]:
# (rows, columns) of the loaded frame.
data_x.shape

In [None]:
# Number of distinct values in each column.
data_x.nunique()

- Clearly there is no need to make it a time series.



In [None]:
# "Victims" = confirmed cases minus those recovered and minus those who died,
# i.e. the cases that are still open.
data_x['Victims'] = (
    data_x['Confirmed']
    .sub(data_x['Recovered'])
    .sub(data_x['Deaths'])
)
data_x['Victims'].head()

In [None]:
# Convert ObservationDate to pandas datetimes so that date arithmetic
# (used for the Days column) works on it.
data_x['ObservationDate'] = pd.to_datetime(data_x['ObservationDate'])
data_x['ObservationDate'].head()

In [None]:


# Time elapsed since 22 Jan 2020, the reference date for the Days column.
# Subtracting a scalar Timestamp broadcasts over the whole column, so there is
# no need to build a same-length index of identical dates.
# Requires ObservationDate to already be a datetime column.
data_x['Days'] = data_x['ObservationDate'] - pd.Timestamp('2020-01-22')
data_x.head()

In [None]:
# Store Days as a plain number of whole days since 22 Jan 2020.
# `.astype('timedelta64[D]')` is rejected by pandas 2.x (only s/ms/us/ns
# resolutions are supported); `.dt.days` is the supported way to get day counts.
# The value is derived from ObservationDate rather than from the current Days
# column so that re-running this cell gives the same result.
data_x["Days"] = (data_x["ObservationDate"] - pd.Timestamp("2020-01-22")).dt.days
data_x.head()

The idea here is to measure time as a day offset, so that values can be compared from one day to the next.
- `Days` holds the number of days elapsed since 22 Jan 2020, the first date in the data.

In [None]:
# Horizontal box plot of the Confirmed counts over every row of the dataset.
sns.set(style='whitegrid')
# Pass the series as `x=`: newer seaborn treats the first positional argument
# as `data`, which changes the plot, and older releases warn about it.
sns.boxplot(x=data_x['Confirmed'], color='g')

In [None]:
# Keep only the rows reported for Mainland China.
is_mainland_china = data_x['Country/Region'].eq('Mainland China')
chin = data_x[is_mainland_china]
chin.head()

In [None]:
# Scatter of the Deaths column against Days for the Mainland China rows.
ax = plt.gca()
ax.scatter(chin['Days'], chin['Deaths'])
ax.set_xlabel("Days")
ax.set_ylabel("Death_toll")
ax.set_title("Deaths In Mainland China")

In [None]:
# Italy: Deaths against Days (India is handled in a later cell).
ita = data_x[data_x['Country/Region'].eq('Italy')]

ax = plt.gca()
ax.scatter(ita['Days'], ita['Deaths'])
ax.set_xlabel("Days")
ax.set_ylabel("Death_toll")
ax.set_title("Deaths In Italy")

In [None]:
# Italy: the Victims column (confirmed - recovered - deaths) against Days.
ax = plt.gca()
ax.scatter(ita['Days'], ita['Victims'])
ax.set_xlabel("Days")
ax.set_ylabel("Victims_toll")
ax.set_title("Victims In Italy")

In [None]:
# Summary statistics of the numeric columns for the Italy rows.
ita.describe()

In [None]:
# Number of distinct values per column within the Italy rows.
ita.nunique()

In [None]:
# India: the Victims column (confirmed - recovered - deaths) against Days.
ind = data_x[data_x['Country/Region'].eq('India')]

ax = plt.gca()
ax.scatter(ind['Days'], ind['Victims'])
ax.set_xlabel("Days")
ax.set_ylabel("No. of Victims")
ax.set_title("India's Statistics Against Corona")

In [None]:
# Display the India rows (the whole filtered frame, not just a preview).
ind

In [None]:
# Number of distinct values per column within the India rows.
ind.nunique()

In [None]:
# Mainland China rows again, bound to `x`, the name the cells below work with
# (same filter as the earlier `chin` frame).
mainland_mask = data_x['Country/Region'].eq("Mainland China")
x = data_x[mainland_mask]
x.head()

In [None]:
# Number of distinct values per column within the Mainland China rows.
x.nunique()

In [None]:
# Horizontal box plot of the Deaths column for the Mainland China rows.
sns.set(style='whitegrid')
# `x=` keeps the intended orientation: newer seaborn treats the first
# positional argument as `data`, and older releases warn about it.
sns.boxplot(x=x['Deaths'], color='b')

In [None]:
# Horizontal box plot of the Confirmed column for the Mainland China rows.
sns.set(style='whitegrid')
# `x=` keeps the intended orientation: newer seaborn treats the first
# positional argument as `data`, and older releases warn about it.
sns.boxplot(x=x['Confirmed'], color='b')

In [None]:
# Summary statistics of the numeric columns for the Mainland China rows.
x.describe()

In [None]:
# Count the missing values per column in the Mainland China rows.
x.isna().sum()

In [None]:
# Scatter of the Deaths column against Days over every row of the dataset.
plt.figure(figsize=(10,5))
plt.grid(True)
# The scatter needs a label: plt.legend() only lists labelled artists and
# otherwise draws an empty legend and emits a warning.
plt.scatter(data_x['Days'], data_x['Deaths'], marker='o', color='r', label='Deaths')
plt.legend(loc=2)
plt.xlabel('Number of Days')
plt.ylabel('Deaths')
plt.title('Total Death Count ')

In [None]:
# Victims and Deaths for the first 100 Mainland China rows, plotted against
# the rows' index labels in the frame.
plt.plot(x.Victims[:100],'b',label='Active Victims')
plt.plot(x.Deaths[:100],'r',label='Deaths')
# The labels above are only shown once a legend is drawn.
plt.legend()
plt.xlabel('Row index')
plt.ylabel('Count')

In [None]:
# Joint distribution of Confirmed vs Deaths for the Mainland China rows.
# x/y must be keywords: seaborn >= 0.12 accepts only `data` positionally, so
# the positional form raises a TypeError there.
joint = sns.jointplot(x=x['Confirmed'], y=x['Deaths'], kind='scatter', height=6, color='skyblue')
# A joint plot has several axes; plt.title() would label whichever axes is
# current (a marginal one), so put the title on the figure instead.
joint.ax_joint.figure.suptitle('Death v/s Confirmed Cases', y=1.02)

In [None]:
# Plotly offline helpers: `iplot` is what the cells below use to render each
# figure; `init_notebook_mode()` is called once here before any figure is drawn.
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode()



In [None]:
import plotly.graph_objs as go 

In [None]:
# Bar chart of the Victims column per Mainland China province.
# NOTE(review): `x` is not aggregated here, so a province presumably gets one
# bar per row it appears in - confirm that this is the intended view.
layout = dict(
    title="Victims in Different Provinces of China",
    xaxis=dict(title="Provinces/States", tickangle=90),
    yaxis=dict(title="Number of Victims"),
)
trace = go.Bar(x=x["Province/State"], y=x["Victims"])
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

- Condition of Different Provinces of China.

- Clearly, the **Hubei** region of Mainland China has the highest number of deaths, followed by the province of **Zhejiang**.

In [None]:
# Bar chart of the Deaths column per Mainland China province.
layout = dict(
    title="Deaths in Different Provinces of China",
    xaxis=dict(title="Provinces/States", tickangle=90),
    yaxis=dict(title="Number of Deaths"),
)
trace = go.Bar(x=x["Province/State"], y=x["Deaths"])
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Bar chart of the Deaths column against Days for the Mainland China rows.
layout = dict(
    title="Deaths in China with respect to Days",
    xaxis=dict(title="No. of Days", tickangle=0),
    yaxis=dict(title="Number of Deaths"),
)
trace = go.Bar(x=x["Days"], y=x["Deaths"])
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
## Recoveries in China: the Recovered column against Days.
layout = dict(
    title="Recovery in China",
    xaxis=dict(title="Days", tickangle=0),
    yaxis=dict(title="Recovered Total"),
)
trace = go.Bar(x=x["Days"], y=x["Recovered"])
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Preview the Mainland China rows again before aggregating them.
x.head()

## Recovery v/s Victims in China.

In [None]:
## Build a per-day summary for Mainland China: Deaths, Victims, Confirmed
## and Recovered summed over all of its rows for each value of Days.

Total_days = x['Days'].nunique()

# Select the columns with a list. Tuple-style selection on a groupby
# (`[...]['Deaths','Victims',...]`) was deprecated in pandas 1.0 and raises in
# pandas 2.0.
C = x.groupby('Days')[['Deaths','Victims','Confirmed','Recovered']].sum()
C['Days'] = C.index
# Re-number the rows from 1; sizing from len(C) guarantees the lengths match.
C.index = np.arange(1, len(C) + 1)

stat = C[['Days','Deaths','Victims','Confirmed','Recovered']]
stat.sort_values(by=['Days'],ascending=True)

In [None]:
# Grouped bars per day: confirmed cases next to recovered patients.
layout = dict(
    title='Confirmed Cases v/s Recovered Patients in China',
    xaxis=dict(title="No. of Days"),
    yaxis=dict(title="No. of Patients"),
    barmode="group",
)
tr1 = go.Bar(x=stat['Days'], y=stat["Confirmed"], name="Confirmed Cases")
tr = go.Bar(x=stat['Days'], y=stat["Recovered"], name="Recovered Patients")
data = [tr1, tr]

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Same two series as the grouped chart, stacked, with recoveries listed first.
layout = dict(
    title='Confirmed Cases v/s Recovered Patients',
    xaxis=dict(title="No. of Days"),
    yaxis=dict(title="No. of Patients"),
    barmode="stack",
)
tr1 = go.Bar(x=stat['Days'], y=stat["Confirmed"], name="Confirmed Cases")
tr = go.Bar(x=stat['Days'], y=stat["Recovered"], name="Recovered Patients")
data = [tr, tr1]

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# The same kind of chart for Italy: the Deaths column against Days.

layout = dict(
    title="Deaths in Italy",
    xaxis=dict(title="No.of Days", tickangle=0),
    yaxis=dict(title="Number of Deaths"),
)
trace = go.Bar(x=ita["Days"], y=ita["Deaths"])
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)


- Clearly, by the end of the 48th day (11th March), Italy had lost more than 600 lives.

In [None]:
# Group the Deaths column of the Mainland China rows by province, then count
# how many distinct Deaths values each province has.
y = x.groupby('Province/State')['Deaths']
y.nunique()

In [None]:
# Rows of the Mainland China frame that belong to Hubei province.
is_hubei = x['Province/State'].eq('Hubei')
h = x[is_hubei]
h

In [None]:
# Bar chart of the Deaths column against Days for Hubei.
layout = dict(
    title="Deaths in Hubei(China)",
    xaxis=dict(title="No.of Days", tickangle=0),
    yaxis=dict(title="Number of Deaths"),
)
trace = go.Bar(x=h["Days"], y=h["Deaths"])
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)


In [None]:
# Grouped bars per day for Hubei: recovered next to deaths.
layout = dict(
    title="Recovery v/s Deaths in Hubei(China)",
    xaxis=dict(title="No.of Days", tickangle=0),
    yaxis=dict(title="Recovery and Death rate"),
    barmode="group",
)
trace = go.Bar(x=h["Days"], y=h["Recovered"], name="Recovered")
trace1 = go.Bar(x=h["Days"], y=h["Deaths"], name="Deaths")
data = [trace, trace1]
fig = go.Figure(data=data, layout=layout)
iplot(fig)


In [None]:
# Horizontal box plot of the Deaths column for Hubei.
# `x=` keeps the intended orientation: newer seaborn treats the first
# positional argument as `data`, and older releases warn about it.
sns.boxplot(x=h['Deaths'])

- So, clearly the highest number of deaths is in Mainland China, with most of them occurring in the province of "Hubei".

In [None]:
## Build a per-country table with the summed Deaths and Victims columns.

Total_countries = data_x['Country/Region'].nunique()

# Select the columns with a list. Tuple-style selection on a groupby
# (`[...]['Deaths','Victims']`) was deprecated in pandas 1.0 and raises in
# pandas 2.0.
# NOTE(review): the dataset description says the counts are cumulative per
# date, so summing over every row adds the same cases once per day reported.
# Confirm whether latest-date totals were intended instead.
COR = data_x.groupby('Country/Region')[['Deaths','Victims']].sum()
COR['Country/Region'] = COR.index
# Re-number the rows from 1; sizing from len(COR) guarantees the lengths match.
COR.index = np.arange(1, len(COR) + 1)

new = COR[['Country/Region','Deaths','Victims']]
new.sort_values(by=['Deaths'],ascending=False)

In [None]:
import plotly.express as px

# Countries ordered by summed Victims, drawn on a log-scaled y axis.
victims_ranked = new[['Country/Region', 'Victims']].sort_values('Victims', ascending=False)
fig = px.bar(
    victims_ranked,
    x="Country/Region",
    y="Victims",
    color='Country/Region',
    log_y=True,
    template='ggplot2',
    title='VICTIMS ANALYSIS',
)
fig.show()

# Same chart for summed Deaths.
deaths_ranked = new[['Country/Region', 'Deaths']].sort_values('Deaths', ascending=False)
fig = px.bar(
    deaths_ranked,
    x="Country/Region",
    y="Deaths",
    color='Country/Region',
    title='Deaths',
    log_y=True,
    template='ggplot2',
)
fig.show()

In [None]:
# Pie chart: each country's share of the summed Deaths column.
pie_options = dict(values='Deaths', names='Country/Region', title='Deaths Due to COVID')
fig = px.pie(new, **pie_options)
fig.show()


In [None]:
# Pie chart: each country's share of the summed Victims column.
pie_options = dict(values='Victims', names='Country/Region', title='Victims Analysis')
fig = px.pie(new, **pie_options)
fig.show()