# A simple analysis and visualization on COVID19

### Business Understanding
For this project, I used Covid19 data from kaggle to analyze:

- How the pandemic spread throughout the world
- How India is doing

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
pd.set_option('display.max_rows', None)
from plotly.subplots import make_subplots
import seaborn as sns
import datetime

### Data Understanding
First, we take a general look at our data, for example to see which countries are doing well at this time.


In [3]:
# Read the raw COVID-19 table from the CSV file next to the notebook.
data = pd.read_csv('covid_19_data.csv')

# Share of missing values per column, expressed in percent.
NAN = pd.DataFrame(
    [(column, data[column].isna().mean() * 100) for column in data],
    columns=["column_name", "percentage"],
)
print("NAN percentage :", NAN)

# A missing province/state is kept as an explicit 'Unknown' label
# instead of NaN.
data["Province/State"] = data["Province/State"].fillna('Unknown')

NAN percentage :        column_name  percentage
0              SNo    0.000000
1  ObservationDate    0.000000
2   Province/State   30.266684
3   Country/Region    0.000000
4      Last Update    0.000000
5        Confirmed    0.000000
6           Deaths    0.000000
7        Recovered    0.000000


In [4]:
# Preview the first five rows to check how the columns were loaded.
data.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [5]:
# The three count columns are loaded as floats (1.0, 14.0, ...); cast them
# to plain integers.
_count_columns = ["Confirmed", "Deaths", "Recovered"]
data[_count_columns] = data[_count_columns].astype(int)

# Use a single label for China: 'Mainland China' becomes 'China'.
data['Country/Region'] = data['Country/Region'].replace({'Mainland China': 'China'})

In [6]:
# Derived feature "Active": confirmed cases that are neither deceased
# nor recovered.
data['Active'] = data['Confirmed'] - (data['Deaths'] + data['Recovered'])
data.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active
0,1,01/22/2020,Anhui,China,1/22/2020 17:00,1,0,0,1
1,2,01/22/2020,Beijing,China,1/22/2020 17:00,14,0,0,14
2,3,01/22/2020,Chongqing,China,1/22/2020 17:00,6,0,0,6
3,4,01/22/2020,Fujian,China,1/22/2020 17:00,1,0,0,1
4,5,01/22/2020,Gansu,China,1/22/2020 17:00,0,0,0,0


In [7]:
# Keep only the rows of the most recent observation date.
# ObservationDate is MM/DD/YYYY text, so max() on the raw strings compares
# them character by character (e.g. "12/31/2020" > "01/15/2021"). Parse the
# dates first so that "latest" really means the latest calendar day.
_observation_dates = pd.to_datetime(data['ObservationDate'], format='%m/%d/%Y')
LatestData = data[_observation_dates == _observation_dates.max()].reset_index()

# Look at the world...

In [8]:
# Collapse the latest snapshot into world-wide totals (one row per date).
Data_world = LatestData.groupby(["ObservationDate"])[["Confirmed", "Active", "Recovered", "Deaths"]].sum().reset_index()

# One-row summary table: each header is paired with the matching value
# from row 0 of Data_world.
labels = ["Last Update","Confirmed","Active cases","Recovered","Deaths"]
fig = go.Figure(data=[go.Table(header=dict(values=labels),cells=dict(values=Data_world.loc[0,["ObservationDate","Confirmed","Active","Recovered","Deaths"]]))])
# Title typo fixed: "word" -> "world".
fig.update_layout(title='Coronavirus in the world : ')
fig.show()

In [9]:
# Donut chart of the world totals: how the cases split into active,
# recovered and deceased.

labels = ["Active cases","Recovered","Deaths"]
values = Data_world.loc[0, ["Active","Recovered","Deaths"]]
fig = px.pie(
    Data_world,
    values=values,
    names=labels,
    color_discrete_sequence=['rgb(77,146,33)','rgb(69,144,185)','rgb(77,77,77)'],
    hole=0.7,
)
# The title carries the confirmed total from row 0.
fig.update_layout(title=f'Total cases : {Data_world["Confirmed"][0]}')
fig.show()

In [10]:
# Confirmed cases over time...

# World-wide totals per observation date.
data_over_time = data.groupby(["ObservationDate"])[["Confirmed","Active","Recovered","Deaths"]].sum().reset_index()
# ObservationDate is MM/DD/YYYY text; sorting the raw strings would place
# January of a later year before February of an earlier one. Order the rows
# by the parsed date instead, while keeping the column itself as text so
# that cells reading data_over_time['ObservationDate'] are unaffected.
data_over_time = data_over_time.loc[
    pd.to_datetime(data_over_time["ObservationDate"], format="%m/%d/%Y").sort_values().index
].reset_index(drop=True)

# The x axis is the row position, i.e. the number of days since the first
# observation date.
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Confirmed'], mode='lines', name='Confirmed cases'))
# Title typo fixed: "word" -> "world".
fig.update_layout(title='Evolution of Confirmed cases over time in the world', template='plotly_white', yaxis_title="Confirmed cases", xaxis_title="Days")
fig.show()

In [11]:
# World-wide active cases against the day index, drawn as a dotted line.

active_trace = go.Scatter(
    x=data_over_time.index,
    y=data_over_time['Active'],
    mode='lines',
    marker_color='yellow',
    name='Active cases',
    line={'dash': 'dot'},
)
fig = go.Figure(data=[active_trace])
fig.update_layout(
    title='Evolution of Active cases over time in the world',
    template='plotly_dark',
    yaxis_title="Active cases",
    xaxis_title="Days",
)
fig.show()

In [12]:
# World-wide recovered cases against the day index.

recovered_trace = go.Scatter(
    x=data_over_time.index,
    y=data_over_time['Recovered'],
    mode='lines',
    name='Recovered cases',
    marker_color='green',
)
fig = go.Figure(data=[recovered_trace])
fig.update_layout(
    title='Evolution of Recovered cases over time in the world',
    template='plotly_white',
    yaxis_title="Recovered cases",
    xaxis_title="Days",
)
fig.show()

In [13]:
# World-wide deaths against the day index, drawn as a dotted black line.

deaths_trace = go.Scatter(
    x=data_over_time.index,
    y=data_over_time['Deaths'],
    name='Deaths',
    marker_color='black',
    mode='lines',
    line={'dash': 'dot'},
)
fig = go.Figure(data=[deaths_trace])
fig.update_layout(
    title='Evolution of Deaths over time in the world',
    template='plotly_white',
    yaxis_title="Deaths",
    xaxis_title="Days",
)
fig.show()

In [14]:
# Confirmed cases per observation date
# NOTE(review): the plotted values are the per-date sums of the 'Confirmed'
# column; they look like running totals rather than new cases per day - confirm.

fig = go.Figure(go.Bar(x=data_over_time['ObservationDate'],y=data_over_time['Confirmed']))
# Axis titles were swapped: dates are on x, case counts on y.
fig.update_layout(title='Confirmed Cases In Each Day',template='plotly_white',xaxis_title="Days",yaxis_title="Confirmed Cases")
fig.show()

In [15]:
# Active cases per observation date...

fig = go.Figure(go.Bar(x=data_over_time['ObservationDate'],y=data_over_time['Active'],marker_color='rgb(253,187,132)'))
# Axis titles were swapped: dates are on x, case counts on y.
fig.update_layout(title='Active Cases In Each Day',template='plotly_dark',xaxis_title="Days",yaxis_title="Active Cases")
fig.show()

In [16]:
# Recovered cases per observation date...

fig = go.Figure(go.Bar(x=data_over_time['ObservationDate'],y=data_over_time['Recovered'],marker_color='rgb(178,24,43)'))
# Axis titles were swapped: dates are on x, case counts on y.
fig.update_layout(title='Recovered Cases In Each Day',template='plotly_white',xaxis_title="Days",yaxis_title="Recovered Cases")
fig.show()

In [17]:
# Deaths per observation date...

fig = go.Figure(go.Bar(x=data_over_time['ObservationDate'],y=data_over_time['Deaths'],marker_color='rgb(13,48,100)'))
# Axis titles were swapped: dates are on x, death counts on y.
fig.update_layout(title='Deaths In Each Day',template='plotly_white',xaxis_title="Days",yaxis_title="Deaths")
fig.show()

### 10 most infected countries...

In [21]:
# Get data per country...
# One row per (country, observation date) with the counts summed over all
# provinces. The sort is on the text date column, so row order is not
# strictly chronological; nothing in this cell relies on it.
data_per_country = data.groupby(["Country/Region","ObservationDate"])[["Confirmed","Active","Recovered","Deaths"]].sum().reset_index().sort_values("ObservationDate",ascending=True).reset_index(drop=True)

# Rank the countries on the most recent observation date. Slicing the first
# ten rows of data_per_country would only return ten rows from the earliest
# dates, not the ten countries with the most confirmed cases.
_obs_dates = pd.to_datetime(data_per_country["ObservationDate"], format="%m/%d/%Y")
top_confirmed = data_per_country[_obs_dates == _obs_dates.max()].nlargest(10, "Confirmed")

# Bubble colour and area both follow the confirmed count, so the figure is
# reproducible (no random colours). sizeref scales the largest bubble to
# roughly 60 px, following plotly's bubble-chart recommendation.
fig = go.Figure(data=[go.Scatter(
    x=top_confirmed['Country/Region'],
    y=top_confirmed['Confirmed'],
    mode='markers',
    marker=dict(
        color=top_confirmed['Confirmed'],
        size=top_confirmed['Confirmed'],
        sizemode='area',
        sizeref=2.0 * max(1, top_confirmed['Confirmed'].max()) / 60 ** 2,
        showscale=True))])

fig.update_layout(title='Most 10 infected Countries',xaxis_title="Countries",yaxis_title="Confirmed Cases",template='plotly_dark')
fig.show()

### Recoveries per country...

In [35]:
# Recovered count per country on the most recent observation date, largest
# first. data_per_country holds one row per (country, date); summing those
# rows over every date would add the same patients again for each day they
# appear in the data.
# NOTE(review): this assumes 'Recovered' is a running total per date - confirm.
_obs_dates = pd.to_datetime(data_per_country["ObservationDate"], format="%m/%d/%Y")
Recovered_per_country = data_per_country.loc[_obs_dates == _obs_dates.max(), ["Country/Region", "Recovered"]].sort_values("Recovered",ascending=False).reset_index(drop=True)
top_recovered = Recovered_per_country[0:10]

# Bubble colour and area follow the plotted recovered counts (previously the
# sizes came from unrelated rows of data_per_country and the colours were
# random). sizeref scales the largest bubble to roughly 60 px.
fig = go.Figure(data=[go.Scatter(
    x=top_recovered['Country/Region'],
    y=top_recovered['Recovered'],
    mode='markers',
    marker=dict(
        color=top_recovered['Recovered'],
        size=top_recovered['Recovered'].clip(lower=0),
        sizemode='area',
        sizeref=2.0 * max(1, top_recovered['Recovered'].max()) / 60 ** 2,
        showscale=True))])

# The title used to say "Active cases"; this chart shows recoveries.
fig.update_layout(
title='Recovered cases in the 10 Countries with most recoveries',xaxis_title="Countries",yaxis_title="Recovered Cases",template='plotly_white')
fig.show()

### Active cases per country...

In [29]:
# Active cases per country on the most recent observation date, largest
# first. Summing the per-date active counts over every date does not give a
# number of people, so only the latest snapshot is used.
_obs_dates = pd.to_datetime(data_per_country["ObservationDate"], format="%m/%d/%Y")
Active_per_country = data_per_country.loc[_obs_dates == _obs_dates.max(), ["Country/Region", "Active"]].sort_values("Active",ascending=False).reset_index(drop=True)
top_active = Active_per_country[0:10]

# Bubble colour and area follow the active count, which keeps the figure
# reproducible (no random colours). Sizes are clipped at zero because
# 'Active' is a difference of columns and marker sizes must not be negative.
fig = go.Figure(data=[go.Scatter(
    x=top_active['Country/Region'],
    y=top_active['Active'],
    mode='markers',
    marker=dict(
        color=top_active['Active'],
        size=top_active['Active'].clip(lower=0),
        sizemode='area',
        sizeref=2.0 * max(1, top_active['Active'].max()) / 60 ** 2,
        showscale=True))])

fig.update_layout(title='Active cases in 10 most infected Countries',xaxis_title="Countries",yaxis_title="Active Cases",template='plotly_white')
fig.show()

### Deaths per country

In [38]:
# Death count per country on the most recent observation date, largest
# first. Summing the per-date rows over every date would count the same
# deaths once for each day they appear in the data.
# NOTE(review): this assumes 'Deaths' is a running total per date - confirm.
_obs_dates = pd.to_datetime(data_per_country["ObservationDate"], format="%m/%d/%Y")
Deaths_per_country = data_per_country.loc[_obs_dates == _obs_dates.max(), ["Country/Region", "Deaths"]].sort_values("Deaths",ascending=False).reset_index(drop=True)
top_deaths = Deaths_per_country[0:10]

# The fixed colour ramp from the original cell is kept (one value per
# bubble); bubble area follows the death count, with the largest bubble
# scaled to roughly 60 px.
fig = go.Figure(data=[go.Scatter(
    x=top_deaths['Country/Region'],
    y=top_deaths['Deaths'],
    mode='markers',
    marker=dict(
        color=[145, 140, 135, 130, 125, 120,115,110,105,100],
        size=top_deaths['Deaths'].clip(lower=0),
        sizemode='area',
        sizeref=2.0 * max(1, top_deaths['Deaths'].max()) / 60 ** 2,
        showscale=True))])

# The title used to repeat "Most 10 infected Countries"; this chart ranks by deaths.
fig.update_layout(title='10 Countries with most deaths',xaxis_title="Countries",yaxis_title="Deaths",template='plotly_white')
fig.show()

## Data for my Country India.

In [39]:
# All rows reported for India, re-indexed from zero.
_is_india = data['Country/Region'].eq('India')
Data_India = data.loc[_is_india].reset_index(drop=True)
Data_India.head(n=5)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active
0,431,01/30/2020,Unknown,India,1/30/20 16:00,1,0,0,1
1,492,01/31/2020,Unknown,India,1/31/2020 23:59,1,0,0,1
2,548,02/01/2020,Unknown,India,1/31/2020 8:15,1,0,0,1
3,608,02/02/2020,Unknown,India,2020-02-02T06:03:08,2,0,0,2
4,673,02/03/2020,Unknown,India,2020-02-03T21:43:02,3,0,0,3


### Confirmed, Active and Recovered cases in India 

In [41]:
# National totals per observation date, in chronological order.
# Data_India can hold several Province/State rows for the same date (the
# notebook later breaks India down by state), and plotting the raw rows would
# make the lines jump between states. The dates are parsed so that plotly
# draws a real time axis.
india_over_time = Data_India.groupby("ObservationDate")[["Confirmed", "Active", "Deaths", "Recovered"]].sum().reset_index()
india_over_time["ObservationDate"] = pd.to_datetime(india_over_time["ObservationDate"], format="%m/%d/%Y")
india_over_time = india_over_time.sort_values("ObservationDate").reset_index(drop=True)

fig = go.Figure()
fig.add_trace(go.Scatter(x=india_over_time['ObservationDate'], y=india_over_time['Confirmed'],
                    mode='lines',
                    name='Confirmed cases'))

fig.add_trace(go.Scatter(x=india_over_time['ObservationDate'], y=india_over_time['Active'],
                    mode='lines',
                    name='Active cases',line=dict( dash='dot')))
fig.add_trace(go.Scatter(x=india_over_time['ObservationDate'], y=india_over_time['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines',line=dict( dash='dot') ))
fig.add_trace(go.Scatter(x=india_over_time['ObservationDate'], y=india_over_time['Recovered'],
                    mode='lines',
                    name='Recovered cases',marker_color='green'))
# The title used to say "Tunisia"; the plotted data is India's.
fig.update_layout(
    title='Evolution of cases over time in India',
        template='plotly_white'

)

fig.show()

### A better view

In [42]:
# Start again from all India rows, then keep only the most recent
# observation date. ObservationDate is MM/DD/YYYY text, so the dates are
# parsed before taking the maximum (a string max() is not chronological
# across years).
Data_India = data [(data['Country/Region'] == 'India') ].reset_index(drop=True)
_india_dates = pd.to_datetime(Data_India['ObservationDate'], format='%m/%d/%Y')
Data_India = Data_India[_india_dates == _india_dates.max()].reset_index()

# Get latest data...
# Column selection after groupby uses a list: the bare tuple form
# (["a","b"] without the outer brackets) is rejected by pandas >= 2.0.
Data_India_last= Data_India.groupby(["Country/Region"])[["Confirmed","Deaths","Recovered","Active"]].sum().reset_index()

# Pie chart of the national split between active, recovered and deceased.
labels = ["Active cases", "Recovered", "Deaths"]
values = Data_India_last.loc[0, ["Active", "Recovered", "Deaths"]]
fig = px.pie(Data_India_last, values=values, names=labels, color_discrete_sequence=px.colors.sequential.RdBu)
fig.update_layout(title='Total cases in India : '+str(Data_India_last["Confirmed"][0]))
fig.show()

### Data by state

In [43]:
# Totals per Province/State from the rows currently held in Data_India,
# ordered by confirmed cases (largest first).
# Column selection after groupby uses a list: the bare tuple form is
# rejected by pandas >= 2.0.
Data_India_per_state= Data_India.groupby(["Province/State"])[["Confirmed","Deaths","Recovered","Active"]].sum().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)

# Treemap with one tile per state, sized by confirmed cases.
fig = px.treemap(Data_India_per_state, path=['Province/State'], values=Data_India_per_state['Confirmed'], height=700, title='Confirmed cases in India', color_discrete_sequence = px.colors.sequential.RdBu)
# Show the state label together with its value on each tile.
fig.data[0].textinfo = 'label+text+value'
fig.show()

### Active cases by state

In [44]:
# Treemap with one tile per state, sized by active cases.
fig = px.treemap(
    Data_India_per_state,
    path=['Province/State'],
    values=Data_India_per_state['Active'],
    height=1000,
    title='Active cases in India',
    color_discrete_sequence=px.colors.sequential.Agsunset,
)
# Show the state label together with its value on each tile.
treemap_trace = fig.data[0]
treemap_trace.textinfo = 'label+text+value'
fig.show()

### Recovered cases by state

In [46]:
# Treemap with one tile per state, sized by recovered cases.
fig = px.treemap(
    Data_India_per_state,
    path=['Province/State'],
    values=Data_India_per_state['Recovered'],
    height=700,
    title='Recovered cases in India',
    color_discrete_sequence=px.colors.sequential.Aggrnyl,
)
# Show the state label together with its value on each tile.
treemap_trace = fig.data[0]
treemap_trace.textinfo = 'label+text+value'
fig.show()

### Deaths by state

In [47]:
# Treemap with one tile per state, sized by deaths.
fig = px.treemap(
    Data_India_per_state,
    path=['Province/State'],
    values=Data_India_per_state['Deaths'],
    height=700,
    title='Deaths in India',
    color_discrete_sequence=px.colors.sequential.Cividis,
)
# Show the state label together with its value on each tile.
treemap_trace = fig.data[0]
treemap_trace.textinfo = 'label+text+value'
fig.show()

### Conclusion
- We analyzed the COVID-19 data and observed how the world is doing.
- We saw how quickly COVID-19 spread and why it is classified as a pandemic.
- Finally, we looked at how India is doing.