<a href="https://colab.research.google.com/github/MohamedElhossin/Covid19_analysis/blob/master/preprocessingOnEgyptTourism.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import folium
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import plotly.graph_objects as go
import os


In [None]:
# Colab-only step: mount Google Drive at /content/gdrive so that the data
# files read by the loading cell further down are reachable as local paths.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# **Import the datasets**

In [None]:
# All four input files live in the same Drive folder. Keeping that folder in
# one constant means relocating the data only requires editing this line.
DATA_DIR = '/content/gdrive/My Drive/Launchpad C5/data'

# Egypt tourism figures (Excel) and the three outbreak datasets (CSV).
egypt_dataset = pd.read_excel(os.path.join(DATA_DIR, 'egypt.xlsx'))
sars_dataset = pd.read_csv(os.path.join(DATA_DIR, 'sars_2003_complete_dataset_clean.csv'))
ebola_dataset = pd.read_csv(os.path.join(DATA_DIR, 'ebola_2014_2016_clean.csv'))
covid_dataset = pd.read_csv(os.path.join(DATA_DIR, 'covid_19_data.csv'))

# **Preprocessing on sars dataset**

In [None]:
# Shorten the verbose source headers to the column names shared by the other
# outbreak datasets in this notebook (Confirmed / Deaths / Recovered).
sars_dataset = sars_dataset.rename(
    columns={
        'Cumulative number of case(s)': 'Confirmed',
        'Number of deaths': 'Deaths',
        'Number recovered': 'Recovered',
    }
)

# Reduce the timestamps to plain calendar dates so that every row reported on
# the same day lands in the same group.
sars_dataset['Date'] = pd.to_datetime(sars_dataset['Date']).dt.date

# Group the dataset by date and total the counts over all rows of that date.
# The grouping key is 'Date' alone (the time-series plots below need exactly
# one row per date). The columns are selected with a list because tuple-style
# selection on a groupby is rejected by pandas >= 2.0.
sars_dataset_grouped = (
    sars_dataset
    .groupby('Date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
sars_dataset_grouped.head()

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2003-03-17,167,4,0
1,2003-03-18,219,4,0
2,2003-03-19,264,9,0
3,2003-03-20,306,10,0
4,2003-03-21,350,10,0


# **Preprocessing on ebola dataset**

In [None]:
# Rename the long source headers to the short names used across the notebook.
ebola_dataset = ebola_dataset.rename(
    columns={
        'No. of confirmed, probable and suspected cases': 'Confirmed',
        'No. of confirmed, probable and suspected deaths': 'Deaths',
    }
)

# Keep only the calendar date so rows from the same day share one group key.
ebola_dataset['Date'] = pd.to_datetime(ebola_dataset['Date']).dt.date

# Total the counts per date. The columns are selected with a list because
# tuple-style selection on a groupby is rejected by pandas >= 2.0. The sums
# are then cast to integers for display and plotting.
ebola_dataset_grouped = (
    ebola_dataset
    .groupby('Date')[['Confirmed', 'Deaths']]
    .sum()
    .reset_index()
    .astype({'Confirmed': 'int', 'Deaths': 'int'})
)
ebola_dataset_grouped.head()


Unnamed: 0,Date,Confirmed,Deaths
0,2014-08-29,3071,1553
1,2014-09-05,3967,2105
2,2014-09-08,4293,2296
3,2014-09-12,4390,2226
4,2014-09-16,4985,2461


# **Preprocessing on covid dataset**

In [None]:
# Rename the source headers to the short names used across the notebook.
# NOTE(review): 'Last Update' is used as the date axis here - confirm that
# this is the intended timestamp column of the source file.
covid_dataset = covid_dataset.rename(
    columns={'Country/Region': 'Country', 'Last Update': 'Date'}
)

# Fold 'Mainland China' into 'China'. Series.replace returns a new Series, so
# the result has to be assigned back for the change to take effect.
covid_dataset['Country'] = covid_dataset['Country'].replace('Mainland China', 'China')

# Keep only the calendar date so rows from the same day share one group key.
covid_dataset['Date'] = pd.to_datetime(covid_dataset['Date']).dt.date

# Total the counts per date. The columns are selected with a list because
# tuple-style selection on a groupby is rejected by pandas >= 2.0.
covid_dataset_grouped = (
    covid_dataset
    .groupby('Date')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
covid_dataset_grouped.head()


Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2020-01-22,555.0,17.0,28.0
1,2020-01-23,653.0,18.0,30.0
2,2020-01-24,941.0,26.0,36.0
3,2020-01-25,1438.0,42.0,39.0
4,2020-01-26,2118.0,56.0,52.0


# **Preprocessing on Egypt tourism dataset**

In [None]:
# Normalise the timestamps to plain calendar dates, then total the tourist
# numbers for each date into a two-column frame (Date, Number).
egypt_dataset['Date'] = pd.to_datetime(egypt_dataset['Date']).dt.date
egypt_dataset_grouped = (
    egypt_dataset
    .groupby('Date')['Number']
    .sum()
    .reset_index()
)
egypt_dataset_grouped.head()

Unnamed: 0,Date,Number
0,2003-01-01,401000
1,2003-02-01,427000
2,2003-03-01,356000
3,2003-04-01,363000
4,2003-05-01,319000


In [None]:
# Overlay the death counts of the three outbreaks on a single time axis.
# Each entry is (legend label, per-date frame, line colour).
# NOTE(review): the frames are aggregated per calendar date while the title
# reads "per month" - confirm the intended wording.
death_traces = [
    ('sars', sars_dataset_grouped, 'red'),
    ('ebola', ebola_dataset_grouped, 'black'),
    ('covid', covid_dataset_grouped, 'blue'),
]

fig = go.Figure()
for label, frame, colour in death_traces:
    fig.add_trace(
        go.Scatter(
            x=frame['Date'],
            y=frame['Deaths'],
            name=label,
            line_color=colour,
            opacity=0.5,
        )
    )

# The range slider lets the reader zoom into one outbreak's time window.
fig.update_layout(title_text="Death cases per month", xaxis_rangeslider_visible=True)
fig.show()


In [None]:
# Overlay the confirmed-case counts of the three outbreaks together with the
# Egypt tourism numbers on a single time axis.
# Each entry is (legend label, frame, value column, line colour, opacity).
confirmed_traces = [
    ('sars', sars_dataset_grouped, 'Confirmed', 'red', 0.5),
    ('ebola', ebola_dataset_grouped, 'Confirmed', 'black', 0.5),
    ('covid', covid_dataset_grouped, 'Confirmed', 'blue', 0.5),
    # The tourism series is drawn fainter than the outbreak curves.
    ('Tourism', egypt_dataset_grouped, 'Number', 'green', 0.2),
]

fig = go.Figure()
for label, frame, value_column, colour, alpha in confirmed_traces:
    fig.add_trace(
        go.Scatter(
            x=frame['Date'],
            y=frame[value_column],
            name=label,
            line_color=colour,
            opacity=alpha,
        )
    )

# The range slider lets the reader zoom into one outbreak's time window.
fig.update_layout(title_text="Confirmed cases per month", xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Overlay the recovered counts for SARS and COVID, the two outbreaks plotted
# with a 'Recovered' column in this notebook.
# Each entry is (legend label, per-date frame, line colour).
recovered_traces = [
    ('sars', sars_dataset_grouped, 'red'),
    ('covid', covid_dataset_grouped, 'blue'),
]

fig = go.Figure()
for label, frame, colour in recovered_traces:
    fig.add_trace(
        go.Scatter(
            x=frame['Date'],
            y=frame['Recovered'],
            name=label,
            line_color=colour,
            opacity=0.5,
        )
    )

# The range slider lets the reader zoom into one outbreak's time window.
fig.update_layout(title_text="Recovered cases per month", xaxis_rangeslider_visible=True)
fig.show()

In [None]:
# Per-country snapshot for the most recent date in the COVID data:
#   1. total the counts per (Date, Country),
#   2. keep only the rows of the latest date,
#   3. collapse to one row per country.
# Columns are selected with lists because tuple-style selection on a groupby
# is rejected by pandas >= 2.0. Series.max() replaces the builtin max().
covid_dataset_grouped_country = (
    covid_dataset
    .groupby(['Date', 'Country'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
    .loc[lambda totals: totals['Date'] == totals['Date'].max()]
    .groupby('Country')[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
covid_dataset_grouped_country.head()

Unnamed: 0,Country,Confirmed,Deaths,Recovered
0,Afghanistan,16.0,0.0,0.0
1,Albania,42.0,1.0,0.0
2,Algeria,48.0,4.0,12.0
3,Antigua and Barbuda,1.0,0.0,0.0
4,Argentina,45.0,2.0,1.0


In [None]:
!pip install plotly==4.5.2



In [None]:
import plotly.express as px

# Order the countries from most to fewest confirmed cases, then draw one
# treemap tile per country sized by its confirmed count.
covid_by_confirmed = (
    covid_dataset_grouped_country
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
fig = px.treemap(
    covid_by_confirmed,
    path=["Country"],
    values="Confirmed",
    title='COVID-19',
    color_discrete_sequence=px.colors.qualitative.Dark2,
)
fig.show()

In [None]:
# Per-country snapshot for the most recent date in the Ebola data:
#   1. total the counts per (Date, Country),
#   2. keep only the rows of the latest date,
#   3. collapse to one row per country.
# Columns are selected with lists because tuple-style selection on a groupby
# is rejected by pandas >= 2.0. Series.max() replaces the builtin max().
ebola_dataset_grouped_country = (
    ebola_dataset
    .groupby(['Date', 'Country'])[['Confirmed', 'Deaths']]
    .sum()
    .reset_index()
    .loc[lambda totals: totals['Date'] == totals['Date'].max()]
    .groupby('Country')[['Confirmed', 'Deaths']]
    .sum()
    .reset_index()
)
ebola_dataset_grouped_country.tail()

Unnamed: 0,Country,Confirmed,Deaths
5,Senegal,1.0,0.0
6,Sierra Leone,14122.0,3955.0
7,Spain,1.0,0.0
8,United Kingdom,1.0,0.0
9,United States of America,4.0,1.0


In [None]:
# Order the countries from most to fewest confirmed cases, then draw one
# treemap tile per country sized by its confirmed count.
ebola_by_confirmed = (
    ebola_dataset_grouped_country
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
fig = px.treemap(
    ebola_by_confirmed,
    path=["Country"],
    values="Confirmed",
    title='Ebola',
    color_discrete_sequence=px.colors.qualitative.Dark2,
)
fig.show()

In [None]:
# Per-country snapshot for the most recent date in the SARS data:
#   1. total the counts per (Date, Country),
#   2. keep only the rows of the latest date,
#   3. collapse to one row per country.
# Columns are selected with lists because tuple-style selection on a groupby
# is rejected by pandas >= 2.0. Series.max() replaces the builtin max().
sars_dataset_grouped_country = (
    sars_dataset
    .groupby(['Date', 'Country'])[['Confirmed', 'Deaths']]
    .sum()
    .reset_index()
    .loc[lambda totals: totals['Date'] == totals['Date'].max()]
    .groupby('Country')[['Confirmed', 'Deaths']]
    .sum()
    .reset_index()
)
sars_dataset_grouped_country.tail()

Unnamed: 0,Country,Confirmed,Deaths
26,"Taiwan, China",671,84
27,Thailand,9,2
28,United Kingdom,4,0
29,United States,75,0
30,Viet Nam,63,5


In [None]:
# Order the countries from most to fewest confirmed cases, then draw one
# treemap tile per country sized by its confirmed count.
sars_by_confirmed = (
    sars_dataset_grouped_country
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)
fig = px.treemap(
    sars_by_confirmed,
    path=["Country"],
    values="Confirmed",
    title='Sars',
    color_discrete_sequence=px.colors.qualitative.Dark2,
)
fig.show()