In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib widget

# Enter the name of the country that you are interested in.

In [2]:
def normalize_country(name: str) -> str:
    """Return ``name`` trimmed and title-cased for matching against 'Country/Region'.

    ``str.capitalize()`` lower-cases everything after the first character, so a
    multi-word name such as 'south africa' became 'South africa' and matched
    nothing. ``str.title()`` capitalizes every word instead.

    Limitation (unchanged from the original behavior): names whose spelling is
    not plain title case, such as acronyms, are still altered by this helper.
    """
    return name.strip().title()


country = normalize_country('egypt')

## Now we import our data from https://github.com/CSSEGISandData/COVID-19
## Each dataset is imported separately from its own link

In [3]:
# Download the global confirmed-cases time series (one column per date).
cases_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_confirmed_global.csv'
)
cases = pd.read_csv(cases_url)

In [4]:
# Download the global deaths time series (one column per date).
deaths_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_deaths_global.csv'
)
deaths = pd.read_csv(deaths_url)

In [5]:
# Download the global recovered time series (one column per date).
recoverd_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_recovered_global.csv'
)
recoverd = pd.read_csv(recoverd_url)

## Now we drop the unwanted columns, transpose the data from one wide row (one column per date) into a long table with two columns so it is easier to handle, then reset the index and rename the new columns

In [6]:
# Reshape the confirmed-cases table into a long (date, number_of_cases) frame
# for the selected country.
# NOTE: this cell overwrites `cases`; re-run the loading cell before re-running it.
cases_country_rows = cases[cases['Country/Region'] == country]
if cases_country_rows.empty:
    # Fail with a clear message instead of an opaque IndexError further down.
    raise ValueError(f"No confirmed-case rows found for country {country!r}")
cases = (
    cases_country_rows
    # Keyword form: drop() no longer accepts `axis` positionally in pandas >= 2.0.
    .drop(columns=['Province/State', 'Lat', 'Long', 'Country/Region'])
    # A country may be split into several Province/State rows; add them up so
    # the result is one national series rather than only the first row.
    .sum()
    .rename_axis('date')
    .reset_index(name='number_of_cases')
)

In [7]:
cases

Unnamed: 0,date,number_of_cases
0,1/22/20,0
1,1/23/20,0
2,1/24/20,0
3,1/25/20,0
4,1/26/20,0
...,...,...
364,1/20/21,158963
365,1/21/21,159715
366,1/22/21,160463
367,1/23/21,161143


In [8]:
# Reshape the deaths table into a long (date, number_of_deaths) frame for the
# selected country.
# NOTE: this cell overwrites `deaths`; re-run the loading cell before re-running it.
deaths_country_rows = deaths[deaths['Country/Region'] == country]
if deaths_country_rows.empty:
    # Fail with a clear message instead of an opaque IndexError further down.
    raise ValueError(f"No death rows found for country {country!r}")
deaths = (
    deaths_country_rows
    # Keyword form: drop() no longer accepts `axis` positionally in pandas >= 2.0.
    .drop(columns=['Province/State', 'Lat', 'Long', 'Country/Region'])
    # A country may be split into several Province/State rows; add them up so
    # the result is one national series rather than only the first row.
    .sum()
    .rename_axis('date')
    .reset_index(name='number_of_deaths')
)

In [9]:
deaths

Unnamed: 0,date,number_of_deaths
0,1/22/20,0
1,1/23/20,0
2,1/24/20,0
3,1/25/20,0
4,1/26/20,0
...,...,...
364,1/20/21,8747
365,1/21/21,8801
366,1/22/21,8853
367,1/23/21,8902


In [10]:
# Reshape the recovered table into a long (date, number_of_recoverd) frame for
# the selected country.
# NOTE: this cell overwrites `recoverd`; re-run the loading cell before re-running it.
recoverd_country_rows = recoverd[recoverd['Country/Region'] == country]
if recoverd_country_rows.empty:
    # Fail with a clear message instead of an opaque IndexError further down.
    raise ValueError(f"No recovered rows found for country {country!r}")
recoverd = (
    recoverd_country_rows
    # Keyword form: drop() no longer accepts `axis` positionally in pandas >= 2.0.
    .drop(columns=['Province/State', 'Lat', 'Long', 'Country/Region'])
    # A country may be split into several Province/State rows; add them up so
    # the result is one national series rather than only the first row.
    .sum()
    .rename_axis('date')
    .reset_index(name='number_of_recoverd')
)

In [11]:
recoverd

Unnamed: 0,date,number_of_recoverd
0,1/22/20,0
1,1/23/20,0
2,1/24/20,0
3,1/25/20,0
4,1/26/20,0
...,...,...
364,1/20/21,124605
365,1/21/21,125171
366,1/22/21,125603
367,1/23/21,126176


## Now we combine the three data frames into one and then change the type of the date column from string to datetime

In [12]:
# Put the three per-country frames side by side. Rows are matched on the
# default integer index that reset_index() gave each frame.
covid_data = pd.concat(
    [cases, deaths['number_of_deaths'], recoverd['number_of_recoverd']],
    axis=1,  # keyword form: pd.concat no longer accepts `axis` positionally in pandas >= 2.0
)

In [13]:
# The date strings are month/day/two-digit-year (e.g. '1/22/20'); state the
# format explicitly rather than relying on pandas to infer it.
covid_data['date'] = pd.to_datetime(covid_data['date'], format='%m/%d/%y')

## The collected data is cumulative (each day's value is added to the previous day's), so we calculate the difference between consecutive rows to get the figures for each single day.

In [14]:
# Day-over-day difference of the three count columns. The first row has no
# previous day to subtract, so its all-NaN result is dropped.
count_columns = ['number_of_cases', 'number_of_deaths', 'number_of_recoverd']
daily_changes = covid_data[count_columns].diff()
every_day_data = daily_changes.dropna()

In [15]:
# Re-attach the date column next to the daily figures. Alignment is by index,
# so the first date (which has no daily figures) comes back as a row of NaN.
every_day_data = pd.concat(
    [covid_data['date'], every_day_data],
    axis=1,  # keyword form: pd.concat no longer accepts `axis` positionally in pandas >= 2.0
)

# Visualize the final data table, which contains the data after cleaning

In [16]:
# Remove rows with missing values by rebinding the name instead of mutating
# the frame in place, so the cell gives the same result when re-run.
every_day_data = every_day_data.dropna()

In [17]:
every_day_data

Unnamed: 0,date,number_of_cases,number_of_deaths,number_of_recoverd
1,2020-01-23,0.0,0.0,0.0
2,2020-01-24,0.0,0.0,0.0
3,2020-01-25,0.0,0.0,0.0
4,2020-01-26,0.0,0.0,0.0
5,2020-01-27,0.0,0.0,0.0
...,...,...,...,...
364,2021-01-20,789.0,51.0,511.0
365,2021-01-21,752.0,54.0,566.0
366,2021-01-22,748.0,52.0,432.0
367,2021-01-23,680.0,49.0,573.0


# Data analysis for Covid-19 cases

In [18]:
# Read the total from the cumulative series: summing the daily differences
# would leave out any count already present on the first recorded day.
total_cases = int(covid_data['number_of_cases'].iloc[-1])
# Date-only text; the raw Timestamp would also print a 00:00:00 time part.
last_case_date = every_day_data['date'].iloc[-1].strftime('%Y-%m-%d')
f"The total sum of all cases in {country} is {total_cases} till date {last_case_date}"

'The total sum of all cases in Egypt is 161817.0 till date 2021-01-24 00:00:00'

In [19]:
# Determine the day with the highest number of infections.
# idxmax() returns the index label of the first row holding the maximum, so
# ties resolve to the earliest such day. The result stays a 'YYYY-MM-DD' string.
case_peak_row = every_day_data['number_of_cases'].idxmax()
case_peak_day = every_day_data.loc[case_peak_row, 'date'].strftime('%Y-%m-%d')

In [20]:
case_peak_day

'2020-06-19'

In [21]:
# Calendar month (1-12) of the day with the most infections; it may be the peak month.
month_of_case_peak_day = pd.Timestamp(case_peak_day).month

In [22]:
month_of_case_peak_day

6

In [23]:
# Summary sentence: the largest single-day case count and the day it happened.
highest_daily_cases = every_day_data['number_of_cases'].max()
f"The max number of infections occured per day was {highest_daily_cases} which occured on date {case_peak_day}"

'The max number of infections occured per day was 1774.0 which occured on date 2020-06-19'

In [44]:
# Plotting number of cases per day

# Date-only text for the title; the raw Timestamp would also print a 00:00:00 time part.
last_case_plot_date = every_day_data['date'].iloc[-1].strftime('%Y-%m-%d')

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(
    every_day_data['date'],
    every_day_data['number_of_cases'],
    label='Numbers of Covid-19 cases',
)
ax.set(
    xlabel="Date",
    ylabel="Number of cases per day",
    title=f"Covid-19 cases in {country} till date {last_case_plot_date}",
)
# Vertical red line on the day with the most new cases.
ax.axvline(
    pd.Timestamp(case_peak_day),
    color='r',
    label=f'The Day with highest cases number in {country}\n{case_peak_day}',
)
ax.legend()
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()  # explicit axes call instead of the pyplot current-axes state
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Data analysis for Covid-19 deaths

In [25]:
# Read the total from the cumulative series: summing the daily differences
# would leave out any count already present on the first recorded day.
total_deaths = int(covid_data['number_of_deaths'].iloc[-1])
# Date-only text; the raw Timestamp would also print a 00:00:00 time part.
last_death_date = every_day_data['date'].iloc[-1].strftime('%Y-%m-%d')
f"The total sum of all deaths in {country} is {total_deaths} till date {last_death_date}"

'The total sum of all cases in Egypt is 8959.0 till date 2021-01-24 00:00:00'

In [53]:
# Determine the day with the highest number of deaths.
# idxmax() returns the index label of the first row holding the maximum, so
# ties resolve to the earliest such day. The result stays a 'YYYY-MM-DD' string.
death_peak_row = every_day_data['number_of_deaths'].idxmax()
death_peak_day = every_day_data.loc[death_peak_row, 'date'].strftime('%Y-%m-%d')

In [54]:
death_peak_day

'2020-06-15'

In [55]:
# Calendar month (1-12) of the day with the highest number of deaths.
month_of_death_peak_day = pd.Timestamp(death_peak_day).month

In [56]:
month_of_death_peak_day

6

In [30]:
# Summary sentence: the largest single-day death count and the day it happened.
highest_daily_deaths = every_day_data['number_of_deaths'].max()
f"The max number of deaths occured per day was {highest_daily_deaths} which occured on date {death_peak_day}"

'The max number of deaths occured per day was 97.0 which occured on date 2020-06-15'

In [39]:
# Plotting number of deaths per day

# Date-only text for the title; the raw Timestamp would also print a 00:00:00 time part.
last_death_plot_date = every_day_data['date'].iloc[-1].strftime('%Y-%m-%d')

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(
    every_day_data['date'],
    every_day_data['number_of_deaths'],
    label='Numbers of Covid-19 deaths',
)
ax.set(
    xlabel="Date",
    ylabel="Number of deaths per day",
    title=f"Covid-19 deaths in {country} till date {last_death_plot_date}",
)
# Vertical red line on the day with the most deaths.
ax.axvline(
    pd.Timestamp(death_peak_day),
    color='r',
    label=f'The day with highest death number in {country}\n{death_peak_day}',
)
ax.legend()
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()  # explicit axes call instead of the pyplot current-axes state
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Data analysis for Covid-19 recovered

In [47]:
# Read the total from the cumulative series: summing the daily differences
# would leave out any count already present on the first recorded day.
total_recovered = int(covid_data['number_of_recoverd'].iloc[-1])
# Date-only text; the raw Timestamp would also print a 00:00:00 time part.
last_recover_date = every_day_data['date'].iloc[-1].strftime('%Y-%m-%d')
f"The total sum of all recovers in {country} is {total_recovered} till date {last_recover_date}"

'The total sum of all recovers in Egypt is 126497.0 till date 2021-01-24 00:00:00'

In [48]:
# Determine the day with the highest number of recoveries.
# idxmax() returns the index label of the first row holding the maximum, so
# ties resolve to the earliest such day. The result stays a 'YYYY-MM-DD' string.
recover_peak_row = every_day_data['number_of_recoverd'].idxmax()
recover_peak_day = every_day_data.loc[recover_peak_row, 'date'].strftime('%Y-%m-%d')

In [49]:
recover_peak_day

'2020-08-06'

In [51]:
# Calendar month (1-12) of the day with the highest number of recoveries.
month_of_recover_peak_day = pd.Timestamp(recover_peak_day).month

In [52]:
month_of_recover_peak_day

8

In [58]:
# Summary sentence: the largest single-day recovery count and the day it happened.
highest_daily_recoverd = every_day_data['number_of_recoverd'].max()
f"The max number of recoverd occured per day was {highest_daily_recoverd} which occured on date {recover_peak_day}"

'The max number of recoverd occured per day was 1716.0 which occured on date 2020-08-06'

In [62]:
# Plotting number of recoverd per day

# Date-only text for the title; the raw Timestamp would also print a 00:00:00 time part.
last_recover_plot_date = every_day_data['date'].iloc[-1].strftime('%Y-%m-%d')

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(
    every_day_data['date'],
    every_day_data['number_of_recoverd'],
    label='Numbers of Covid-19 recoverd',
)
ax.set(
    xlabel="Date",
    ylabel="Number of recoverd per day",
    title=f"Covid-19 recoverd in {country} till date {last_recover_plot_date}",
)
# Vertical red line on the day with the most recoveries.
ax.axvline(
    pd.Timestamp(recover_peak_day),
    color='r',
    label=f'The day with highest recoverd number in {country}\n{recover_peak_day}',
)
ax.legend()
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()  # explicit axes call instead of the pyplot current-axes state
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Data analysis for the month with the highest infection number; the month before and the month after are also included.

In [63]:
# Work on an independent copy: plain assignment would only create a second
# name for the same DataFrame, so a change made through one name would show
# up under the other.
every_day_data2 = every_day_data.copy()

In [64]:
every_day_data2

Unnamed: 0,date,number_of_cases,number_of_deaths,number_of_recoverd
1,2020-01-23,0.0,0.0,0.0
2,2020-01-24,0.0,0.0,0.0
3,2020-01-25,0.0,0.0,0.0
4,2020-01-26,0.0,0.0,0.0
5,2020-01-27,0.0,0.0,0.0
...,...,...,...,...
364,2021-01-20,789.0,51.0,511.0
365,2021-01-21,752.0,54.0,566.0
366,2021-01-22,748.0,52.0,432.0
367,2021-01-23,680.0,49.0,573.0


In [65]:
# Divide the data by month so that every month can be inspected separately.
# Each list item is a (group key, DataFrame of that month's rows) pair.
# NOTE(review): newer pandas releases spell the month-end alias 'ME' — confirm
# against the installed version before changing it.
month_grouper = pd.Grouper(key='date', freq='M')
months = list(every_day_data2.groupby(month_grouper))

In [66]:
# Find the peak month by its actual year and month. Indexing `months` with the
# bare calendar-month number only lines up when the peak falls in the data's
# first calendar year; for a January peak, `month - 2` wraps to the last month,
# and a peak in the final month raises IndexError for the month after.
peak_month = pd.Timestamp(case_peak_day).to_period('M')
peak_position = next(
    position
    for position, (month_key, month_frame) in enumerate(months)
    if month_key.to_period('M') == peak_month
)
# Empty frame with the same columns, used when a neighbouring month does not exist.
no_rows = months[peak_position][1].iloc[0:0]

data_of_month_of_peak = months[peak_position][1]
data_of_month_before_peak = months[peak_position - 1][1] if peak_position > 0 else no_rows
data_of_month_after_peak = months[peak_position + 1][1] if peak_position + 1 < len(months) else no_rows

In [67]:
# Stack the three monthly frames in chronological order: before, peak, after.
peak_period_frames = [
    data_of_month_before_peak,
    data_of_month_of_peak,
    data_of_month_after_peak,
]
data_of_peak_period_3_months = pd.concat(peak_period_frames)

In [68]:
data_of_peak_period_3_months

Unnamed: 0,date,number_of_cases,number_of_deaths,number_of_recoverd
100,2020-05-01,358.0,14.0,79.0
101,2020-05-02,298.0,9.0,62.0
102,2020-05-03,272.0,14.0,40.0
103,2020-05-04,348.0,7.0,70.0
104,2020-05-05,388.0,16.0,98.0
...,...,...,...,...
187,2020-07-27,420.0,46.0,1007.0
188,2020-07-28,465.0,39.0,1121.0
189,2020-07-29,409.0,37.0,1066.0
190,2020-07-30,401.0,46.0,1211.0


In [70]:
# Daily cases over the three months around the peak.

# Month numbers for the title, wrapped into 1..12 so that a January peak shows
# 12 (not 0) as the month before and a December peak shows 1 (not 13) after.
month_before_peak = (month_of_case_peak_day - 2) % 12 + 1
month_after_peak = month_of_case_peak_day % 12 + 1

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(
    data_of_peak_period_3_months['date'],
    data_of_peak_period_3_months['number_of_cases'],
)
ax.set(
    xlabel="Date",
    ylabel="Number of cases per day",
    title=f"Covid-19 cases in {country} through peak time months : {month_before_peak}, {month_of_case_peak_day} and {month_after_peak}",
)
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()  # explicit axes call instead of the pyplot current-axes state
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

#### From the previous graph we notice that cases increased during May, reached their peak in June at as many as 1774 cases per day, and then declined in July to under 400 cases per day. (This was the first wave.)

In [71]:
# Daily deaths over the three months around the case peak.

# Month numbers for the title, wrapped into 1..12 so that a January peak shows
# 12 (not 0) as the month before and a December peak shows 1 (not 13) after.
month_before_peak = (month_of_case_peak_day - 2) % 12 + 1
month_after_peak = month_of_case_peak_day % 12 + 1

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(
    data_of_peak_period_3_months['date'],
    data_of_peak_period_3_months['number_of_deaths'],
)
ax.set(
    xlabel="Date",
    ylabel="Number of deaths per day",
    title=f"Covid-19 deaths in {country} through peak time months : {month_before_peak}, {month_of_case_peak_day} and {month_after_peak}",
)
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()  # explicit axes call instead of the pyplot current-axes state
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

#### As with the cases graph, the number of deaths followed the same pattern

In [72]:
# Daily recoveries over the three months around the case peak.

# Month numbers for the title, wrapped into 1..12 so that a January peak shows
# 12 (not 0) as the month before and a December peak shows 1 (not 13) after.
month_before_peak = (month_of_case_peak_day - 2) % 12 + 1
month_after_peak = month_of_case_peak_day % 12 + 1

fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(
    data_of_peak_period_3_months['date'],
    data_of_peak_period_3_months['number_of_recoverd'],
)
ax.set(
    xlabel="Date",
    ylabel="Number of recoverd per day",
    # Title typo "revocerd" corrected to the spelling used by the other labels.
    title=f"Covid-19 recoverd in {country} through peak time months : {month_before_peak}, {month_of_case_peak_day} and {month_after_peak}",
)
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()  # explicit axes call instead of the pyplot current-axes state
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# percentage of each category

In [79]:
# Latest cumulative totals: last row, second column (the count column) of each frame.
total_number_of_case, total_number_of_death, total_number_of_recover = (
    frame.iloc[-1, 1] for frame in (cases, deaths, recoverd)
)
# Active cases are the confirmed cases that are counted as neither dead nor recovered.
number_of_active_cases = total_number_of_case - total_number_of_death - total_number_of_recover

In [90]:
# Pie chart showing how the confirmed cases split into active, dead and recovered.
categories = ['number_of_active_cases', 'total_number_of_deaths', 'total_number_of_recoverd']
# Raw counts per category; autopct turns each wedge into a percentage label.
percent = [number_of_active_cases, total_number_of_death, total_number_of_recover]

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure area
ax.axis('equal')  # equal scaling on both axes so the pie is drawn as a circle
ax.pie(percent, labels=categories, autopct='%1.2f%%')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …