# The African COVID-19 Dashboard Project

In [1]:
import pandas as pd
import numpy as np
import re 
import glob 
import io 
import requests 
from datetime import date,timedelta 

import plotly.graph_objects as go 
import plotly.express as px 
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)

# Load Data from the John Hopkins Data Project

In [2]:
#Load files from the web
file_date = date(2020,1,22)
dates =[]

while file_date <= date.today():
    dates.append(file_date)
    file_date += timedelta(days=1)

files = []
for file in dates:
    file = file.strftime("%m-%d-%Y")
    print(file)
    url = r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'.format(file)
    raw_string = requests.get(url).content
    df = pd.read_csv(io.StringIO(raw_string.decode('utf-8')))
    df['date'] = pd.to_datetime(file)
    df.rename(columns={'Country_Region': 'Country/Region'}, inplace=True)
    files.append(df)

df = pd.concat(files, axis=0, ignore_index=True, sort=False)   

01-22-2020
01-23-2020
01-24-2020
01-25-2020
01-26-2020
01-27-2020
01-28-2020
01-29-2020
01-30-2020
01-31-2020
02-01-2020
02-02-2020
02-03-2020
02-04-2020
02-05-2020
02-06-2020
02-07-2020
02-08-2020
02-09-2020
02-10-2020
02-11-2020
02-12-2020
02-13-2020
02-14-2020
02-15-2020
02-16-2020
02-17-2020
02-18-2020
02-19-2020
02-20-2020
02-21-2020
02-22-2020
02-23-2020
02-24-2020
02-25-2020
02-26-2020
02-27-2020
02-28-2020
02-29-2020
03-01-2020
03-02-2020
03-03-2020
03-04-2020
03-05-2020
03-06-2020
03-07-2020
03-08-2020
03-09-2020
03-10-2020
03-11-2020
03-12-2020
03-13-2020
03-14-2020
03-15-2020
03-16-2020
03-17-2020
03-18-2020
03-19-2020
03-20-2020
03-21-2020
03-22-2020
03-23-2020
03-24-2020
03-25-2020
03-26-2020
03-27-2020
03-28-2020
03-29-2020
03-30-2020
03-31-2020
04-01-2020
04-02-2020
04-03-2020
04-04-2020
04-05-2020
04-06-2020


In [3]:
global_data = df.to_csv('global_data.csv', index=False)

In [54]:
global_data

# Data Exploration

In [4]:
df.head()

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,date,Latitude,Longitude,FIPS,Admin2,Province_State,Last_Update,Lat,Long_,Active,Combined_Key,404: Not Found
0,Anhui,Mainland China,1/22/2020 17:00,1.0,,,2020-01-22,,,,,,,,,,,
1,Beijing,Mainland China,1/22/2020 17:00,14.0,,,2020-01-22,,,,,,,,,,,
2,Chongqing,Mainland China,1/22/2020 17:00,6.0,,,2020-01-22,,,,,,,,,,,
3,Fujian,Mainland China,1/22/2020 17:00,1.0,,,2020-01-22,,,,,,,,,,,
4,Gansu,Mainland China,1/22/2020 17:00,,,,2020-01-22,,,,,,,,,,,


In [5]:
df.describe()

Unnamed: 0,Confirmed,Deaths,Recovered,Latitude,Longitude,FIPS,Lat,Long_,Active
count,53976.0,53554.0,53607.0,4799.0,4799.0,42154.0,46149.0,46149.0,46378.0
mean,298.582148,13.566102,78.809577,29.152227,6.297,30520.432106,36.658234,-81.564989,127.705162
std,3591.701795,267.501482,1563.57602,21.927711,84.758913,15749.739919,10.21479,40.363617,2181.334908
min,0.0,0.0,0.0,-41.4545,-157.8584,66.0,-51.7963,-170.132,-6.0
25%,0.0,0.0,0.0,21.0,-75.0152,18133.0,33.792015,-96.793718,0.0
50%,3.0,0.0,0.0,35.4437,11.6094,29151.0,37.976637,-88.268075,0.0
75%,19.0,0.0,0.0,42.756,89.0462,45077.0,41.671694,-81.217403,0.0
max,131646.0,15887.0,63945.0,72.0,178.065,99999.0,71.7069,178.065,91246.0


In [6]:
df.date.unique()

array(['2020-01-22T00:00:00.000000000', '2020-01-23T00:00:00.000000000',
       '2020-01-24T00:00:00.000000000', '2020-01-25T00:00:00.000000000',
       '2020-01-26T00:00:00.000000000', '2020-01-27T00:00:00.000000000',
       '2020-01-28T00:00:00.000000000', '2020-01-29T00:00:00.000000000',
       '2020-01-30T00:00:00.000000000', '2020-01-31T00:00:00.000000000',
       '2020-02-01T00:00:00.000000000', '2020-02-02T00:00:00.000000000',
       '2020-02-03T00:00:00.000000000', '2020-02-04T00:00:00.000000000',
       '2020-02-05T00:00:00.000000000', '2020-02-06T00:00:00.000000000',
       '2020-02-07T00:00:00.000000000', '2020-02-08T00:00:00.000000000',
       '2020-02-09T00:00:00.000000000', '2020-02-10T00:00:00.000000000',
       '2020-02-11T00:00:00.000000000', '2020-02-12T00:00:00.000000000',
       '2020-02-13T00:00:00.000000000', '2020-02-14T00:00:00.000000000',
       '2020-02-15T00:00:00.000000000', '2020-02-16T00:00:00.000000000',
       '2020-02-17T00:00:00.000000000', '2020-02-18

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53995 entries, 0 to 53994
Data columns (total 18 columns):
Province/State    4358 non-null object
Country/Region    53995 non-null object
Last Update       7617 non-null object
Confirmed         53976 non-null float64
Deaths            53554 non-null float64
Recovered         53607 non-null float64
date              53995 non-null datetime64[ns]
Latitude          4799 non-null float64
Longitude         4799 non-null float64
FIPS              42154 non-null float64
Admin2            42514 non-null object
Province_State    43795 non-null object
Last_Update       46378 non-null object
Lat               46149 non-null float64
Long_             46149 non-null float64
Active            46378 non-null float64
Combined_Key      46378 non-null object
404: Not Found    0 non-null object
dtypes: datetime64[ns](1), float64(9), object(8)
memory usage: 7.4+ MB


In [8]:
df.sample(5)

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,date,Latitude,Longitude,FIPS,Admin2,Province_State,Last_Update,Lat,Long_,Active,Combined_Key,404: Not Found
50213,,US,,3.0,0.0,0.0,2020-04-04,,,19145.0,Page,Iowa,2020-04-04 23:34:21,40.739317,-95.149781,0.0,"Page, Iowa, US",
4815,Alberta,Canada,2020-03-11T23:13:07,19.0,0.0,0.0,2020-03-11,53.9333,-116.5765,,,,,,,,,
20869,,US,,5.0,0.0,0.0,2020-03-25,,,49053.0,Washington,Utah,2020-03-25 23:33:19,37.280035,-113.504698,0.0,"Washington,Utah,US",
28955,,US,,1.0,0.0,0.0,2020-03-28,,,5043.0,Drew,Arkansas,2020-03-28 23:05:37,33.59035,-91.717779,0.0,"Drew, Arkansas, US",
35738,,US,,4.0,0.0,0.0,2020-03-30,,,1049.0,DeKalb,Alabama,2020-03-30 22:52:45,34.459469,-85.807829,0.0,"DeKalb, Alabama, US",


In [9]:
#Drop the non-essential fields
df.drop(['FIPS', 'Admin2', 'Combined_Key', '404: Not Found'],axis=1,inplace=True)

## List of African countries that are the focus of this dashboard

In [10]:
africa = ['Algeria',
'Angola',
'Benin',
'Botswana',
'Burkina Faso',
'Burundi',
'Cabo Verde',
'Cameroon',
'Central African Republic',
'Chad',
'Comoros',
'Côte d’Ivoire',
'Congo',
'Djibouti',
'Egypt',
'Equatorial Guinea',
'Eritrea',
'Eswatini',
'Ethiopia',
'Gabon',
'Gambia',
'Ghana',
'Guinea',
'Guinea-Bissau',
'Kenya',
'Lesotho',
'Liberia',
'Libya',
'Madagascar',
'Malawi',
'Mali',
'Mauritania',
'Mauritius',
'Morocco',
'Mozambique',
'Namibia',
'Niger',
'Nigeria',
'Rwanda',
'Sao Tome and Principe',
'Senegal',
'Seychelles',
'Sierra Leone',
'Somalia',
'South Africa',
'South Sudan',
'Sudan',
'Tanzania',
'Togo',
'Tunisia',
'Uganda',
'Zambia',
'Zimbabwe',
'Congo (Brazzaville)',
'Ivory Coast',
'Congo (Kinshasa)',
'Republic of the Congo',
'Gambia, The',
]

## Subset the african data from the overall data

In [11]:
df_africa = df[df['Country/Region'].isin(africa)]

In [12]:
df_africa.sample(4)

Unnamed: 0,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,date,Latitude,Longitude,Province_State,Last_Update,Lat,Long_,Active
21150,,Congo (Kinshasa),,48.0,2.0,0.0,2020-03-25,,,,2020-03-25 23:33:04,-4.322447,15.307045,46.0
34960,,Rwanda,,70.0,0.0,0.0,2020-03-29,,,,2020-03-29 23:08:13,-1.9403,29.8739,70.0
5207,,Congo (Kinshasa),2020-03-11T20:00:00,2.0,0.0,0.0,2020-03-13,-4.0383,21.7587,,,,,
3063,,Senegal,2020-03-02T20:23:16,1.0,0.0,0.0,2020-03-02,14.4974,-14.4524,,,,,


In [13]:
#Continue dropping non-required fields
df_africa.drop(['Province/State', 'Province_State', 'Last Update', 'Last_Update' ],axis=1,inplace=True)

In [14]:
df_africa.head()

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,date,Latitude,Longitude,Lat,Long_,Active
264,Ivory Coast,1.0,,,2020-01-27,,,,,
1477,Egypt,1.0,0.0,0.0,2020-02-14,,,,,
1552,Egypt,1.0,0.0,0.0,2020-02-15,,,,,
1627,Egypt,1.0,0.0,0.0,2020-02-16,,,,,
1702,Egypt,1.0,0.0,0.0,2020-02-17,,,,,


In [15]:
df_africa.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1082 entries, 264 to 53993
Data columns (total 10 columns):
Country/Region    1082 non-null object
Confirmed         1082 non-null float64
Deaths            1081 non-null float64
Recovered         1081 non-null float64
date              1082 non-null datetime64[ns]
Latitude          367 non-null float64
Longitude         367 non-null float64
Lat               691 non-null float64
Long_             691 non-null float64
Active            691 non-null float64
dtypes: datetime64[ns](1), float64(8), object(1)
memory usage: 93.0+ KB


In [16]:
df_africa.describe()

Unnamed: 0,Confirmed,Deaths,Recovered,Latitude,Longitude,Lat,Long_,Active
count,1082.0,1081.0,1081.0,367.0,367.0,691.0,691.0,691.0
mean,72.451017,2.704903,5.79926,7.745332,13.629139,3.941792,16.964575,92.777135
std,199.026911,10.51898,21.623835,16.650312,18.198475,15.622182,20.189124,215.529667
min,0.0,0.0,0.0,-30.5595,-23.0418,-30.5595,-23.0418,1.0
25%,3.0,0.0,0.0,-0.8037,0.8248,-4.6796,0.8248,4.0
50%,8.0,0.0,0.0,9.082,11.5021,6.6111,17.8739,13.0
75%,39.0,1.0,1.0,15.9965,29.8739,14.4974,31.4659,70.0
max,1655.0,152.0,247.0,34.0,57.5522,33.886917,57.552152,1549.0


In [17]:
# Re-order the columns for readability
df_africa = df_africa[['date',
         'Country/Region',
         'Confirmed',
         'Deaths',
         'Recovered',
         'Lat',
          'Latitude', 'Long_','Longitude']]

# Fill missing values as 0; create Active cases column
df_africa['Confirmed'] = df_africa['Confirmed'].fillna(0).astype(int)
df_africa['Deaths'] = df_africa['Deaths'].fillna(0).astype(int)
df_africa['Recovered'] = df_africa['Recovered'].fillna(0).astype(int)
df_africa['Active'] = df_africa['Confirmed'] - df_africa['Deaths'] - df_africa['Recovered']

In [18]:
df_africa.sample(5)

Unnamed: 0,date,Country/Region,Confirmed,Deaths,Recovered,Lat,Latitude,Long_,Longitude,Active
34884,2020-03-29,Gabon,7,1,0,-0.8037,,11.6094,,6
31534,2020-03-28,Seychelles,8,0,0,-4.6796,,55.492,,8
51217,2020-04-04,Tunisia,553,18,5,33.886917,,9.537499,,530
6699,2020-03-18,Mauritania,1,0,0,,21.0079,,-10.9408,1
38316,2020-03-30,Eritrea,12,0,0,15.1794,,39.7823,,12


In [19]:
# Replace missing values for latitude
df_africa['Lat'] = df_africa.apply(
    lambda row: row['Latitude']if np.isnan(row['Lat']) else row['Lat'],
    axis=1
)

In [20]:
# Replace missing values for longitude
df_africa['Long_'] = df_africa.apply(
    lambda row: row['Longitude']if np.isnan(row['Long_']) else row['Long_'],
    axis=1
)

In [21]:
df_africa[df_africa['Long_'].isna()]
#Here we find that mainly Egypt,Algeria and Nigeria have missing latitude and longitude coordinates

Unnamed: 0,date,Country/Region,Confirmed,Deaths,Recovered,Lat,Latitude,Long_,Longitude,Active
264,2020-01-27,Ivory Coast,1,0,0,,,,,1
1477,2020-02-14,Egypt,1,0,0,,,,,1
1552,2020-02-15,Egypt,1,0,0,,,,,1
1627,2020-02-16,Egypt,1,0,0,,,,,1
1702,2020-02-17,Egypt,1,0,0,,,,,1
1777,2020-02-18,Egypt,1,0,0,,,,,1
1853,2020-02-19,Egypt,1,0,0,,,,,1
1929,2020-02-20,Egypt,1,0,0,,,,,1
2009,2020-02-21,Egypt,1,0,0,,,,,1
2093,2020-02-22,Egypt,1,0,0,,,,,1


In [22]:
df_africa.drop(['Latitude', 'Longitude'], axis=1, inplace=True)
df_africa.rename(columns={'Lat': 'Latitude', 'Long_': 'Longitude'}, inplace=True)

In [72]:
# Replace missing values for latitude and longitude
#df_africa['Lat'] = df_africa['Lat'].fillna(df.groupby('Country/Region')['Lat'].transform('mean'))
#df_africa['Long_'] = df_africa['Long_'].fillna(df.groupby('Country/Region')['Long_'].transform('mean'))

In [23]:
#Save the African Processed data to a csv file
african_data = df_africa.to_csv('africa_data.csv', index=False)

In [53]:
african_data

In [24]:
df_africa['Country/Region'].nunique()

53

In [25]:
# Deaths
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Deaths'].sum()

441

In [26]:
# active cases
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Confirmed'].sum() - \
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Deaths'].sum() - \
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Recovered'].sum()

7727

In [27]:
# confirmed
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Confirmed'].sum()

9037

In [28]:
# recovered
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Recovered'].sum()

869

In [29]:
# deaths
df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Deaths'].sum()

441

In [30]:
# World fatality rate
'{:.2f}%'.format(100 *
                df[df['date'] == df['date'].iloc[-1]]['Deaths'].sum() /
                df[df['date'] == df['date'].iloc[-1]]['Confirmed'].sum())

'5.45%'

In [31]:
# African fatality rate
'{:.2f}%'.format(100 *
                df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Deaths'].sum() /
                df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Confirmed'].sum())

'4.88%'

In [32]:
## Outline the specific regions of the African continent
east = ['Burundi',
 'Comoros',
 'Djibouti',
 'Eritrea',
 'Ethiopia',
 'Kenya',
 'Madagascar',
 'Malawi',
 'Mauritius',
 'Mozambique',
 'Rwanda',
 'Seychelles',
 'Somalia',
 'South Sudan',
 'Tanzania',
 'Uganda',
 'Zambia',
 'Zimbabwe']
north = [ 'Algeria', 'Egypt', 'Libya', 'Morocco', 'Tunisia', 'Sudan']
west = [
     'Benin',
 'Burkina Faso',
 'Cabo Verde',
 '''Côte d'Ivoire''',
 'Gambia',
 'Ghana',
 'Guinea',
 'Guinea-Bissau',
 'Liberia',
 'Mali',
 'Mauritania',
 'Niger',
 'Nigeria',
 'Senegal',
 'Sierra Leone',
 'Togo'
]
central = [ 'Cameroon',
 'Central African Republic',
 'Chad',
 'Congo (Brazzaville)',
 'Angola',
 'Equatorial Guinea',
 'Gabon',
 'Sao Tome and Príncipe',
 'Congo (Kinshasa)',
'Republic of the Congo',]
south = ['Botswana',
 'Eswatini',
 'Lesotho',
 'Namibia',
 'South Africa']

# Region Specific EDA - EAST AFRICA

In [33]:
def ea(df_africa):
    df_ea = df_africa[df_africa['Country/Region'].isin(east)]
    return df_ea

In [34]:
region = 'Uganda'

print('Fatality rate: {:.2f}%'.format(100 *
                                     df[(df['Country/Region'] == region) &
                                        (df['date'] == df['date'].iloc[-1])]['Deaths'].sum() /
                                     df[(df['Country/Region'] == region) &
                                        (df['date'] == df['date'].iloc[-1])]['Confirmed'].sum()))

fig = go.Figure()
fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Active'].sum(),
                name="Active cases"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Confirmed'].sum(),
                name="Total Confirmed"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Deaths'].sum(),
                name="Deaths"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Recovered'].sum(),
                name="Recovered"))

fig.update_layout(title="COVID-19 infections in {}".format(region),
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

Fatality rate: 0.00%


In [35]:
region = 'Kenya'

print('Fatality rate: {:.2f}%'.format(100 *
                                     df[(df['Country/Region'] == region) &
                                        (df['date'] == df['date'].iloc[-1])]['Deaths'].sum() /
                                     df[(df['Country/Region'] == region) &
                                        (df['date'] == df['date'].iloc[-1])]['Confirmed'].sum()))

fig = go.Figure()
fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Active'].sum(),
                name="Active cases"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Confirmed'].sum(),
                name="Total Confirmed"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Deaths'].sum(),
                name="Deaths"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Recovered'].sum(),
                name="Recovered"))

fig.update_layout(title="COVID-19 infections in {}".format(region),
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

Fatality rate: 2.82%


In [36]:
region = 'Rwanda'

print('Fatality rate: {:.2f}%'.format(100 *
                                     df[(df['Country/Region'] == region) &
                                        (df['date'] == df['date'].iloc[-1])]['Deaths'].sum() /
                                     df[(df['Country/Region'] == region) &
                                        (df['date'] == df['date'].iloc[-1])]['Confirmed'].sum()))

fig = go.Figure()
fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Active'].sum(),
                name="Active cases"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Confirmed'].sum(),
                name="Total Confirmed"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Deaths'].sum(),
                name="Deaths"))

fig.add_trace(go.Scatter(
                x=df[df['Country/Region'] == region].groupby('date')['date'].first(),
                y=df[df['Country/Region'] == region].groupby('date')['Recovered'].sum(),
                name="Recovered"))

fig.update_layout(title="COVID-19 infections in {}".format(region),
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

Fatality rate: 0.00%


In [37]:
print('Fatality rate: {:.2f}%'.format(100 *
                                     df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Deaths'].sum() /
                                     df_africa[df_africa['date'] == df_africa['date'].iloc[-1]]['Confirmed'].sum()))

fig = go.Figure()
fig.add_trace(go.Scatter(
                x=df_africa.groupby('date')['date'].first(),
                y=df_africa.groupby('date')['Confirmed'].sum(),
                name="Total Confirmed"))


fig.add_trace(go.Scatter(
                x=df_africa.groupby('date')['date'].first(),
                y=df_africa.groupby('date')['Active'].sum(),
                name="Active cases"))

fig.add_trace(go.Scatter(
                x=df_africa.groupby('date')['date'].first(),
                y=df_africa.groupby('date')['Recovered'].sum(),
                name="Recovered"))

fig.add_trace(go.Scatter(
                x=df_africa.groupby('date')['date'].first(),
                y=df_africa.groupby('date')['Deaths'].sum(),
                name="Deaths"))

fig.update_layout(title="COVID-19 infections in all of Africa",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

Fatality rate: 4.88%


In [58]:
fig = go.Figure()
for country in east:
    print('Fatality rate for ' + country + ': {:.2f}%'.format(100 *
                                     df[(df['Country/Region'] == country) &
                                        (df['date'] == df['date'].iloc[-1])]['Deaths'].sum() /
                                     df[(df['Country/Region'] == country) &
                                        (df['date'] == df['date'].iloc[-1])]['Confirmed'].sum()))

    fig.add_trace(go.Scatter(
                    x=df[df['Country/Region'] == country].groupby('date')['date'].first(),
                    y=df[df['Country/Region'] == country].groupby('date')['Active'].sum(),
                    name=country,
                    opacity=0.8))

fig.update_layout(title="Active COVID-19 cases in East Africa",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

Fatality rate forBurundi: 0.00%
Fatality rate forComoros: nan%
Fatality rate forDjibouti: 0.00%
Fatality rate forEritrea: 0.00%
Fatality rate forEthiopia: 4.65%
Fatality rate forKenya: 2.82%
Fatality rate forMadagascar: 0.00%
Fatality rate forMalawi: 0.00%
Fatality rate forMauritius: 3.08%
Fatality rate forMozambique: 0.00%
Fatality rate forRwanda: 0.00%
Fatality rate forSeychelles: 0.00%
Fatality rate forSomalia: 0.00%
Fatality rate forSouth Sudan: 0.00%
Fatality rate forTanzania: 4.55%
Fatality rate forUganda: 0.00%
Fatality rate forZambia: 2.56%
Fatality rate forZimbabwe: 11.11%


# West Africa

In [39]:
fig = go.Figure()
for country in west:
    fig.add_trace(go.Scatter(
                    x=df_africa[df_africa['Country/Region'] == country].groupby('date')['date'].first(),
                    y=df_africa[df_africa['Country/Region'] == country].groupby('date')['Active'].sum(),
                    name=country,
                    opacity=0.8))

fig.update_layout(title="Active COVID-19 cases in West Africa",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [40]:
fig = go.Figure()
for country in west:
    fig.add_trace(go.Scatter(
                    x=df_africa[df_africa['Country/Region'] == country].groupby('date')['date'].first(),
                    y=df_africa[df_africa['Country/Region'] == country].groupby('date')['Confirmed'].sum(),
                    name=country,
                    opacity=0.8))

fig.update_layout(title="Confirmed COVID-19 cases in West Africa",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [41]:
fig = go.Figure()
for country in west:
    fig.add_trace(go.Scatter(
                    x=df_africa[df_africa['Country/Region'] == country].groupby('date')['date'].first(),
                    y=df_africa[df_africa['Country/Region'] == country].groupby('date')['Recovered'].sum(),
                    name=country,
                    opacity=0.8))              

fig.update_layout(title="Recovered COVID-19 cases in West Africa",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [42]:
fig = go.Figure()
for country in west:
    fig.add_trace(go.Scatter(
                    x=df_africa[df_africa['Country/Region'] == country].groupby('date')['date'].first(),
                    y=df_africa[df_africa['Country/Region'] == country].groupby('date')['Deaths'].sum(),
                    name=country,
                    opacity=0.8))              

fig.update_layout(title="Death COVID-19 cases in West Africa",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [43]:
df3 = df[df['Country/Region'].isin(africa)]

data = df3[df3['date'] == df3['date'].iloc[-1]].groupby('Country/Region').agg({'Active': 'sum',
                                                                               'Long_': 'mean',
                                                                               'Lat': 'mean',
                                                                               'Country/Region': 'first'
                                                                            })

fig = go.Figure(data=go.Scattergeo(
        lon = data['Long_'],
        lat = data['Lat'],
        text = data['Country/Region'] + ', ' + data['Country/Region'] + ': ' + data['Active'].astype(str),
        mode = 'markers',
        marker_size = (100 * data['Active'] / data['Active'].max()),
        marker = dict(reversescale = False,
                      autocolorscale = False,
                      symbol = 'circle',
                      line = dict(width=1, color='rgba(102, 102, 102)'),
                      colorscale = 'Reds',
                      cmin = 0,
                      color = data['Active'],
                      cmax = data['Active'].max(),
                      colorbar_title="Active Cases")))

fig.update_layout(title = 'Number of active cases by African countries',
                  geo=dict(scope='africa',
                           
                           showland = True,
                           landcolor = "rgb(100, 125, 100)",
                           showcountries=True,
                           showsubunits=True,
                           showlakes=False,))
fig.show()

In [45]:

fig = go.Figure()
for region in df_africa['Country/Region'].unique():
    if df_africa[(df_africa['date'] == df_africa['date'].iloc[-1]) & (df_africa['Country/Region'] == region)]['Confirmed'].sum() > 100:
        fig.add_trace(go.Scatter(
                        x=df_africa[df_africa['Country/Region'] == region].groupby('date')['date'].first(),
                        y=df_africa[df_africa['Country/Region'] == region].groupby('date')['Confirmed'].sum(),
                        name=region,
                        hoverinfo='x+y+z+text+name',
                       ))

fig.update_layout(title="COVID-19 Confirmed Cases in Africa (Countries with greater than 100 confirmed cases)",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [46]:

fig = go.Figure()
for region in df_africa['Country/Region'].unique():
    if df_africa[(df_africa['date'] == df_africa['date'].iloc[-1]) & (df_africa['Country/Region'] == region)]['Confirmed'].sum() > 50:
        fig.add_trace(go.Scatter(
                        x=df_africa[df_africa['Country/Region'] == region].groupby('date')['date'].first(),
                        y=df_africa[df_africa['Country/Region'] == region].groupby('date')['Deaths'].sum(),
                        name=region,
                        hoverinfo='x+y+z+text+name',
                        stackgroup='one'))

fig.update_layout(title="COVID-19 Deaths in Africa (Countries with greater than 50 confirmed deaths)",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [49]:
fig = go.Figure()
for region in df_africa['Country/Region'].unique():
    if df_africa[(df_africa['date'] == df_africa['date'].iloc[-1]) & (df_africa['Country/Region'] == region)]['Confirmed'].sum() > 100:
        fig.add_trace(go.Scatter(
                        x=df_africa[df_africa['Country/Region'] == region].groupby('date')['date'].first(),
                        y=df_africa[df_africa['Country/Region'] == region].groupby('date')['Active'].sum(),
                        name=region,
                        hoverinfo='x+y+z+text+name',
                        ))

fig.update_layout(title="COVID-19 Active Cases in Africa (Countries with greater than 100 confirmed cases)",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [52]:
fig = go.Figure()
for region in df_africa['Country/Region'].unique():
    if df_africa[(df_africa['date'] == df_africa['date'].iloc[-1]) & (df_africa['Country/Region'] == region)]['Confirmed'].sum() > 50:
        fig.add_trace(go.Scatter(
                        x=df_africa[df_africa['Country/Region'] == region].groupby('date')['date'].first(),
                        y=df_africa[df_africa['Country/Region'] == region].groupby('date')['Recovered'].sum(),
                        name=region,
                        hoverinfo='x+y+z+text+name',
                        ))

fig.update_layout(title="COVID-19 Recovered Cases in Africa (Countries with greater than 50 confirmed recoveries)",
                  xaxis_title="Date",
                  yaxis_title="Number of Individuals")

fig.show()

In [75]:
value = df_africa[df_africa['date'] ==
                      df_africa['date'].iloc[-1]]['Confirmed'].sum()

In [76]:
value

9037

In [77]:
delta = df_africa[df_africa['date'] ==
                      df_africa['date'].unique()[-2]]['Confirmed'].sum()

In [78]:
delta

8347