# This notebook shows the timeseries for Covid 19 in Africa
> * The first case in Africa was recorded on 14 February 2020 in Egypt
> * We will use data that we have processed in the data processing notebook
> * Presenting data in the form of time series charts helps us understand the progression of a disease outbreak/pandemic better by showing its acceleration/deceleration

In [21]:
import pandas as pd

# Load the world-wide confirmed-cases frame that was pickled by the data
# processing notebook, then display it.
# NOTE(review): unpickling executes code embedded in the file; only load
# pickles produced by a trusted source.
worldcovid_pickle = 'data/worldcovid_df2_pickle.pkl'
worldcovid_df2 = pd.read_pickle(worldcovid_pickle)
worldcovid_df2

Unnamed: 0,Country/Region,Lat,Long,Date,Value
11781,Czechia,49.8175,15.472999999999999,2020-01-22,0
11662,Cyprus,35.1264,33.4299,2020-01-22,0
11543,Cuba,22.0,-80.0,2020-01-22,0
11424,Croatia,45.1,15.2,2020-01-22,0
11305,Cote d'Ivoire,7.54,-5.5471,2020-01-22,0
...,...,...,...,...,...
28561,Tunisia,34.0,9.0,2020-05-19,1044
2619,Belarus,53.7098,27.9534,2020-05-19,31508
12615,Ecuador,-1.8312,-78.1834,2020-05-19,34151
22730,New Zealand,-40.9006,174.886,2020-05-19,1503


In [22]:
# Give the columns the names used in the rest of the notebook; CNTRY_NAME is
# the key later used for merging.
column_headings = ['CNTRY_NAME', 'Lat', 'Long', 'Date', 'Confirmed_Cases']
worldcovid_df2.columns = column_headings
worldcovid_df2

Unnamed: 0,CNTRY_NAME,Lat,Long,Date,Confirmed_Cases
11781,Czechia,49.8175,15.472999999999999,2020-01-22,0
11662,Cyprus,35.1264,33.4299,2020-01-22,0
11543,Cuba,22.0,-80.0,2020-01-22,0
11424,Croatia,45.1,15.2,2020-01-22,0
11305,Cote d'Ivoire,7.54,-5.5471,2020-01-22,0
...,...,...,...,...,...
28561,Tunisia,34.0,9.0,2020-05-19,1044
2619,Belarus,53.7098,27.9534,2020-05-19,31508
12615,Ecuador,-1.8312,-78.1834,2020-05-19,34151
22730,New Zealand,-40.9006,174.886,2020-05-19,1503


# Create Dataframe for Covid Cases in Africa
* We will create a dataframe for Covid cases in Africa only, by merging with the Africa population dataframe, which contains African countries only
* We will merge on the country name column (`CNTRY_NAME`), which is common to both dataframes
* Merging dataframes adds value to an existing dataframe by bringing in more information; for example, the population data will enable us to calculate the incidence rate for Covid-19 in Africa

In [23]:
# Load the pickled Africa population frame.
# NOTE(review): only unpickle files that come from a trusted source.
africapop_pickle = 'data/africapopdf1_pickle.pkl'
africapopdf1 = pd.read_pickle(africapop_pickle)

In [24]:
# Display the Africa population frame loaded in the previous cell.
africapopdf1

Unnamed: 0,FIPS,CNTRY_NAME,Year,Pop_Mil
3,DZ,Algeria,2019,43.406
4,EG,Egypt,2019,99.064
5,LY,Libya,2019,6.777
6,MA,Morocco,2019,35.587
7,SD,Sudan,2019,42.813
...,...,...,...,...
59,BW,Botswana,2019,2.283
60,SZ,eSwatini,2019,1.096
61,LS,Lesotho,2019,2.125
62,,Namibia,2019,2.495


In [25]:
# Join the case counts to the population table on the shared country name.
# The default (inner) join keeps only countries whose name appears in both
# frames.
africacovdf = pd.merge(worldcovid_df2, africapopdf1, on='CNTRY_NAME')
africacovdf

Unnamed: 0,CNTRY_NAME,Lat,Long,Date,Confirmed_Cases,FIPS,Year,Pop_Mil
0,Cote d'Ivoire,7.54,-5.5471,2020-01-22,0,CI,2019,25.514
1,Cote d'Ivoire,7.54,-5.5471,2020-01-23,0,CI,2019,25.514
2,Cote d'Ivoire,7.54,-5.5471,2020-01-24,0,CI,2019,25.514
3,Cote d'Ivoire,7.54,-5.5471,2020-01-25,0,CI,2019,25.514
4,Cote d'Ivoire,7.54,-5.5471,2020-01-26,0,CI,2019,25.514
...,...,...,...,...,...,...,...,...
6064,Zimbabwe,-20.0,30.0,2020-05-15,42,ZW,2019,14.645
6065,Zimbabwe,-20.0,30.0,2020-05-16,42,ZW,2019,14.645
6066,Zimbabwe,-20.0,30.0,2020-05-17,44,ZW,2019,14.645
6067,Zimbabwe,-20.0,30.0,2020-05-18,46,ZW,2019,14.645


# Create the first visualization: Time series for confirmed cases

In [26]:
# Plot one line per country with Plotly Express.
import plotly.express as px

# `df` is another name for africacovdf; it is kept because a later cell
# refers to it.
df = africacovdf
fig = px.line(
    df,
    x="Date",
    y="Confirmed_Cases",
    color='CNTRY_NAME',
    title='Africa Covid-19 TimeSeries Chart',
)
fig.show()

# Add a range slider

In [27]:
import plotly.graph_objects as go
import pandas as pd

# Same time series as above, with a range slider under the x axis so the
# reader can zoom into a date window. The title typo ("Afica") is corrected.
fig = px.line(
    africacovdf,
    x='Date',
    y='Confirmed_Cases',
    color='CNTRY_NAME',
    title='Africa Covid Time Series Chart with Rangeslider',
)

fig.update_xaxes(rangeslider_visible=True)
fig.show()

# Next we will calculate incidence rate for Africa per 100K persons
> * Calculating rates such as incidence allows us to normalise/standardise data and so make objective comparisons between, say, countries. Rates can unmask the true burden of a problem.
> * In this notebook we will use Cases per 100000 population

In [28]:
# Work on an independent copy of africacovdf. A plain assignment would only
# create a second name for the same object, so the dtype conversion and the
# derived column added in the following cells would also change africacovdf
# (and `df`, which refers to it).
incidence = africacovdf.copy()
incidence

Unnamed: 0,CNTRY_NAME,Lat,Long,Date,Confirmed_Cases,FIPS,Year,Pop_Mil
0,Cote d'Ivoire,7.54,-5.5471,2020-01-22,0,CI,2019,25.514
1,Cote d'Ivoire,7.54,-5.5471,2020-01-23,0,CI,2019,25.514
2,Cote d'Ivoire,7.54,-5.5471,2020-01-24,0,CI,2019,25.514
3,Cote d'Ivoire,7.54,-5.5471,2020-01-25,0,CI,2019,25.514
4,Cote d'Ivoire,7.54,-5.5471,2020-01-26,0,CI,2019,25.514
...,...,...,...,...,...,...,...,...
6064,Zimbabwe,-20.0,30.0,2020-05-15,42,ZW,2019,14.645
6065,Zimbabwe,-20.0,30.0,2020-05-16,42,ZW,2019,14.645
6066,Zimbabwe,-20.0,30.0,2020-05-17,44,ZW,2019,14.645
6067,Zimbabwe,-20.0,30.0,2020-05-18,46,ZW,2019,14.645


In [29]:
# Inspect column dtypes and non-null counts before doing arithmetic on them.
incidence.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6069 entries, 0 to 6068
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CNTRY_NAME       6069 non-null   object 
 1   Lat              6069 non-null   object 
 2   Long             6069 non-null   object 
 3   Date             6069 non-null   object 
 4   Confirmed_Cases  6069 non-null   object 
 5   FIPS             5950 non-null   object 
 6   Year             6069 non-null   int64  
 7   Pop_Mil          6069 non-null   float64
dtypes: float64(1), int64(1), object(6)
memory usage: 426.7+ KB


In [30]:
# Confirmed_Cases has object dtype at this point; cast it to int so it can be
# used in calculations.
incidence['Confirmed_Cases'] = incidence['Confirmed_Cases'].astype(int)

In [31]:
# Add the calculated column Cases_100K (cases per 100,000 population).
# Pop_Mil is in millions of people, i.e. in units of ten 100k-groups, so the
# rate is cases / Pop_Mil / 10.
cases_per_million = incidence['Confirmed_Cases'].div(incidence['Pop_Mil'])
incidence['Cases_100K'] = cases_per_million.div(10)
incidence

Unnamed: 0,CNTRY_NAME,Lat,Long,Date,Confirmed_Cases,FIPS,Year,Pop_Mil,Cases_100K
0,Cote d'Ivoire,7.54,-5.5471,2020-01-22,0,CI,2019,25.514,0.000000
1,Cote d'Ivoire,7.54,-5.5471,2020-01-23,0,CI,2019,25.514,0.000000
2,Cote d'Ivoire,7.54,-5.5471,2020-01-24,0,CI,2019,25.514,0.000000
3,Cote d'Ivoire,7.54,-5.5471,2020-01-25,0,CI,2019,25.514,0.000000
4,Cote d'Ivoire,7.54,-5.5471,2020-01-26,0,CI,2019,25.514,0.000000
...,...,...,...,...,...,...,...,...,...
6064,Zimbabwe,-20.0,30.0,2020-05-15,42,ZW,2019,14.645,0.286787
6065,Zimbabwe,-20.0,30.0,2020-05-16,42,ZW,2019,14.645,0.286787
6066,Zimbabwe,-20.0,30.0,2020-05-17,44,ZW,2019,14.645,0.300444
6067,Zimbabwe,-20.0,30.0,2020-05-18,46,ZW,2019,14.645,0.314100


In [32]:
# Disabled: conversion of the Date column to pandas datetimes. With these
# lines commented out, Date keeps the object dtype reported by info().
# TODO(review): either enable the conversion or delete this cell.
#incidence.Date = pd.to_datetime(incidence.Date)
#incidence

In [33]:
# Re-check dtypes: Confirmed_Cases should now be an integer column and
# Cases_100K should be present.
incidence.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6069 entries, 0 to 6068
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CNTRY_NAME       6069 non-null   object 
 1   Lat              6069 non-null   object 
 2   Long             6069 non-null   object 
 3   Date             6069 non-null   object 
 4   Confirmed_Cases  6069 non-null   int64  
 5   FIPS             5950 non-null   object 
 6   Year             6069 non-null   int64  
 7   Pop_Mil          6069 non-null   float64
 8   Cases_100K       6069 non-null   float64
dtypes: float64(2), int64(2), object(5)
memory usage: 474.1+ KB


# We will now plot an animated Choropleth Map for Africa Covid Cases
> * The use of maps allows us to visualize data in multiple dimensions
> * By using an animated map we are able to show the burden of cases, their location and the time progression in one visualization
> * We will use a GeoJSON file for Africa for the mapping. GeoJSON is an open standard format designed for representing simple geographical features, along with their non-spatial attributes.

In [34]:
# First we will import the geojson data
import json

# GeoJSON is specified as UTF-8 (RFC 7946). Pass the encoding explicitly so
# the file is decoded the same way on every platform instead of using the
# locale's default encoding.
with open('data/Africa_World_PROD_1_Lsib_2017_Mar.geojson', encoding='utf-8') as f:
    countries = json.load(f)

In [35]:
# Display the parsed GeoJSON (a FeatureCollection dict).
# NOTE(review): this prints every feature including its full coordinate
# lists, which makes for a very large output; consider showing only a summary.
countries

{'type': 'FeatureCollection',
 'name': 'Africa_World_PROD_1_Lsib_2017_Mar',
 'crs': {'type': 'name',
  'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'}},
 'features': [{'type': 'Feature',
   'properties': {'CNTRY_NAME': 'Abyei Area',
    'ISO2': None,
    'ISO3': None,
    'INTLEXP': None,
    'PHSYSTEM': None,
    'DENURBAN': None,
    'TPOPURBAN': None,
    'POPAGE': None,
    'GOVTTRANS': None,
    'PRESSFREE': None,
    'CONFLICTM': None,
    'FDISPLACEM': None,
    'RISKTOTAL': None},
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[29.001938297714332, 9.679658439285618],
       [28.979938952320822, 9.648331326529558],
       [28.954950058098746, 9.628084049339463],
       [28.931341630411964, 9.609573453851851],
       [28.907733202725183, 9.591062858364296],
       [28.8841247750384, 9.572552262876684],
       [28.86051634825094, 9.554041667389072],
       [28.83690792056416, 9.535531071002197],
       [28.81329949287732, 9.517020475514585],
       [28.78663

In [36]:
import plotly.express as px

# Upper end of the colour scale: the highest incidence on any date, so that
# colours mean the same thing in every animation frame.
max_cases = incidence['Cases_100K'].max()

# Animated choropleth: countries in the dataframe are matched to GeoJSON
# features through the CNTRY_NAME property, with one frame per Date.
fig = px.choropleth_mapbox(
    incidence,
    geojson=countries,
    color="Cases_100K",
    hover_name="CNTRY_NAME",
    featureidkey="properties.CNTRY_NAME",
    locations="CNTRY_NAME",
    animation_frame="Date",
    color_continuous_scale=px.colors.sequential.OrRd,
    range_color=[0, max_cases],
)
fig.update_layout(
    autosize=True,
    height=770,
    mapbox={
        'style': "carto-positron",
        'center': {'lon': 29.918900, 'lat': -3.373100},
        # Low zoom level so the whole continent is visible.
        'zoom': 2,
    },
    title={
        # Title typo fixed ("Counties" -> "Countries").
        'text': "Coronavirus Cases in African Countries",
        'y': 0.97,
        'x': 0.45,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)

# Save a standalone copy of the interactive map, then render it inline.
fig.write_html('data/choropleth_02.html')
fig.show()

In [37]:
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's notebook mode (connected=True).
init_notebook_mode(connected=True)

# Highest incidence over all dates; used as the top of the colour range.
max_cases = incidence['Cases_100K'].max()

# Map view: same centre as the overview map, with a higher zoom level.
mapbox_view = {
    'style': "carto-positron",
    'center': {'lon': 29.918900, 'lat': -3.373100},
    'zoom': 5.5,
}
title_settings = {
    'text': "Coronavirus Cases per 100k in Africa",
    'y': 0.97,
    'x': 0.45,
    'xanchor': 'center',
    'yanchor': 'top',
}

fig = px.choropleth_mapbox(
    incidence,
    geojson=countries,
    locations="CNTRY_NAME",
    featureidkey="properties.CNTRY_NAME",
    color="Cases_100K",
    hover_name="CNTRY_NAME",
    animation_frame="Date",
    color_continuous_scale=px.colors.sequential.OrRd,
    range_color=[0, max_cases],
)
fig.update_layout(
    autosize=True,
    height=770,
    mapbox=mapbox_view,
    title=title_settings,
)

# Export a standalone HTML copy and render the figure inline.
fig.write_html('data/choropleth_01.html')
fig.show()

# Bubble animated plotly map

In [None]:
import plotly.express as px

# Static bubble map of the most recent report date.
#
# * The FIPS column cannot be used for `locations`: scatter_geo interprets
#   location strings as ISO-3 codes by default, and FIPS is also missing for
#   some rows. The Lat/Long columns are used to place the bubbles instead.
# * Without an animation frame, plotting every date would stack one bubble
#   per day on each country, so only the latest date is kept.
# * Lat and Long have object dtype in `incidence`, hence the cast to float.
latest_date = incidence["Date"].max()
df8 = incidence[incidence["Date"] == latest_date].assign(
    Lat=lambda frame: frame["Lat"].astype(float),
    Long=lambda frame: frame["Long"].astype(float),
)
fig = px.scatter_geo(df8, lat="Lat", lon="Long", color="CNTRY_NAME",
                     hover_name="CNTRY_NAME", size="Cases_100K",
                     projection="natural earth")
fig.show()

In [None]:

import plotly.express as px

# Animated bubble map: one frame per report date, with bubble size and colour
# both showing cases per 100k.
#
# The FIPS column cannot be used for `locations`: scatter_geo interprets
# location strings as ISO-3 codes by default, and FIPS is also missing for
# some rows. Bubbles are therefore placed with the Lat/Long columns, which
# have object dtype in `incidence` and are cast to float first.
df8 = incidence.assign(
    Lat=incidence["Lat"].astype(float),
    Long=incidence["Long"].astype(float),
)
fig = px.scatter_geo(df8, lat="Lat", lon="Long", color="Cases_100K",
                     hover_name="CNTRY_NAME", size="Cases_100K",
                     animation_frame="Date",
                     projection="natural earth")
fig.show()

# Heatmap of Confirmed Covid Cases in Africa
> * A heatmap is a data visualization technique that shows the magnitude of a phenomenon (e.g. a disease outbreak) as color in two dimensions. The variation in color describes intensity. A heatmap does not show geographical boundaries like a choropleth map does.

In [None]:
# Setup for the heatmap section: graph_objects provides go.Figure/go.Heatmap.
import plotly.graph_objects as go
# NOTE(review): datetime and numpy do not appear to be used by the cells
# visible in this section — confirm before removing.
import datetime
import numpy as np
# Seed numpy's global random number generator.
np.random.seed(1)

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's notebook mode (connected=True).
init_notebook_mode(connected=True)

In [None]:
# First and last report dates in the data, used to label the heatmap.
report_dates = incidence['Date']
min_date = report_dates.min()
max_date = report_dates.max()

In [None]:
# Display the earliest report date computed in the previous cell.
min_date

In [None]:
# Heat map of cases per 100k, with report dates on the x axis and countries
# on the y axis. x, y and z are passed as equal-length columns of `incidence`.
fig = go.Figure(data=go.Heatmap(
        z=incidence['Cases_100K'],
        x=incidence['Date'],
        y=incidence['CNTRY_NAME'],
        colorscale='plasma',
        colorbar=dict(thickness=20, lenmode='fraction', len=0.4,
                      tickcolor='black', tickfont=dict(size=10), y=0.8,
                      title='Cases per 100k')
))

# Changes from the previous version of this cell:
# * tick counts are taken from `incidence`, the frame actually plotted,
#   rather than from the unrelated global `df` defined much earlier;
# * the deprecated `titlefont` attribute is replaced by `title.font`;
# * the title is built with an f-string, which works whether the dates are
#   strings or date objects.
fig.update_layout(
    title=dict(
        text=f'Heat Map, Cases/100k in Africa, {min_date} to {max_date}',
        font=dict(size=14),
    ),
    width=1000,
    height=1600,
    yaxis=dict(
        # One tick per country so every row can be labelled.
        nticks=incidence['CNTRY_NAME'].nunique(),
        title=dict(text="Country", font=dict(size=12)),
        tickfont=dict(size=9),
    ),
    xaxis=dict(
        # Allow up to one tick per report date.
        nticks=incidence['Date'].nunique(),
        title=dict(text="Report Date", font=dict(size=12)),
        tickfont=dict(size=9),
    ),
    margin=dict(l=50, r=50, b=100, t=100, pad=4)
)

fig.show()

# Timeseries Chart Covid Deaths in Africa

In [None]:
# Load the pickled deaths frame and display it.
# NOTE(review): only unpickle files that come from a trusted source.
deaths_pickle = 'data/deaths_df2_pickle.pkl'
deaths_df2 = pd.read_pickle(deaths_pickle)
deaths_df2

In [None]:
# Join deaths to the Africa population table on country name. The default
# (inner) join keeps only countries present in both frames.
africadeaths_df = pd.merge(deaths_df2, africapopdf1, on='CNTRY_NAME')

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Deaths over time, one line per country, with a range slider under the
# x axis. The title typo ("Afica") is corrected.
fig = px.line(
    africadeaths_df,
    x='Date',
    y='Deaths',
    color='CNTRY_NAME',
    title='Africa Covid Deaths Time Series with Rangeslider',
)

fig.update_xaxes(rangeslider_visible=True)
fig.show()