# This notebook examines trends for some of the Social Determinants of Health for Africa before Covid-19 and also the relationship between these and reported cases: The Causes of the Causes!!
> * In this notebook we will mostly use the merge operation to increase the value of data
> * We will load the pickled data frames produced by the data processing notebook
> * We will then use line plots, maps and bubble charts to visualise this data

# We will process and analyse GDP data first

In [None]:
# pandas provides the DataFrame tooling used throughout this notebook
import pandas as pd

# Load the pickled GDP dataframe.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
gdp_pickle_path = 'data/gdpdf_pickle.pkl'
gdpdf = pd.read_pickle(gdp_pickle_path)
gdpdf

In [None]:
# Give the GDP frame the shared column names (notably 'CNTRY_NAME', the merge key used below).
# Positional rename: assumes the pickled frame has exactly these four columns in this order.
gdp_column_names = ['CNTRY_NAME', 'Country Code', 'year', 'GDP']
gdpdf.columns = gdp_column_names

In [None]:
# Load the pickled population dataframe
population_pickle_path = 'data/africapopdf1_pickle.pkl'
africapopdf1 = pd.read_pickle(population_pickle_path)
africapopdf1

In [None]:
# Join population and GDP on country name.
# merge defaults to an inner join, so only countries present in both frames are kept.
gdppopdf = pd.merge(africapopdf1, gdpdf, on='CNTRY_NAME')

# GDP Per Capita Trends in Africa 2010-2018
> * We will now use a line chart to plot GDP Trends across Africa
> * For enhanced effect we will also plot an animated choropleth map to visualize the same data

In [None]:
# Line chart of GDP by year, one coloured line per country (plotly express)
import plotly.express as px

fig = px.line(
    data_frame=gdppopdf,
    x="year",
    y="GDP",
    color='CNTRY_NAME',
    title='GDP Per Capita Trends 2010-2018 in Africa',
)
fig.show()

In [None]:
# Load the geojson boundaries used by the choropleth map below.
# The encoding is set explicitly: JSON is UTF-8, and relying on the platform default
# could mis-decode non-ASCII country names, which are later used as join keys
# (properties.CNTRY_NAME).
import json

with open('data/Africa_World_PROD_1_Lsib_2017_Mar.geojson', encoding='utf-8') as f:
    countries = json.load(f)

In [None]:
# Animated choropleth map of GDP by country, one animation frame per year.
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's notebook mode so figures render inside the notebook
init_notebook_mode(connected=True)

# Pin the colour scale to [0, max GDP] so colours stay comparable across animation frames
max_gdp = gdppopdf['GDP'].max()
fig = px.choropleth_mapbox(
    gdppopdf,
    geojson=countries,
    color="GDP",
    hover_name="CNTRY_NAME",
    # Each row's CNTRY_NAME is matched to the CNTRY_NAME property of a geojson feature
    featureidkey="properties.CNTRY_NAME",
    locations="CNTRY_NAME",
    animation_frame="year",
    color_continuous_scale=px.colors.sequential.OrRd,
    range_color=[0, max_gdp],
)
fig.update_layout(
    autosize=True,
    height=770,
    mapbox={
        'style': "carto-positron",
        'center': {'lon': 32, 'lat': 1},
        'zoom': 2,
    },
    title={
        # Title period aligned with the section heading and the line chart of the same data
        'text': "Africa GDP Per Capita 2010-2018",
        'y': 0.97,
        'x': 0.45,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)

fig.show()

# Plot Bubble Chart : Confirmed Cases and Deaths Versus GDP
### A bubble chart is a scatter plot in which a third dimension of the data is shown through the size of markers
> * For example, we will merge the social determinants data frames with the cumulative cases dataframe and calculate incidence and death rates. We will then plot Social Determinants, Incidence and Death rates in the same visualization (Bubble Chart)

In [None]:
# Display the merged population/GDP dataframe before filtering it by year
gdppopdf

In [None]:
# Keep only the rows whose year equals "2018" (the comparison is against a string)
gdp_is_2018 = gdppopdf["year"] == "2018"
gdppopdf1 = gdppopdf.loc[gdp_is_2018]
gdppopdf1

In [None]:
# Load the pickled cumulative cases dataframe
cumcases_pickle_path = 'data/cumcases_df_pickle.pkl'
cumcases_df = pd.read_pickle(cumcases_pickle_path)
cumcases_df

In [None]:
# Rename the columns so the country column is 'CNTRY_NAME', the merge key used below.
# Positional rename: assumes the pickled frame has exactly these eight columns in this order.
cumcases_column_names = [
    'OBJECTID',
    'CNTRY_NAME',
    'Last_Update',
    'Lat',
    'Long_',
    'Confirmed',
    'Deaths',
    'Recovered',
]
cumcases_df.columns = cumcases_column_names

In [None]:
# Composite frame: cumulative cases joined with the 2018 GDP/population rows on country name
# (default inner join: countries missing from either frame are dropped)
composite_df = pd.merge(cumcases_df, gdppopdf1, on='CNTRY_NAME')
composite_df

In [None]:
# Incidence and death rates: count / Pop_Mil / 10
# (per 100,000 people, assuming Pop_Mil is population in millions — TODO confirm)
gdp_population_mil = composite_df['Pop_Mil']
composite_df['Cases_100K'] = composite_df['Confirmed'].div(gdp_population_mil).div(10)
composite_df['Deaths_100K'] = composite_df['Deaths'].div(gdp_population_mil).div(10)
composite_df

In [None]:
# Bubble chart: cases per 100K (x) against GDP (y), with marker size showing deaths per 100K.
# The stray mid-cell `composite_df` expression was removed: only the last expression
# of a cell is displayed, so it produced no output.
import plotly.express as px

fig = px.scatter(
    composite_df,
    x="Cases_100K",
    y="GDP",
    size="Deaths_100K",
    color="CNTRY_NAME",
    hover_name="CNTRY_NAME",
    title='Covid Cases/100k and Deaths VS GDP Per Capita',
    log_x=False,
    size_max=60,
)
fig.show()

# We will now look at employment rates in Africa before Covid-19
> * We will now process employment data the same way we did with GDP data

In [None]:
# Load the pickled employment dataframe
employment_pickle_path = 'data/empdf_pickle.pkl'
empdf = pd.read_pickle(employment_pickle_path)
empdf

In [None]:
# Apply the shared column names so the frame can be merged on 'CNTRY_NAME'.
# Positional rename: assumes the pickled frame has exactly these four columns in this order.
employment_column_names = ['CNTRY_NAME', 'Country Code', 'year', 'emp_rate']
empdf.columns = employment_column_names

In [None]:
# Join employment rates with the population dataframe on country name (default inner join)
emppopdf = pd.merge(empdf, africapopdf1, on='CNTRY_NAME')

In [None]:
# Line chart of employment rate by year, one coloured line per country.
# Fixes the spelling of "Employment" in the chart title shown to the reader.
import plotly.express as px

fig = px.line(
    emppopdf,
    x="year",
    y="emp_rate",
    color='CNTRY_NAME',
    title='Africa Employment Trends 2010-2018',
)
fig.show()

In [None]:
# Keep only the employment rows whose year equals "2018" (the comparison is against a string)
employment_is_2018 = emppopdf["year"] == "2018"
emppopdf1 = emppopdf.loc[employment_is_2018]

In [None]:
# Second composite frame: 2018 employment/population rows joined with cumulative cases
# on country name (default inner join)
composite_df1 = pd.merge(emppopdf1, cumcases_df, on='CNTRY_NAME')
composite_df1

In [None]:
# Incidence and death rates: count / Pop_Mil / 10
# (per 100,000 people, assuming Pop_Mil is population in millions — TODO confirm)
employment_population_mil = composite_df1['Pop_Mil']
composite_df1['Cases_100K'] = composite_df1['Confirmed'].div(employment_population_mil).div(10)
composite_df1['Deaths_100K'] = composite_df1['Deaths'].div(employment_population_mil).div(10)

In [None]:
# Bubble chart: cases per 100K (x) against employment rate (y), with marker size showing
# deaths per 100K.
# The stray mid-cell `composite_df1` expression was removed: only the last expression
# of a cell is displayed, so it produced no output.
import plotly.express as px

fig = px.scatter(
    composite_df1,
    x="Cases_100K",
    y="emp_rate",
    size="Deaths_100K",
    color="CNTRY_NAME",
    hover_name="CNTRY_NAME",
    title='Confirmed Cases and Deaths VS employment rates',
    log_x=False,
    size_max=60,
)
fig.show()

# Next we read the Human Development Index and Show the trends 
> * We will process HDI data the same way as GDP and Employment

In [None]:
# Load the pickled Human Development Index (HDI) dataframe
hdi_pickle_path = 'data/hdi_df_pickle.pkl'
hdi_df = pd.read_pickle(hdi_pickle_path)
hdi_df

In [None]:
# Apply the shared column names so the frame can be merged on 'CNTRY_NAME'.
# Positional rename: assumes the pickled frame has exactly these four columns in this order.
hdi_column_names = ['CNTRY_NAME', 'Country Code', 'year', 'HDI']
hdi_df.columns = hdi_column_names

In [None]:
# Join HDI values with the population dataframe on country name (default inner join)
hdipopdf = pd.merge(hdi_df, africapopdf1, on='CNTRY_NAME')
hdipopdf

In [None]:
# Line chart of HDI by year, one coloured line per country (plotly express)
import plotly.express as px

fig = px.line(
    data_frame=hdipopdf,
    x="year",
    y="HDI",
    color='CNTRY_NAME',
    title='Africa Trends of Human Development Index',
)
fig.show()

In [None]:
# Keep only the HDI rows whose year equals "2011" (the comparison is against a string).
# NOTE(review): the GDP and employment sections filter on "2018", and this cell was
# previously described as selecting 2018 — confirm whether "2011" is intentional
# (e.g. the latest year available in the HDI data) or should be "2018".
hdi_year_mask = hdipopdf["year"] == "2011"
hdipopdf1 = hdipopdf.loc[hdi_year_mask]

In [None]:
# Third composite frame: the selected HDI/population rows joined with cumulative cases
# on country name (default inner join)
composite_df2 = pd.merge(hdipopdf1, cumcases_df, on='CNTRY_NAME')
composite_df2

In [None]:
# Incidence and death rates: count / Pop_Mil / 10
# (per 100,000 people, assuming Pop_Mil is population in millions — TODO confirm)
hdi_population_mil = composite_df2['Pop_Mil']
composite_df2['Cases_100K'] = composite_df2['Confirmed'].div(hdi_population_mil).div(10)
composite_df2['Deaths_100K'] = composite_df2['Deaths'].div(hdi_population_mil).div(10)

In [None]:
# Bubble chart: cases per 100K (x) against HDI (y), with marker size showing deaths per 100K.
# The stray mid-cell `composite_df2` expression was removed: only the last expression
# of a cell is displayed, so it produced no output.
import plotly.express as px

fig = px.scatter(
    composite_df2,
    x="Cases_100K",
    y="HDI",
    size="Deaths_100K",
    color="CNTRY_NAME",
    hover_name="CNTRY_NAME",
    title='Confirmed Cases and Deaths VS HDI',
    log_x=False,
    size_max=60,
)
fig.show()