In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import os

### Data Preparation

### 1. Loading the data

In [2]:
# Read the GDP per capita table: the first four lines of the file are skipped
# and the second column is used as the index.
gdp_data = pd.read_csv('Data/GPD Per Capita.csv', skiprows=4, index_col=1)
# Discard every column whose label contains "Unnamed"
gdp_data = gdp_data.drop(columns=gdp_data.filter(regex="Unnamed").columns)

# Read the accompanying metadata table (indexed by its first column) and
# discard its "Unnamed" columns in the same way.
gdp_metadata = pd.read_csv('Data/GPD Per Capita Metadata.csv', index_col=0)
gdp_metadata = gdp_metadata.drop(columns=gdp_metadata.filter(regex="Unnamed").columns)

In [3]:
# Read the maternal mortality rate table: the first four lines of the file are
# skipped and the second column is used as the index.
mortality_rate_data = pd.read_csv('Data/Maternal Mortality Rate Data.csv', skiprows=4, index_col=1)
# Discard every column whose label contains "Unnamed"
mortality_rate_data = mortality_rate_data.drop(
    columns=mortality_rate_data.filter(regex="Unnamed").columns
)

# Read the accompanying metadata table (indexed by its first column) and
# discard its "Unnamed" columns in the same way.
mortality_rate_metadata = pd.read_csv('Data/Maternal Mortality Rate Metadata.csv', index_col=0)
mortality_rate_metadata = mortality_rate_metadata.drop(
    columns=mortality_rate_metadata.filter(regex="Unnamed").columns
)

In [4]:
# Combine the GDP metadata with the GDP values by joining on 'Country Code'
gdp_df = gdp_metadata.merge(gdp_data, on='Country Code')
# Preview one randomly selected row of the combined dataframe
gdp_df.sample(n=1)

Unnamed: 0_level_0,Region,IncomeGroup,SpecialNotes,TableName,Country Name,Indicator Name,Indicator Code,1960,1961,1962,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BMU,North America,High income,,Bermuda,Bermuda,GDP per capita (current US$),NY.GDP.PCAP.CD,1902.402085,1961.538135,2020.385929,...,99471.638898,98467.683994,102005.625642,106885.878489,111820.581466,113050.736882,116153.166122,107791.886435,111774.669092,118774.790657


In [5]:
# Combine the mortality rate metadata with the mortality rate values by joining
# on 'Country Code'
mortality_rate_df = mortality_rate_metadata.merge(mortality_rate_data, on='Country Code')
# Preview one randomly selected row of the combined dataframe
mortality_rate_df.sample(n=1)

Unnamed: 0_level_0,Region,IncomeGroup,SpecialNotes,TableName,Country Name,Indicator Name,Indicator Code,1960,1961,1962,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZMB,Sub-Saharan Africa,Lower middle income,National accounts data were rebased to reflect...,Zambia,Zambia,"Maternal mortality ratio (modeled estimate, pe...",SH.STA.MMRT,,,,...,191.0,169.0,166.0,155.0,156.0,145.0,129.0,135.0,,


### 2. Preparing the data


In [6]:
# Trim the GDP and maternal mortality rate dataframes by removing the
# descriptive columns that are not used in the rest of the notebook
gdp_df = gdp_df.drop(
    columns=["IncomeGroup", "TableName", "Indicator Name", "Indicator Code", "SpecialNotes"]
)
mortality_rate_df = mortality_rate_df.drop(
    columns=["IncomeGroup", "TableName", "Indicator Name", "Indicator Code", "SpecialNotes"]
)

In [7]:
# Reshape both dataframes from wide to long with pandas melt(): "Region" and
# "Country Name" stay as identifier columns, every other column label goes into
# a "Year" column, and the matching cell values go into a single value column.
# ignore_index=False keeps the existing index on the melted rows.
gdp_df = pd.melt(
    gdp_df,
    id_vars=["Region", "Country Name"],
    var_name="Year",
    value_name="GDP",
    ignore_index=False,
).copy()
mortality_rate_df = pd.melt(
    mortality_rate_df,
    id_vars=["Region", "Country Name"],
    var_name="Year",
    value_name="MortalityRate",
    ignore_index=False,
).copy()

In [8]:
# Save the dataframes to csv files, writing the index as the first column.
# The input files are read from 'Data/' while these outputs go to 'data/'; on a
# case-sensitive filesystem that is a different directory which may not exist
# yet, so create it first instead of letting to_csv fail.
os.makedirs('data', exist_ok=True)
gdp_df.to_csv('data/gdp.csv', index=True)
mortality_rate_df.to_csv('data/mortality.csv', index=True)

### 3. Visualizing and saving the data

In [18]:
# Plot a scatterplot of maternal mortality rate against GDP per capita.
#
# The two measures live in separate dataframes, so they are first joined on
# country code and year. This pairs each GDP value with the mortality rate of
# the same country and year explicitly, rather than relying on both dataframes
# happening to share the same row order and length.
# rename_axis() pins the index name so that reset_index() always yields a
# "Country Code" column; validate="one_to_one" makes the merge raise if a
# (country, year) pair is duplicated on either side instead of silently
# multiplying rows.
scatter_df = (
    gdp_df.rename_axis("Country Code")
    .reset_index()
    .merge(
        mortality_rate_df.rename_axis("Country Code")
        .reset_index()[["Country Code", "Year", "MortalityRate"]],
        on=["Country Code", "Year"],
        validate="one_to_one",
    )
)

# Columns are referenced by name so that the `labels` mapping (keyed on column
# names) is applied to the axis titles and hover text. "Region" needs no label
# entry because the column name is already the desired legend title.
fig = px.scatter(
    scatter_df,
    x="GDP",
    y="MortalityRate",
    color="Region",
    color_discrete_sequence=px.colors.qualitative.Bold,
    hover_data=["Country Name"],
    labels={
        "GDP": "GDP Per Capita",
        "MortalityRate": "Maternal Mortality Rate",
    },
)
# Centre the title horizontally near the top of the figure
fig.update_layout(
    title={
        'text': "Maternal Mortality Rate against GDP Per Capita",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)
fig.show()

In [19]:
# Create the directory that will hold the exported HTML chart.
# exist_ok=True makes this a no-op when the directory is already present, which
# avoids the gap between a separate existence check and the mkdir call.
os.makedirs('charts', exist_ok=True)

In [20]:
# Export the figure as an HTML file inside the charts directory
fig.write_html(file="charts/maternal_mortality_gdp.html")