In [181]:
import pandas as pd
import numpy as np
import plotly.express as px
import os

### Data Preparation

#### 1. Loading the data

In [191]:
# Read the GDP-per-capita (PPP) data file and its companion metadata file.
# The data file is read with its first 4 lines skipped and its second column as the index;
# the metadata file uses its first column as the index.
gdp_data = pd.read_csv('Data/GPD Per Capita PPP.csv', skiprows=4, index_col=1)
gdp_metadata = pd.read_csv('Data/GPD Per Capita PPP Metadata.csv', index_col=0)

# Discard every column whose label matches "Unnamed"
gdp_data = gdp_data.drop(columns=gdp_data.filter(regex="Unnamed").columns)
gdp_metadata = gdp_metadata.drop(columns=gdp_metadata.filter(regex="Unnamed").columns)

In [192]:
# Read the maternal mortality rate data file and its companion metadata file.
# The data file is read with its first 4 lines skipped and its second column as the index;
# the metadata file uses its first column as the index.
mortality_rate_data = pd.read_csv('Data/Maternal Mortality Rate Data.csv', skiprows=4, index_col=1)
mortality_rate_metadata = pd.read_csv('Data/Maternal Mortality Rate Metadata.csv', index_col=0)

# Discard every column whose label matches "Unnamed"
mortality_rate_data = mortality_rate_data.drop(
    columns=mortality_rate_data.filter(regex="Unnamed").columns
)
mortality_rate_metadata = mortality_rate_metadata.drop(
    columns=mortality_rate_metadata.filter(regex="Unnamed").columns
)

In [193]:
# Combine the GDP metadata (left) with the GDP values (right) into a single frame,
# keeping only the country codes present in both.
gdp_df = gdp_metadata.merge(right=gdp_data, how='inner', on='Country Code')
# Show one randomly chosen row as a quick sanity check of the merged frame
gdp_df.sample(n=1)

Unnamed: 0_level_0,Region,IncomeGroup,SpecialNotes,TableName,Country Name,Indicator Name,Indicator Code,1960,1961,1962,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
COD,Sub-Saharan Africa,Low income,The World Bank systematically assesses the app...,"Congo, Dem. Rep.","Congo, Dem. Rep.","GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,...,731.932821,824.412438,877.680835,932.405165,1023.539833,1073.446906,1104.843368,1102.553517,1184.720486,1337.834149


In [194]:
# Combine the mortality rate metadata (left) with the mortality rate values (right)
# into a single frame, keeping only the country codes present in both.
mortality_rate_df = mortality_rate_metadata.merge(
    right=mortality_rate_data, how='inner', on='Country Code'
)
# Show one randomly chosen row as a quick sanity check of the merged frame
mortality_rate_df.sample(n=1)

Unnamed: 0_level_0,Region,IncomeGroup,SpecialNotes,TableName,Country Name,Indicator Name,Indicator Code,1960,1961,1962,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BGD,South Asia,Lower middle income,The reporting period for national accounts dat...,Bangladesh,Bangladesh,"Maternal mortality ratio (modeled estimate, pe...",SH.STA.MMRT,,,,...,219.0,219.0,212.0,196.0,183.0,172.0,157.0,123.0,,


#### 2. Preparing the data


In [195]:
# Trim the GDP and maternal mortality rate dataframes down to the columns used later on.
# The same set of descriptive columns is removed from both frames.
unused_columns = ["IncomeGroup", "TableName", "Indicator Name", "Indicator Code", "SpecialNotes"]
gdp_df = gdp_df.drop(columns=unused_columns)
mortality_rate_df = mortality_rate_df.drop(columns=unused_columns)

In [196]:
# Reshape both frames from wide (one column per year) to long (one row per year).
# "Region" and "Country Name" stay as identifier columns; every other column label
# moves into "Year" and its cell value into the named value column.
# ignore_index=False keeps the existing index on the reshaped rows.
id_columns = ["Region", "Country Name"]
gdp_df = pd.melt(
    gdp_df,
    id_vars=id_columns,
    var_name="Year",
    value_name="GDP",
    ignore_index=False,
).copy()
mortality_rate_df = pd.melt(
    mortality_rate_df,
    id_vars=id_columns,
    var_name="Year",
    value_name="MortalityRate",
    ignore_index=False,
).copy()

In [199]:
# Display the reshaped GDP frame (Region, Country Name, Year, GDP) to check the melt result
gdp_df

Unnamed: 0_level_0,Region,Country Name,Year,GDP
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABW,Latin America & Caribbean,Aruba,1960,
AFE,,Africa Eastern and Southern,1960,
AFG,South Asia,Afghanistan,1960,
AFW,,Africa Western and Central,1960,
AGO,Sub-Saharan Africa,Angola,1960,
...,...,...,...,...
XKX,Europe & Central Asia,Kosovo,2022,14971.152056
YEM,Middle East & North Africa,"Yemen, Rep.",2022,
ZAF,Sub-Saharan Africa,South Africa,2022,15920.425410
ZMB,Sub-Saharan Africa,Zambia,2022,3975.600639


In [201]:
# Save the dataframes to csv files
# Make sure the output directory exists before writing: to_csv does not create
# missing directories, and the inputs were read from 'Data/' (capital D), which is
# a different directory on case-sensitive filesystems.
os.makedirs('data', exist_ok=True)
# index=True writes the index as the first column of each file
gdp_df.to_csv('data/gdp.csv', index=True)
mortality_rate_df.to_csv('data/mortality.csv', index=True)

#### 3. Visualizing and saving the data

In [200]:
#Plot a scatterplot of mortality rate data against GDP
# Join the two long-format frames on country code and year so that every point pairs
# a GDP value with the mortality rate of the SAME country and year. Taking x from one
# frame and y/colour from another would pair rows purely by their position.
scatter_df = pd.merge(
    gdp_df.reset_index(),
    mortality_rate_df.reset_index()[["Country Code", "Year", "MortalityRate"]],
    on=["Country Code", "Year"],
)
fig = px.scatter(
    scatter_df,
    x="MortalityRate",
    y="GDP",
    color="Region",
    hover_data=["Country Name"],
    # Labels are keyed by column name now that the columns are referenced by name
    labels={
        "MortalityRate": "Maternal Mortality Rate",
        "GDP": "GDP PPP",
        "Region": "Region",
    },
)
# Centre the title horizontally near the top of the figure
fig.update_layout(
    title={
        'text': "GDP PPP vs Maternal Mortality Rate",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)
fig.show()

In [172]:
# Create the directory that will hold the exported HTML figure.
# exist_ok=True makes this a no-op when the directory is already there, so the cell
# can be re-run safely without a separate existence check.
os.makedirs('exports', exist_ok=True)

In [174]:
# Save the figure as an HTML file inside the exports directory.
# NOTE(review): presumably requires the 'exports' directory to exist already — confirm the
# directory-creation cell has been run first.
fig.write_html("exports/maternal_mortality_gdp.html")