In [47]:
import pandas as pd
import pycountry

## Chart 1: Maternaly Mortality vs GDP PC PPP

a helper function for country matching:

In [98]:
def ISO3(country_name):
    matches = { ## Hardcoding Strings pycountry won't catch
        'Bolivia (Plurinational State of)':'BOL',
        'Democratic Republic of the Congo':'COD',
        'Dem. Rep. Congo': 'COD',
        'Iran (Islamic Republic of)':'IRN',
        'Micronesia (Federated States of)':'FSM',
        'Türkiye': 'TUR',
        'TÃ¼rkiye': 'TUR',
        'Venezuela (Bolivarian Republic of)':'VEN',
        'Hong Kong SAR, China' : 'HKG',
        'SÃ£o TomÃ© and Principe' : 'STP'
    }
    if country_name in matches:
        return matches[country_name]
    try:
        return pycountry.countries.search_fuzzy(country_name)[0].alpha_3
    except:
        print(f'No Match for {country_name}')
        return None

In [117]:
def name_Flag(ISO3):
    country = pycountry.countries.get(alpha_3=ISO3)
    return(f'{country.name} {country.flag}')

In [255]:
def ISO3_flag(ISO3):
    country = pycountry.countries.get(alpha_3=ISO3)
    return(f'{country.flag}{ISO3}')

In [95]:
mm_df = pd.read_excel("Data 2023-01-02 15-07.xlsx", sheet_name="Data")
mm_df = mm_df[['Year', 'Country', 'WHO region', 'Value Numeric']]
mm_df['ISO3'] = mm_df.apply(lambda row: ISO3(row.Country) , axis = 1)
mm_df

Unnamed: 0,Year,Country,WHO region,Value Numeric,ISO3
0,2000,Afghanistan,Eastern Mediterranean,1450,AFG
1,2001,Afghanistan,Eastern Mediterranean,1390,AFG
2,2002,Afghanistan,Eastern Mediterranean,1300,AFG
3,2003,Afghanistan,Eastern Mediterranean,1240,AFG
4,2004,Afghanistan,Eastern Mediterranean,1180,AFG
...,...,...,...,...,...
3289,2013,Zimbabwe,Africa,509,ZWE
3290,2014,Zimbabwe,Africa,494,ZWE
3291,2015,Zimbabwe,Africa,480,ZWE
3292,2016,Zimbabwe,Africa,468,ZWE


In [48]:
pop_df = pd.read_excel("WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx", skiprows=16)
pop_df = pop_df[['Region, subregion, country or area *', 'ISO3 Alpha-code','Year', 'Total Population, as of 1 July (thousands)']]
pop_df.columns = ['Country', 'ISO3', 'Year', 'Population']

In [39]:
gdp_df = pd.read_csv('UNdata_Export_20230102_161208339.csv')
gdp_df['ISO3'] = gdp_df.apply(lambda row: ISO3(row['Country or Area']) , axis = 1)

In [127]:
df = pd.merge(left=mm_df,right=gdp_df, left_on=['ISO3','Year'], right_on=['ISO3','Year'], how='inner')
df = df[['ISO3', 'Country', 'Year', 'Value', 'Value Numeric', 'WHO region']]
df.columns = ['ISO3', 'Country', 'Year', 'GDP pc', 'Maternal Mortality', 'Region']

df = pd.merge(df, pop_df, how='inner', left_on=['ISO3','Year'], right_on=['ISO3','Year'])
df = df[['Year', 'ISO3', 'Country_x', 'Region', 'GDP pc', 'Maternal Mortality', 'Population']]
df.columns = ['Year', 'ISO3', 'Country', 'Region', 'GDP pc', 'Maternal Mortality', 'Population']

it'd be nice to have flags with the names:

In [265]:
countries_with_flags = dict(zip(list(set(df.ISO3)),[name_Flag(ISO3) for ISO3 in set(df.ISO3)])) #Avoiding minutes of repeat calls
ISO3s_with_flags = dict(zip(list(set(df.ISO3)),[ISO3_flag(ISO3) for ISO3 in set(df.ISO3)]))
df['Country'] = df.apply(lambda row: countries_with_flags[row.ISO3] , axis = 1)
df['ISO3_flag']  = df.apply(lambda row: ISO3s_with_flags[row.ISO3] , axis = 1)
df = df.drop_duplicates(['Year', 'ISO3'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Country'] = df.apply(lambda row: countries_with_flags[row.ISO3] , axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ISO3_flag']  = df.apply(lambda row: ISO3s_with_flags[row.ISO3] , axis = 1)


In [266]:
with open('mortality_gdp.json', 'w') as f:
    f.write(df.to_json(orient='records'))

In [132]:
df.to_excel('processed.xlsx')

In [252]:
changes_df = df.loc[(df.Year==2000) | (df.Year==2017)][['Year', 'Country', 'ISO3', 'Maternal Mortality']]
changes_df = changes_df.drop_duplicates(['Year', 'ISO3'], False)
changes_df = changes_df.pivot(index="ISO3", columns=["Year"], values=["Maternal Mortality", 'Country'])


  changes_df = changes_df.drop_duplicates(['Year', 'ISO3'], False)


In [254]:
changes_df.to_excel('changes.xlsx')

### Second Chart - Healthcare Expenditure vs Life Expectency

In [356]:
ex_df = pd.read_excel("expend.xlsx", skiprows=6)
ex_df = ex_df.iloc[1:39, [0]+list(range(3,len(ex_df.columns)))]
ex_df = ex_df.rename(columns={'Year':'Country'})
ex_df = ex_df.melt("Country", var_name='Year', value_name='Expenditure')
ex_df['ISO3'] = ex_df.apply(lambda row: ISO3(row.Country) , axis = 1)
ex_df.Year = ex_df.Year.astype('float64')
ex_df


Unnamed: 0,Country,Year,Expenditure,ISO3
0,Australia,2010.0,3593.395,AUS
1,Austria,2010.0,4258.99,AUT
2,Belgium,2010.0,3908.473,BEL
3,Canada,2010.0,4155.529,CAN
4,Chile,2010.0,1254.254,CHL
...,...,...,...,...
451,Sweden,2021.0,6262.31,SWE
452,Switzerland,2021.0,..,CHE
453,Türkiye,2021.0,..,TUR
454,United Kingdom,2021.0,5387.23,GBR


In [303]:
le_df = pd.read_excel("WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx", skiprows=16)
le_df = le_df[['Region, subregion, country or area *', 'ISO3 Alpha-code','Year', 'Life Expectancy at Birth, both sexes (years)']]
le_df.columns = ['Country', 'ISO3', 'Year', 'Life Expectancy']

In [364]:
df = pd.merge(ex_df, le_df, on=['ISO3', 'Year'], how='inner')
df = df[['ISO3','Country_x','Year', 'Expenditure','Life Expectancy']]
df = df.rename(columns={'Country_x': 'Country'})

In [365]:
df

Unnamed: 0,ISO3,Country,Year,Expenditure,Life Expectancy
0,AUS,Australia,2010.0,3593.395,82.055
1,AUT,Austria,2010.0,4258.99,80.464
2,BEL,Belgium,2010.0,3908.473,80.033
3,CAN,Canada,2010.0,4155.529,81.347
4,CHL,Chile,2010.0,1254.254,78.501
...,...,...,...,...,...
451,SWE,Sweden,2021.0,6262.31,82.983
452,CHE,Switzerland,2021.0,..,83.987
453,TUR,Türkiye,2021.0,..,76.032
454,GBR,United Kingdom,2021.0,5387.23,80.742


0      2010.0
1      2010.0
2      2010.0
3      2010.0
4      2010.0
        ...  
451    2021.0
452    2021.0
453    2021.0
454    2021.0
455    2021.0
Name: Year, Length: 456, dtype: float64