In [36]:
import json
from io import StringIO

import pandas as pd
import requests


# Open the JSON file made earlier for another chart, as it holds information
# that is relevant here too.
with open('Tax vs Spending per capita data.json', encoding='utf-8') as f:
    data = json.load(f)
df = pd.DataFrame(data)


# Scrape the education-index table from the website and turn it into a DataFrame.

url = "https://rankedex.com/society-rankings/education-index"

# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() fails loudly on an HTTP error instead of trying to parse
# an error page as if it were the ranking table.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Passing literal HTML to read_html as a plain string is deprecated in recent
# pandas releases; wrapping the text in StringIO gives it a file-like object.
df_Education = pd.read_html(StringIO(response.text))[0]

df_Education


Unnamed: 0.1,Unnamed: 0,Rank,Education index
0,Germany,1,0.940
1,Australia,2,0.929
2,Denmark,3,0.920
3,Ireland,4,0.918
4,New Zealand,5,0.917
...,...,...,...
184,South Sudan,157,0.297
185,Mali,158,0.293
186,Burkina Faso,159,0.286
187,Eritrea,160,0.281


In [37]:
import pycountry

# Use pycountry to assign a country code to each country name, which makes merging with the other data easier.
# Merging on country codes avoids problems caused by different datasets using slightly different names for the same country.

def do_fuzzy_search(country):
    """Return the three-letter code of pycountry's best fuzzy match for a name.

    Parameters
    ----------
    country : str
        Country name as it appears in the scraped table.

    Returns
    -------
    str
        The ``alpha_3`` attribute of the first result returned by
        ``pycountry.countries.search_fuzzy``, or the sentinel string
        ``"Attention has not worked"`` when the name cannot be resolved, so
        that failed lookups stand out in the resulting column.
    """
    not_found = "Attention has not worked"
    # Non-string cells (e.g. NaN from a blank table cell) cannot be looked up;
    # report them with the same sentinel instead of raising.
    if not isinstance(country, str):
        return not_found
    try:
        result = pycountry.countries.search_fuzzy(country)
        return result[0].alpha_3
    except LookupError:
        # search_fuzzy signals "no match" with LookupError (IndexError on an
        # empty result is a LookupError too). Catching only that, rather than
        # using a bare except, lets KeyboardInterrupt and genuine bugs surface
        # instead of being silently turned into the sentinel.
        return not_found
# The returned message makes it obvious where the lookup has not worked.

# Resolve every distinct country name to a code once, then map the codes back
# onto the rows of the education table.
iso_map = dict(
    (country, do_fuzzy_search(country))
    for country in df_Education["Unnamed: 0"].unique()
)
df_Education["Country code"] = df_Education["Unnamed: 0"].map(iso_map)

# Manually assigned codes for the rows where pycountry failed, keyed by row label.
# NOTE(review): these row labels depend on the order of the scraped table --
# confirm they still point at the intended countries whenever it is re-scraped.
# NOTE(review): 'TMP' looks like the withdrawn ISO code for East Timor (the
# current code is 'TLS') -- confirm which one the tax/spending data uses.
manual_codes = {
    102: 'PSE',
    132: 'CPV',
    138: 'SWZ',
    144: 'TMP',
    150: 'COD',
    152: 'LAO',
    166: 'CIV',
}
for row_label, code in manual_codes.items():
    df_Education.loc[row_label, 'Country code'] = code


df_Education.head()


Unnamed: 0.1,Unnamed: 0,Rank,Education index,Country code
0,Germany,1,0.94,DEU
1,Australia,2,0.929,AUS
2,Denmark,3,0.92,DNK
3,Ireland,4,0.918,IRL
4,New Zealand,5,0.917,NZL


In [38]:
# Join the education index onto the tax/spending data through the shared
# 'Country code' column (inner join: only codes present in both are kept).

df_merged = pd.merge(
    left=df_Education,
    right=df,
    how='inner',
    on='Country code',
)



In [39]:
# Remove the columns that are no longer needed: the scraped country-name
# column ('Unnamed: 0') and 'Rank'.

unwanted_columns = ['Unnamed: 0', 'Rank']
df_merged = df_merged.drop(unwanted_columns, axis=1)
df_merged.head()


Unnamed: 0,Education index,Country code,Country,Population,GDP,GDP per capita,Total tax revenue,Government spending,Spending per Capita,Tax per Capita
0,0.94,DEU,Germany,83237124.0,4262767000000.0,51238.0,1666454000000.0,2362676000000.0,28384.882688,20020.561979
1,0.929,AUS,Australia,25767000.0,1635255000000.0,63464.0,459546000000.0,593856000000.0,23047.153336,17834.672255
2,0.92,DNK,Denmark,5873420.0,398303000000.0,68202.0,180287000000.0,193831000000.0,33001.385905,30695.404041
3,0.918,IRL,Ireland,5060005.0,504517000000.0,100129.0,93221000000.0,117149000000.0,23151.953407,18423.104325
4,0.917,NZL,New Zealand,5112000.0,246974000000.0,48317.0,72532000000.0,90431000000.0,17689.945227,14188.5759


In [40]:
# Scrape the life expectancy table from the website.

Life = pd.read_html(
    'https://www.worldometers.info/demographics/life-expectancy/')

# Keep only the two columns needed, with a shorter name for the expectancy
# figure. The trailing .copy() makes df_Life an independent DataFrame, so the
# 'Country code' assignments made on it afterwards modify it directly instead
# of triggering SettingWithCopyWarning on a slice of the scraped table.
df_Life = (
    Life[0]
    .rename(columns={'Life Expectancy (both sexes)': 'Life expectancy'})
    [['Life expectancy', 'Country']]
    .copy()
)


# Doing the same for the life expectancy dataset: assigning country codes.
def do_fuzzy_search(country):
    """Return the three-letter code of pycountry's best fuzzy match for a name.

    Note: this redefines the ``do_fuzzy_search`` from the education-index cell
    so that this cell can run on its own.

    Parameters
    ----------
    country : str
        Country name as it appears in the scraped table.

    Returns
    -------
    str
        The ``alpha_3`` attribute of the first result returned by
        ``pycountry.countries.search_fuzzy``, or the sentinel string
        ``"Attention has not worked"`` when the name cannot be resolved, so
        that failed lookups stand out in the resulting column.
    """
    not_found = "Attention has not worked"
    # Non-string cells (e.g. NaN from a blank table cell) cannot be looked up;
    # report them with the same sentinel instead of raising.
    if not isinstance(country, str):
        return not_found
    try:
        result = pycountry.countries.search_fuzzy(country)
        return result[0].alpha_3
    except LookupError:
        # search_fuzzy signals "no match" with LookupError (IndexError on an
        # empty result is a LookupError too). Catching only that, rather than
        # using a bare except, lets KeyboardInterrupt and genuine bugs surface
        # instead of being silently turned into the sentinel.
        return not_found


# Resolve every distinct country name to a code once, then map the codes back
# onto the rows of the life expectancy table.
iso_map = dict(
    (country, do_fuzzy_search(country))
    for country in df_Life["Country"].unique()
)
df_Life["Country code"] = df_Life["Country"].map(iso_map)


# Remaining codes, added manually and keyed by row label.
# NOTE(review): these row labels depend on the order of the scraped table --
# confirm they still point at the intended countries whenever it is re-scraped.
manual_codes = {
    8: 'GGY',
    34: 'VIR',
    43: 'CZE',
    120: 'VCT',
    139: 'STP',
    149: 'LAO',
    188: 'COD',
}
for row_label, code in manual_codes.items():
    df_Life.loc[row_label, 'Country code'] = code

# The country name is not carried into the merge; only the code is kept.
df_Life = df_Life.drop(columns='Country')

# Merge life expectancy with the existing merged frame on the country code,
# keep the first row for each code, and renumber the index from zero.
df_merged = (
    pd.merge(df_Life, df_merged, on='Country code')
    .drop_duplicates(subset='Country code')
    .reset_index(drop=True)
)

df_merged.head(10)

Unnamed: 0,Life expectancy,Country code,Education index,Country,Population,GDP,GDP per capita,Total tax revenue,Government spending,Spending per Capita,Tax per Capita
0,85.03,JPN,0.848,Japan,125681593.0,4932556000000.0,39246.0,1334944000000.0,1609710000000.0,12807.842116,10621.634944
1,84.25,CHE,0.897,Switzerland,8736510.0,799796000000.0,91546.0,234445000000.0,264232000000.0,30244.571345,26835.086322
2,84.07,SGP,0.832,Singapore,5454000.0,396992000000.0,72795.0,59678000000.0,96031000000.0,17607.444078,10942.060873
3,84.01,ITA,0.791,Italy,58983122.0,2101275000000.0,35625.0,620739000000.0,940771000000.0,15949.83392,10524.010581
4,83.99,ESP,0.824,Spain,47615034.0,1426224000000.0,30090.0,481945000000.0,657750000000.0,13813.91432,10121.698117
5,83.94,AUS,0.929,Australia,25767000.0,1635255000000.0,63464.0,459546000000.0,593856000000.0,23047.153336,17834.672255
6,83.52,ISL,0.912,Iceland,376248.0,25459000000.0,67665.0,9962000000.0,9735000000.0,25873.891688,26477.217155
7,83.5,KOR,0.862,South Korea,51736000.0,1797810000000.0,34744.0,363120000000.0,414452000000.0,8010.9015,7018.710376
8,83.49,ISR,0.874,Israel,9367000.0,488527000000.0,52152.0,128865000000.0,178465000000.0,19052.524821,13757.339596
9,83.33,SWE,0.904,Sweden,10452326.0,635664000000.0,60816.0,251009000000.0,282223000000.0,27000.975668,24014.654729


In [41]:
import json
# Export the merged DataFrame as a JSON file: a list with one dict per row.

exported_values = list(df_merged.T.to_dict().values())

# A context manager guarantees the file is flushed and closed as soon as the
# write finishes, rather than whenever the handle is garbage-collected.
with open('Gov spending, Education and Life expectancy.json',
          'w', encoding='utf-8') as out_file:
    chars_written = out_file.write(json.dumps(exported_values))

# Shown as the cell output: the number of characters written to the file.
chars_written


58256