In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Pillar codes; each one names a train/test pair of CSV files under data/.
pillars = ["busi", "econ", "educ", "envi", "gove", "heal", "pers", "safe", "soci"]


def _read_without_saved_index(csv_path):
    """Read a CSV and discard its "Unnamed: 0" column."""
    return pd.read_csv(csv_path).drop(columns="Unnamed: 0")


# Layout: data[pillar]["train" | "test"] -> DataFrame
data = {}
for pillar in pillars:
    train_frame = _read_without_saved_index("data/{0}_train.csv".format(pillar))
    test_frame = _read_without_saved_index("data/{0}_test.csv".format(pillar))
    data[pillar] = {"test": test_frame, "train": train_frame}

In [2]:
# Build one wide frame: the identifying columns from the "busi" training set
# followed by one score column per pillar, in `pillars` order.
# NOTE(review): the frames are placed side by side by row index, not joined on
# (isocode, year) -- presumably every *_train.csv lists its rows in the same
# order; verify against the data files.
id_columns = data["busi"]["train"][["isocode", "year"]]
pillar_columns = [data[name]["train"][[name]] for name in pillars]
score_table = pd.concat([id_columns] + pillar_columns, axis=1)

# Prosperity is the unweighted mean of the pillar scores in each row.
score_table["prosperity"] = score_table[pillars].mean(axis=1)
prosperity_table = score_table.loc[:, ["isocode", "year", "prosperity"]]

# Five highest prosperity scores, with the country and year each belongs to
prosperity_table.sort_values(by="prosperity", ascending=False).head()

Unnamed: 0,isocode,year,prosperity
1149,NZL,2014,78.914742
1000,NZL,2013,78.832643
576,SWE,2010,78.657402
1147,NOR,2014,78.500937
725,SWE,2011,78.487949


In [21]:
# Year-over-year change in prosperity: one row per country, one column per year.
# diff(axis=1) subtracts the previous year's column; the first year has no
# predecessor (all NaN after the diff), so it is dropped with iloc[:, 1:].
prosperity_growth_table = pd.pivot_table(
    prosperity_table,
    values="prosperity",
    index="isocode",
    columns="year",
).diff(axis=1).iloc[:, 1:]

# Attach each country's region. pd.merge defaults to an inner join, so any
# country whose isocode has no matching "alpha-3" row is dropped here.
region_data = pd.read_csv("data/region_data.csv")[["alpha-3", "region"]]
prosperity_growth_table = pd.merge(
    prosperity_growth_table.reset_index(),
    region_data,
    left_on="isocode",
    right_on="alpha-3",
).drop("alpha-3", axis=1)

# Average growth per region: mean over countries for each year, then mean over
# years. numeric_only=True keeps the string "isocode" column out of the
# aggregation; without it pandas >= 2.0 raises TypeError, while older versions
# silently dropped the column.
prosperity_subregion = (
    prosperity_growth_table
    .groupby("region")
    .mean(numeric_only=True)
    .mean(axis=1)
    .sort_values(ascending=False)
)

# Broadcast the regional average back onto every country row. Series.map looks
# each region up in the index of prosperity_subregion; a region missing from
# that index yields NaN instead of raising KeyError.
prosperity_growth_table["region-score"] = (
    prosperity_growth_table["region"].map(prosperity_subregion)
)

# Long-format export: one row per (country, year).
prosperity_growth_table.melt(
    id_vars=["isocode", "region", "region-score"],
    var_name="Year",
    value_name="Prosperity Growth",
).to_csv("data/continental_prosperity_data.csv", index=False)

# Rank regions within each year by mean growth (rank 1 = highest growth).
# The year columns are selected before aggregating so that neither "isocode"
# nor "region-score" takes part in the mean.
rank_years = [2008, 2009, 2010, 2011, 2012, 2013, 2014]
# NOTE(review): unlike the export above, this one writes the row index as an
# unnamed first column (no index=False). Left unchanged in case a consumer of
# the file relies on that layout -- confirm and drop it if not.
(
    prosperity_growth_table
    .groupby("region")[rank_years]
    .mean()
    .rank(ascending=False)
    .reset_index()
    .melt(
        id_vars=["region"],
        var_name="Year",
        value_name="Prosperity Growth Rank",
    )
    .to_csv("data/continental_prosperity_rank.csv")
)