In [1]:
import pandas as pd

In [None]:
# ND-GAIN table: a CSV that was downloaded by hand from the
# Notre Dame Global Adaptation Initiative and stored under resources/gain/.
df_nd = pd.read_csv(filepath_or_buffer="resources/gain/gain.csv")
df_nd.head(5)

Unnamed: 0,ISO3,Name,1995,1996,1997,1998,1999,2000,2001,2002,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,AFG,Afghanistan,34.78353,34.775074,34.988812,35.293407,35.177507,35.065559,35.198269,35.335123,...,31.544162,31.795961,31.903926,31.346547,31.165179,31.910511,31.727814,32.833517,32.633596,32.765017
1,ALB,Albania,41.396494,41.379214,41.333451,41.100159,41.026585,41.38143,41.395906,41.602467,...,46.435235,47.225916,47.113945,47.064079,47.34087,47.412095,47.738609,48.44332,49.348682,49.747451
2,DZA,Algeria,45.208524,45.310608,44.711577,44.217693,44.233838,44.198576,44.255934,44.295123,...,44.982398,45.123443,45.471441,46.624614,46.353467,46.487537,47.531633,47.246477,47.391899,47.689392
3,AND,Andorra,,,,,,,,,...,,,,,,,,,,
4,AGO,Angola,34.167486,34.149985,34.130968,34.111716,34.072943,34.030188,34.12944,34.436386,...,31.935004,31.790461,32.897104,33.213149,34.102874,34.774563,35.292144,36.595834,36.894235,37.043357


In [None]:
# Keep only the country code and the 2022 column (the same year as the EPI
# dataset) and drop rows with a missing value in either of them. The numeric
# values are converted to categorical labels further down in this cell.
df_nd = df_nd.loc[:, ["ISO3", "2022"]].dropna()

def categorize(value, low=40, high=60):
    """Bucket a numeric score into "low", "normal" or "high".

    Parameters
    ----------
    value : number
        Score to classify.
    low : number, default 40
        Scores strictly below this threshold are labelled "low".
    high : number, default 60
        Scores strictly above this threshold are labelled "high".

    Returns
    -------
    str, or the input unchanged when it is missing
        "high", "low" or "normal" for a real score. A missing value
        (NaN/None) is passed through as-is so it stays recognisable as missing.
    """
    # A missing score has no category. Without this guard NaN fails both
    # comparisons below and would be silently labelled "normal".
    if pd.isna(value):
        return value
    if value > high:
        return "high"
    if value < low:
        return "low"
    # Both thresholds are exclusive, so the boundary values count as "normal".
    return "normal"

# Swap each 2022 value for its category label, then switch to the column names
# used downstream ("iso" is the key shared with the EPI data).
df_nd = df_nd.assign(**{"2022": df_nd["2022"].apply(categorize)})
df_nd = df_nd.rename(columns={"ISO3": "iso", "2022": "nd-gain"})
df_nd.head(5)

Unnamed: 0,iso,nd-gain
0,AFG,low
1,ALB,normal
2,DZA,normal
4,AGO,low
5,ATG,normal


In [None]:
# Read the already-cleaned EPI dataset from the working directory.
df_epi = pd.read_csv(filepath_or_buffer="epi_cleaned.csv")
df_epi.head(5)

Unnamed: 0,code,iso,country,AMP,APR,BCA,BHV,BLC,CDA,CDO,...,TC5,TCC,TCL,USD,UWD,VOE,WL5,WTA,WTL,WWT
0,4,AFG,Afghanistan,0.0,13.13483,0.021409,0.418988,14.917438,-0.051453,9780.0,...,85.0,39.0,6e-05,466.408266,674.481233,0.015732,0.0,71.499301,0.0,0.0
1,8,ALB,Albania,178.803804,9.532294,-0.111462,0.348171,0.945792,0.011736,5560.0,...,7684.0,1241.0,0.002041,39.41207,116.09586,0.009952,0.738103,56.01069,0.002602,0.01853
2,12,DZA,Algeria,26.558865,1.27322,0.0067,0.596696,69.293182,0.047309,184000.0,...,84559.0,14169.0,0.011911,39.637104,127.632487,0.021838,0.0,21.748026,0.0,0.3312
3,20,AND,Andorra,0.0,3.480068,-0.007744,0.563142,27.61729,0.024957,458.0,...,56.0,28.0,0.000567,0.913971,1.655597,0.01144,52.151606,10938.094877,0.003112,1.0
4,24,AGO,Angola,1.40094,0.083847,0.015176,0.614766,27.555363,0.01673,27000.0,...,1400956.0,298827.0,0.00425,1453.097805,2329.179532,0.064238,0.0,7178.013668,0.0,0.0


In [None]:
# Attach the ND-GAIN label to every EPI row via the shared "iso" key. The left
# join keeps all EPI rows; the dropna() afterwards removes every row that still
# contains a missing value in any column (including EPI rows without a label).
df_merged = pd.merge(df_epi, df_nd, how="left", on="iso").dropna()
df_merged.head(5)

Unnamed: 0,code,iso,country,AMP,APR,BCA,BHV,BLC,CDA,CDO,...,TCC,TCL,USD,UWD,VOE,WL5,WTA,WTL,WWT,nd-gain
0,4,AFG,Afghanistan,0.0,13.13483,0.021409,0.418988,14.917438,-0.051453,9780.0,...,39.0,6e-05,466.408266,674.481233,0.015732,0.0,71.499301,0.0,0.0,low
1,8,ALB,Albania,178.803804,9.532294,-0.111462,0.348171,0.945792,0.011736,5560.0,...,1241.0,0.002041,39.41207,116.09586,0.009952,0.738103,56.01069,0.002602,0.01853,normal
2,12,DZA,Algeria,26.558865,1.27322,0.0067,0.596696,69.293182,0.047309,184000.0,...,14169.0,0.011911,39.637104,127.632487,0.021838,0.0,21.748026,0.0,0.3312,normal
4,24,AGO,Angola,1.40094,0.083847,0.015176,0.614766,27.555363,0.01673,27000.0,...,298827.0,0.00425,1453.097805,2329.179532,0.064238,0.0,7178.013668,0.0,0.0,low
5,28,ATG,Antigua and Barbuda,291.127316,9.924773,0.003106,0.339933,0.034825,0.019086,537.0,...,15.0,0.002896,46.832324,174.021747,0.001167,0.0,22.476116,0.0,0.157115,normal


In [None]:
# Row counts before and after the merge. About 15% of the countries in the EPI
# dataset were not covered by the Notre Dame dataset; this loss is considered
# acceptable.
print(f"Entries in EPI dataframe:    {len(df_epi)}")
print(f"Entries in merged dataframe: {len(df_merged)}")

Entries in EPI dataframe:    220
Entries in merged dataframe: 187


In [42]:
# Persist the merged frame as CSV for later use; the row index is not written.
df_merged.to_csv(path_or_buf="epi_nd-gain.csv", index=False)