In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pulp

# display all columns of df
pd.set_option('display.max_columns', None)


In [19]:
hdi_data = pd.read_excel('data/processed/HDI_Table.xlsx')
hdi_data.head()

Unnamed: 0,HDI rank,Country,HDI,Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,HDI rank 2020
0,180,Afghanistan,0.478,61.9824,10.263844,2.98507,1824.190915,177
1,67,Albania,0.796,76.4626,14.448,11.286455,14131.11039,68
2,91,Algeria,0.745,76.3767,14.626896,8.069284,10800.22546,96
3,148,Angola,0.586,61.6434,12.1721,5.417391,5465.617791,149
4,71,Antigua and Barbuda,0.788,78.4968,14.184626,9.293741,16792.36595,71


In [20]:
hci_data = pd.read_excel('data/processed/HCI_Table.xlsx')
hci_data.head()

Unnamed: 0,Country,HCI
0,Afghanistan,0.400284
1,Albania,0.634251
2,Algeria,0.534556
3,Angola,0.362405
4,Antigua and Barbuda,0.595704


In [21]:
# drop countries if not in both datasets
hdi_data = hdi_data[hdi_data['Country'].isin(hci_data['Country'])]
hci_data = hci_data[hci_data['Country'].isin(hdi_data['Country'])]
hdi_data['Schooling'] = hdi_data[['Expected years of schooling', 'Mean years of schooling']].mean(axis=1)
hdi_data['Life expectancy'] = hdi_data['Life expectancy at birth']
hdi_data['living standarts'] = hdi_data['Gross national income (GNI) per capita']

hdi_data = hdi_data[['Country', 'Schooling', 'Life expectancy', 'living standarts']]
hdi_data


Unnamed: 0,Country,Schooling,Life expectancy,living standarts
0,Afghanistan,6.624457,61.9824,1824.190915
1,Albania,12.867227,76.4626,14131.110390
2,Algeria,11.348090,76.3767,10800.225460
3,Angola,8.794745,61.6434,5465.617791
4,Antigua and Barbuda,11.739184,78.4968,16792.365950
...,...,...,...,...
177,Uruguay,12.907575,75.4356,21268.881220
178,Uzbekistan,12.186365,70.8616,7916.785725
179,Vanuatu,9.300081,70.4490,3085.411633
183,Zambia,9.057926,61.2234,3217.767739


In [24]:
# sort both datasets by country
hdi_data = hdi_data.sort_values(by=['Country'])
hci_data = hci_data.sort_values(by=['Country'])

In [27]:
hdi_data

Unnamed: 0,Country,Schooling,Life expectancy,living standarts
0,Afghanistan,6.624457,61.9824,1824.190915
1,Albania,12.867227,76.4626,14131.110390
2,Algeria,11.348090,76.3767,10800.225460
3,Angola,8.794745,61.6434,5465.617791
4,Antigua and Barbuda,11.739184,78.4968,16792.365950
...,...,...,...,...
177,Uruguay,12.907575,75.4356,21268.881220
178,Uzbekistan,12.186365,70.8616,7916.785725
179,Vanuatu,9.300081,70.4490,3085.411633
183,Zambia,9.057926,61.2234,3217.767739


In [42]:
# scale data
hdi_data['Schooling'] = hdi_data['Schooling'] / hdi_data['Schooling'].max()
hdi_data['Life expectancy'] = hdi_data['Life expectancy'] / hdi_data['Life expectancy'].max()
hdi_data['living standarts'] = hdi_data['living standarts'] / hdi_data['living standarts'].max()

hci_data['HCI'] = hci_data['HCI'] / hci_data['HCI'].max()

In [44]:
# parameters
n = 3 # number of criteria (health, education, income)
N = hdi_data.shape[0] # number of countries
M = int(N*(N-1)/2) # number of pairs of countries
delta = 0.01 # threshold for the difference between two weights

lp = pulp.LpProblem("HDI_Weight_Inference", pulp.LpMinimize)

# variables
weights = pulp.LpVariable.dicts("weights", range(n), lowBound=0, upBound=1, cat='Continuous')
errors = pulp.LpVariable.dicts("errors", range(M), lowBound=0, cat='Continuous')

# objective function
lp += pulp.lpSum(errors[i] for i in range(M))

# constraints
for i in range(N):
    for k in range(i +1 , N):
        f_ij = hdi_data.iloc[i, 1:4].values
        f_ik = hdi_data.iloc[k, 1:4].values
        if hci_data.iloc[i, 1] > hci_data.iloc[k, 1]:
            lp += pulp.lpSum(weights[j]*(f_ij[j] - f_ik[j]) for j in range(n)) + errors[i] >= delta
        else:
            lp += pulp.lpSum(weights[j]*(f_ik[j] - f_ij[j]) for j in range(n)) + errors[i] >= delta
lp += pulp.lpSum(weights[j] for j in range(n)) == 1

lp.solve()

# print the results
print("Status:", pulp.LpStatus[lp.status])
print("Optimal weights:")
for v in lp.variables():
    print(v.name, "=", v.varValue)



Status: Optimal
Optimal weights:
errors_0 = 0.06589332
errors_1 = 0.046730788
errors_10 = 0.079347154
errors_100 = 0.058164027
errors_1000 = 0.0
errors_10000 = 0.0
errors_10001 = 0.0
errors_10002 = 0.0
errors_10003 = 0.0
errors_10004 = 0.0
errors_10005 = 0.0
errors_10006 = 0.0
errors_10007 = 0.0
errors_10008 = 0.0
errors_10009 = 0.0
errors_1001 = 0.0
errors_10010 = 0.0
errors_10011 = 0.0
errors_10012 = 0.0
errors_10013 = 0.0
errors_10014 = 0.0
errors_10015 = 0.0
errors_10016 = 0.0
errors_10017 = 0.0
errors_10018 = 0.0
errors_10019 = 0.0
errors_1002 = 0.0
errors_10020 = 0.0
errors_10021 = 0.0
errors_10022 = 0.0
errors_10023 = 0.0
errors_10024 = 0.0
errors_10025 = 0.0
errors_10026 = 0.0
errors_10027 = 0.0
errors_10028 = 0.0
errors_10029 = 0.0
errors_1003 = 0.0
errors_10030 = 0.0
errors_10031 = 0.0
errors_10032 = 0.0
errors_10033 = 0.0
errors_10034 = 0.0
errors_10035 = 0.0
errors_10036 = 0.0
errors_10037 = 0.0
errors_10038 = 0.0
errors_10039 = 0.0
errors_1004 = 0.0
errors_10040 = 0.0
erro