In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import os 
from sklearn.model_selection import train_test_split
from create_datasets import createData, cleanData
from create_datasets import createSplits

In [2]:
# Build the two country-group datasets, then run the cleaning step on each.
# NOTE(review): cleanData's return value is discarded, so it presumably
# cleans its argument in place -- confirm in create_datasets.
developed, developing = createData()
for frame in (developed, developing):
    cleanData(frame)

# Keep only the indicators flagged with included == 1, minus the row whose
# index label is 2.
# NOTE(review): the reason row label 2 is excluded is not visible here -- confirm.
indicatorList = pd.read_csv('Indicator.csv')
included_mask = indicatorList['included'] == 1
indicatorsToPick = indicatorList.loc[included_mask].drop(index=2)
attributes = list(indicatorsToPick['feature_name'])


In [34]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Ridge

def validate(ridge_model, splits):
    """Print the cross-validated RMSE of ``ridge_model``.

    The model is evaluated on ``splits[0]`` (features) against ``splits[2]``
    (targets) using 10-fold cross-validation repeated 3 times with a fixed
    seed. The mean and standard deviation of the per-fold RMSE are printed;
    nothing is returned.
    """
    folds = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    neg_rmse = cross_val_score(
        ridge_model,
        splits[0],
        splits[2],
        scoring='neg_root_mean_squared_error',
        cv=folds,
        n_jobs=-1,
    )
    # The scorer reports negated RMSE values; take magnitudes before summarising.
    rmse = np.absolute(neg_rmse)
    print('RMSE: %.3f (%.3f)' % (rmse.mean(), rmse.std()))


def compute_coeff(is_developed, dev, split_size, start_year=2010):
    """Fit one Ridge model per entry of ``dev`` and collect its coefficients.

    For every index ``i`` in ``range(len(dev))`` the data is split with
    ``createSplits(i, dev, split_size)``, a ``Ridge(alpha=1.0)`` model is
    fitted on ``splits[0]`` / ``splits[2]``, cross-validated via ``validate``,
    and one coefficient per name in the module-level ``attributes`` list is
    recorded.

    Parameters
    ----------
    is_developed : int
        ``0`` prints the "Developing" banner, any other value the "Developed"
        one. It only affects the printed banner.
    dev : sized collection
        Forwarded to ``createSplits``; one model is fitted per index.
        Presumably one entry per year -- confirm against ``createData``.
    split_size : float
        Forwarded unchanged to ``createSplits``.
    start_year : int, default 2010
        Year label printed for index 0; index ``i`` is labelled
        ``start_year + i``.

    Returns
    -------
    dict
        Maps each name in ``attributes`` to a list holding one coefficient
        per fitted model.
    """
    if is_developed == 0:  # Developing countries
        print("Computing for Developing Countries/Regions")
    else:
        print("Computing for Developed Countries/Regions")

    coeff_dict = {name: [] for name in attributes}

    for i in range(len(dev)):
        splits = createSplits(i, dev, split_size)
        ridge_model = Ridge(alpha=1.0)
        ridge_model.fit(splits[0], splits[2])

        validate(ridge_model, splits)

        # Ridge.coef_ is 1-D for a 1-D target and (n_targets, n_features) for
        # a 2-D target. Normalise to 2-D and take the first target's row so
        # both layouts are handled the same way.
        coef_row = np.atleast_2d(ridge_model.coef_)[0]

        for j, name in enumerate(attributes):
            try:
                coeff_dict[name].append(coef_row[j])
            except IndexError:
                # Fewer coefficients than attributes: report the attribute
                # that could not be filled and keep going (best effort).
                print(
                    f"No coefficient for attribute {name!r} (position {j}); "
                    f"model has {len(coef_row)} coefficients"
                )
        # NOTE: this score is computed on the same splits[0]/splits[2] data
        # the model was fitted on.
        print("Year:", start_year + i, "Score:", ridge_model.score(splits[0], splits[2]))
        print()
    return coeff_dict

# Per-model coefficient lists for each country group; note the two groups
# use different split sizes.
attcoeff_developed = compute_coeff(1, developed, 0.15)
attcoeff_developing = compute_coeff(0, developing, 0.1)

# Average every attribute's coefficients across all fitted models.
# Both result dicts are keyed by the attributes present in attcoeff_developed.
avgcoef_developed = {
    name: np.mean(attcoeff_developed[name]) for name in attcoeff_developed
}
avgcoef_developing = {
    name: np.mean(attcoeff_developing[name]) for name in attcoeff_developed
}

Conmputing for Developed Countries/Regions
RMSE: 3.898 (2.644)
Year: 2010 Score: 0.29342199682169345

RMSE: 2.817 (1.828)
Year: 2011 Score: 0.3914349835885348

RMSE: 3.117 (1.576)
Year: 2012 Score: 0.4335993469005748

RMSE: 2.724 (1.284)
Year: 2013 Score: 0.26937393975923707

RMSE: 2.396 (1.096)
Year: 2014 Score: 0.20203458155939158

RMSE: 1.921 (1.049)
Year: 2015 Score: 0.3751902182876109

RMSE: 2.024 (0.985)
Year: 2016 Score: 0.2922854295159457

RMSE: 1.885 (1.009)
Year: 2017 Score: 0.4769141347826949

Computing for Developing Countries/Regions
RMSE: 3.707 (1.081)
Year: 2010 Score: 0.0765167737534671

RMSE: 5.177 (3.497)
Year: 2011 Score: 0.08245968983187857

RMSE: 6.972 (8.006)
Year: 2012 Score: 0.017329536966645853

RMSE: 4.318 (2.155)
Year: 2013 Score: 0.06913135984704277

RMSE: 3.182 (1.067)
Year: 2014 Score: 0.24824158506304958

RMSE: 4.208 (1.866)
Year: 2015 Score: 0.11704572647690015

RMSE: 3.625 (1.300)
Year: 2016 Score: 0.09145571171648503

RMSE: 3.535 (1.326)
Year: 2017 Sco

In [35]:
# Tab-separated listing of the averaged Ridge coefficients (developed group).
print("Coefficients for Developed Countries:\n")
print("Indicator\t\tImportance")
for indicator, importance in avgcoef_developed.items():
    print(f"{indicator}\t\t{importance}")

Coefficients for Developed Countries:

Indicator		Importance
pop_grow		0.29073153375539357
gini_index		-0.04715106065598779
unemp		-0.044209528649964934
life_exp		-0.3128352678278332
poverty		1.09599245346641
mil_xpnd		0.014140223333949348
lit_rate		0.13442743515296648
labour_force		-1.3338309850255234e-09
refugee_asylum		-7.098716887323942e-07


In [36]:
# Tab-separated listing of the averaged Ridge coefficients (developing group).
print("Coefficients for Developing Countries:\n")
print("Indicator\t\tImportance")
for indicator, importance in avgcoef_developing.items():
    print(f"{indicator}\t\t{importance}")

Coefficients for Developing Countries:

Indicator		Importance
pop_grow		-0.4292893242893744
gini_index		0.012435098464630685
unemp		-0.06535169782296625
life_exp		-0.012061004186996719
poverty		-0.009866386315274787
mil_xpnd		-0.3944667938465035
lit_rate		-0.016582666792136498
labour_force		1.6709578873734982e-09
refugee_asylum		-1.794704149456305e-07
