In [39]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from statsmodels.graphics.mosaicplot import mosaic
import seaborn as sns
from scipy.stats import entropy
from sklearn.metrics import mutual_info_score
%matplotlib inline

In [2]:
# Labels applied to every column that remains after field 0 becomes the index.
cols = [
    'Country',
    'Quality of Life Index',
    'Purchasing Power Index',
    'Safety Index',
    'Health Care Index',
    'Cost of Living Index',
    'Property Price to Income Ratio',
    'Traffic Commute Time Index',
    'Pollution Index',
    'Climate Index',
]

# Tab-separated file without a header row. `sep` is passed by keyword because
# pandas 2.x accepts only the path positionally in read_csv.
# NOTE(review): field 0 is used as the index; presumably a rank/row id - confirm.
data = pd.read_csv('country_data.csv', sep='\t', header=None).set_index(0)
data.columns = cols

In [40]:
def calc_MI(x, y, bins):
    """Estimate the mutual information of two paired samples by binning.

    The samples are binned jointly with ``np.histogram2d`` and the resulting
    count table is handed to ``mutual_info_score`` as a pre-computed
    contingency matrix.

    Args:
        x: Sample values of the first variable.
        y: Sample values of the second variable, paired with ``x``.
        bins: Bin specification, forwarded unchanged to ``np.histogram2d``.

    Returns:
        The score returned by ``mutual_info_score`` for the joint counts.
    """
    joint_counts, _x_edges, _y_edges = np.histogram2d(x, y, bins)
    # Both label arguments are None because the contingency table is supplied.
    return mutual_info_score(None, None, contingency=joint_counts)

def calc_entropy(x, bins):
    """Estimate the entropy of a sample from its histogram.

    Args:
        x: Sample values.
        bins: Bin specification, forwarded unchanged to ``np.histogram``.

    Returns:
        ``scipy.stats.entropy`` evaluated on the per-bin counts (scipy
        normalises the counts to a probability distribution itself).
    """
    counts, _edges = np.histogram(x, bins)
    return entropy(counts)

In [58]:
# Pairwise importance: mutual information (20 bins per axis) for every
# unordered pair of columns after the first, min-max normalised.
importance_two = dict()
cols = data.columns
# cols[0] is never used as col1 and never reached as col2, so it is excluded.
for i, col1 in enumerate(cols[1:]):
    # col1 sits at cols[i + 1]; starting col2 at cols[i + 2] visits each
    # unordered pair exactly once and never pairs a column with itself.
    for col2 in cols[i + 2:]:
        # Score each pair once and reuse the value.
        mi = calc_MI(data[col1], data[col2], 20)
        importance_two[(col1, col2)] = mi

# The defaults only matter when there are no pairs at all.
max_mi = max(importance_two.values(), default=0)
min_mi = min(importance_two.values(), default=2*10**9)
mi_range = max_mi - min_mi
if mi_range > 0:
    importance_two = {
        key: (score - min_mi) / mi_range
        for key, score in importance_two.items()
    }
else:
    # All scores are equal: dividing by the zero range would yield NaN and
    # make any later ranking meaningless, so every pair gets 0.0 instead.
    importance_two = {key: 0.0 for key in importance_two}

In [59]:
# Single-column importance: histogram entropy (20 bins) of every column after
# the first, min-max normalised. Keys are 1-tuples of the column name.
importance_one = dict()
for col in data.columns[1:]:
    ent = calc_entropy(data[col], 20)
    importance_one[(col, )] = ent

# The defaults only matter when there are no columns to score.
max_entropy = max(importance_one.values(), default=0)
min_entropy = min(importance_one.values(), default=2*10**9)
entropy_range = max_entropy - min_entropy
if entropy_range > 0:
    importance_one = {
        key: (score - min_entropy) / entropy_range
        for key, score in importance_one.items()
    }
else:
    # All entropies are equal (e.g. a single column): dividing by the zero
    # range would yield NaN, so every column gets 0.0 instead.
    importance_one = {key: 0.0 for key in importance_one}

In [60]:
# Combine both score tables into one mapping, single-column entries first.
# Their keys are 1-tuples and the pair keys are 2-tuples, so none can collide.
importance = dict(importance_one)
importance.update(importance_two)

In [61]:
# Rank every key by score, highest first, as a list of (key, score) pairs.
# dict(...) accepts both the merged mapping and an already-ranked list of
# pairs, so re-running this cell no longer fails once `importance` is a list;
# the sort is stable, so a re-run reproduces the same order.
importance = sorted(dict(importance).items(), key=lambda item: item[1], reverse=True)

In [62]:
# Persist the ranking as the string form of a list of (key, score) pairs.
# Scores are cast to built-in float so the file holds plain numeric literals:
# NumPy >= 2 renders its scalars inside a list as `np.float64(...)`.
with open('importance', 'w') as f:
    f.write(str([(key, float(score)) for key, score in importance]))