# Import packages

In [1]:
import numpy as np               
import pandas as pd       
import matplotlib.pyplot as plt 

# Data to evaluate

In [2]:
# Decision matrix for the ranking below: one row per candidate model and one
# column per evaluation attribute, read from the CSV of model scores.

attributes = np.array(["accuracy", "f1_score", "roc_auc", "cross_entropy_loss", "training_time[s]"])
candidates = np.array(["Light GBM", "SVM", "Random Forest", "Pytorch ANN"])
data = pd.read_csv('HD Score.csv', index_col=0)

# Take an explicit, writable float copy instead of `data.values`.
# The following cells modify `raw_data` in place; `.values` may return a view
# of the DataFrame, so those updates would silently change `data` as well, and
# they fail outright when the view is read-only (pandas Copy-on-Write) or when
# the CSV happens to be parsed as integers.
raw_data = data.to_numpy(dtype=float, copy=True)

# One weight per attribute, in the same order as `attributes`.
weights = np.array([0.2, 0.2, 0.2, 0.2, 0.2])
# Alternative weighting (more emphasis on the loss, less on training time):
# weights = np.array([0.2, 0.2, 0.2, 0.3, 0.1])

# The indices of the attributes (zero-based) that are considered beneficial.
# Those indices not mentioned are assumed to be cost attributes.
# Alternative: treat every attribute as a benefit.
# benefit_attributes = {0, 1, 2, 3, 4}
benefit_attributes = {0, 1, 2}

# Display the raw data we have
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,accuracy,f1_score,roc_auc,cross_entropy_loss,training_time[s]
Light GBM,0.8361,0.8361,0.9405,0.3494,0.0331
SVM,0.8525,0.8525,0.9481,0.3258,0.0058
Random Forest,0.8852,0.8852,0.9307,0.3635,0.1153
Pytorch ANN,0.8852,0.8852,0.9048,0.477,0.3162


# Normalizing the ratings

In [3]:
# Vector normalisation: every attribute column is divided by its Euclidean
# norm so that attributes measured on different scales become comparable.
m = len(raw_data)      # number of candidates (rows)
n = len(attributes)    # number of attributes (columns)

# Euclidean norm of each column, computed in one vectorised call.
divisors = np.linalg.norm(raw_data, axis=0)

# An all-zero column has norm 0; dividing by it would give NaN and poison
# every later score. Such a column carries no information, so leave it at zero.
safe_divisors = np.where(divisors == 0, 1.0, divisors)

# Rebind instead of using `/=`: a new float array is created, so this works
# even if `raw_data` is read-only or integer-typed, and it never writes back
# into the DataFrame the array was taken from.
raw_data = raw_data / safe_divisors

columns = ["$X_{%d}$" % j for j in range(n)]
pd.DataFrame(data=raw_data, index=candidates, columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
Light GBM,0.483289,0.483289,0.505011,0.455697,0.09786
SVM,0.492768,0.492768,0.509092,0.424917,0.017148
Random Forest,0.51167,0.51167,0.499748,0.474086,0.340883
Pytorch ANN,0.51167,0.51167,0.485841,0.622116,0.934841


# Calculating the Weighted Normalized Ratings

In [4]:
# Scale column j of the normalised matrix by weights[j]; the weights are
# shaped as a single row so the broadcast over all candidates is explicit.
# The update is in place: `raw_data` holds the weighted values from here on,
# and re-running this cell would apply the weights a second time.
raw_data *= weights.reshape(1, -1)
pd.DataFrame(raw_data, index=candidates, columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
Light GBM,0.096658,0.096658,0.101002,0.091139,0.019572
SVM,0.098554,0.098554,0.101818,0.084983,0.00343
Random Forest,0.102334,0.102334,0.09995,0.094817,0.068177
Pytorch ANN,0.102334,0.102334,0.097168,0.124423,0.186968


# Identifying the Positive Ideal Solution (PIS) and Negative Ideal Solution (NIS)

In [5]:
# Smallest and largest weighted rating of every attribute column.
col_lows = np.array([np.min(raw_data[:, k]) for k in range(n)])
col_highs = np.array([np.max(raw_data[:, k]) for k in range(n)])

# A^* collects the best value per attribute and A^- the worst one:
# for a benefit attribute the best value is the maximum, for a cost
# attribute (any index not in `benefit_attributes`) it is the minimum.
a_pos = np.array([col_highs[k] if k in benefit_attributes else col_lows[k] for k in range(n)])
a_neg = np.array([col_lows[k] if k in benefit_attributes else col_highs[k] for k in range(n)])

pd.DataFrame(data=[a_pos, a_neg], index=["$A^*$", "$A^-$"], columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
$A^*$,0.102334,0.102334,0.101818,0.084983,0.00343
$A^-$,0.096658,0.096658,0.097168,0.124423,0.186968


# Calculating Euclidean Distances and Similarity to the PIS

In [6]:
# Euclidean distance of every candidate (row) to A^* (sp) and to A^- (sn).
sp = np.array([np.sqrt(gap @ gap) for gap in (row - a_pos for row in raw_data)])
sn = np.array([np.sqrt(gap @ gap) for gap in (row - a_neg for row in raw_data)])

# Closeness to A^*: the share of the total distance that separates the
# candidate from A^-. It approaches 1 as the candidate nears A^* and is NaN
# when both distances are zero.
cs = sn / (sp + sn)

pd.DataFrame({"$S^*$": sp, "$S^-$": sn, "$C^*$": cs}, index=candidates)

Unnamed: 0,$S^*$,$S^-$,$C^*$
Light GBM,0.019068,0.170716,0.899529
SVM,0.005346,0.187805,0.972321
Random Forest,0.065516,0.12272,0.651946
Pytorch ANN,0.187786,0.008027,0.040995
