# Import packages

In [1]:
import numpy as np               
import pandas as pd       
import matplotlib.pyplot as plt 

# Data to evaluate

In [2]:
# Decision problem definition: the criteria (columns), the candidate models
# (rows), their scores loaded from disk, and the criterion weights.

attributes = np.array(["accuracy", "f1_score", "roc_auc", "cross_entropy_loss", "training_time[s]"])
candidates = np.array(["Light GBM", "SVM", "Random Forest", "Pytorch ANN"])
data = pd.read_csv('CTGNSP Score.csv', index_col=0)

# Take an independent, writeable float copy of the scores. `data.values` may
# hand back a view of the DataFrame's own buffer (read-only when pandas
# Copy-on-Write is active), so later in-place arithmetic on `raw_data` would
# either silently modify `data` or fail outright.
raw_data = data.to_numpy(dtype=float, copy=True)

# NOTE(review): the labels above are hard-coded and assumed to follow the row
# and column order of the CSV file -- confirm against the file itself.
if raw_data.shape != (len(candidates), len(attributes)):
    raise ValueError(
        "score matrix has shape %s, expected (%d candidates, %d attributes)"
        % (raw_data.shape, len(candidates), len(attributes))
    )

weights = np.array([0.2, 0.2, 0.2, 0.2, 0.2])
# Alternative weighting:
# weights = np.array([0.2, 0.2, 0.2, 0.3, 0.1])
if weights.shape != (len(attributes),):
    raise ValueError("expected exactly one weight per attribute")

# The indices of the attributes (zero-based) that are considered beneficial.
# Those indices not mentioned are assumed to be cost attributes.
# Alternative (every attribute treated as a benefit):
# benefit_attributes = {0, 1, 2, 3, 4}
benefit_attributes = {0, 1, 2}

# Display the raw data we have
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,accuracy,f1_score,roc_auc,cross_entropy_loss,training_time[s]
Light GBM,0.9319,0.9319,0.9825,0.1832,0.5911
SVM,0.9225,0.9225,0.9739,0.2152,0.5893
Random Forest,0.939,0.939,0.9833,0.1816,0.2525
Pytorch ANN,0.9061,0.9061,0.9755,0.6319,2.8793


# Normalizing the ratings

In [3]:
# Vector normalization: scale every criterion column by its Euclidean (L2)
# norm so that criteria measured on different scales become comparable.
m, n = raw_data.shape  # m candidates (rows), n criteria (columns)
divisors = np.linalg.norm(raw_data, axis=0)

# A column made up entirely of zeros has norm 0; dividing it by 1 instead
# leaves it as zeros rather than turning the whole column into NaN.
safe_divisors = np.where(divisors == 0, 1.0, divisors)

# Bind the result to a new array instead of dividing in place: an in-place
# `/=` would write through to any object sharing this buffer (e.g. the
# DataFrame it was extracted from) and fails on read-only or integer arrays.
raw_data = raw_data / safe_divisors

columns = ["$X_{%d}$" % j for j in range(n)]
pd.DataFrame(data=raw_data, index=candidates, columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
Light GBM,0.503753,0.503753,0.501886,0.255992,0.19648
SVM,0.498672,0.498672,0.497492,0.300707,0.195881
Random Forest,0.507591,0.507591,0.502294,0.253757,0.08393
Pytorch ANN,0.489807,0.489807,0.49831,0.882978,0.957069


# Calculating the weighted normalized ratings

In [4]:
# Scale each criterion column j of the rating matrix by weights[j].
# The product is written back into raw_data itself, so executing this cell
# a second time applies the weights again.
np.multiply(raw_data, weights, out=raw_data)
pd.DataFrame(raw_data, index=candidates, columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
Light GBM,0.100751,0.100751,0.100377,0.051198,0.039296
SVM,0.099734,0.099734,0.099498,0.060141,0.039176
Random Forest,0.101518,0.101518,0.100459,0.050751,0.016786
Pytorch ANN,0.097961,0.097961,0.099662,0.176596,0.191414


# Identifying the positive ideal solution (PIS) and the negative ideal solution (NIS)

In [5]:
# Build the two reference points, one entry per criterion (column).
# For a benefit criterion the best value is the column maximum and the worst
# is the column minimum; for every other (cost) criterion the roles swap.
col_max = raw_data.max(axis=0)
col_min = raw_data.min(axis=0)
is_benefit = np.array([idx in benefit_attributes for idx in range(n)], dtype=bool)

a_pos = np.where(is_benefit, col_max, col_min)  # best value per criterion
a_neg = np.where(is_benefit, col_min, col_max)  # worst value per criterion

pd.DataFrame(data=[a_pos, a_neg], index=["$A^*$", "$A^-$"], columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
$A^*$,0.101518,0.101518,0.100459,0.050751,0.016786
$A^-$,0.097961,0.097961,0.099498,0.176596,0.191414


# Calculating Euclidean distances and relative closeness to the positive ideal solution (PIS)

In [6]:
def _l2(vec):
    """Return the Euclidean length of a 1-D vector."""
    return np.sqrt(vec @ vec)


# Distance of every candidate (row of raw_data) to the best reference point
# a_pos and to the worst reference point a_neg.
sp = np.array([_l2(row - a_pos) for row in raw_data])
sn = np.array([_l2(row - a_neg) for row in raw_data])

# Closeness score: the share of the combined distance that separates the
# candidate from the worst point (1.0 means it coincides with the best point).
cs = sn / (sp + sn)

CTPNSP = pd.DataFrame({"$S^*$": sp, "$S^-$": sn, "$C^*$": cs}, index=candidates)

In [7]:
# Show the per-candidate results table: distance to the best reference point
# ($S^*$), distance to the worst reference point ($S^-$) and the closeness
# score ($C^*$) computed from them.
CTPNSP

Unnamed: 0,$S^*$,$S^-$,$C^*$
Light GBM,0.022541,0.197182,0.897413
SVM,0.024429,0.191688,0.886963
Random Forest,0.0,0.215309,1.0
Pytorch ANN,0.215308,0.000163,0.000759
