# Import packages

In [1]:
import numpy as np               
import pandas as pd       
import matplotlib.pyplot as plt 

# Data to evaluate

In [2]:
# Decision-problem definition: the criteria (attributes), the candidate
# models, the score matrix loaded from disk, and the criterion weights.

attributes = np.array(["accuracy", "f1_score", "roc_auc", "cross_entropy_loss", "training_time[s]"])
candidates = np.array(["Light GBM", "SVM", "Random Forest", "Pytorch ANN"])
data = pd.read_csv('CTGPattern Score.csv', index_col=0)

# Take an independent, writable float copy of the scores. Later cells modify
# `raw_data` with in-place operators (`/=`, `*=`). `data.values` may be a view
# of the DataFrame (so `data` itself would be silently overwritten) and is a
# read-only array when pandas Copy-on-Write is active (so `/=` would raise).
# Forcing float also keeps in-place true division valid for integer columns.
raw_data = data.to_numpy(dtype=float, copy=True)

# Fail early, with a clear message, if the CSV does not match the labels above.
assert raw_data.shape == (len(candidates), len(attributes)), (
    "score matrix shape does not match len(candidates) x len(attributes)"
)

# One weight per attribute (equal weighting).
weights = np.array([0.2, 0.2, 0.2, 0.2, 0.2])
# Alternative weighting (more weight on cross-entropy loss, less on training time):
# weights = np.array([0.2, 0.2, 0.2, 0.3, 0.1])
assert weights.shape == (len(attributes),), "need exactly one weight per attribute"

# The indices of the attributes (zero-based) that are considered beneficial.
# Those indices not mentioned are assumed to be cost attributes.
benefit_attributes = {0, 1, 2}

# Display the raw data we have
pd.DataFrame(data=raw_data, index=candidates, columns=attributes)

Unnamed: 0,accuracy,f1_score,roc_auc,cross_entropy_loss,training_time[s]
Light GBM,0.8873,0.8873,0.9881,0.3605,2.1351
SVM,0.8662,0.8662,0.9873,0.3815,0.3817
Random Forest,0.8732,0.8732,0.9863,0.4547,0.389
Pytorch ANN,0.8779,0.8779,0.9807,0.695,3.2025


# Normalizing the ratings

In [3]:
# Vector normalisation: every column is divided by its Euclidean (L2) norm.
m = len(raw_data)      # number of rows (one per candidate)
n = len(attributes)    # number of columns (one per attribute)

# L2 norm of each column, computed as sqrt(col . col).
divisors = np.array(
    [np.sqrt(raw_data[:, k] @ raw_data[:, k]) for k in range(n)],
    dtype=float,
)

# In-place division: from here on `raw_data` holds the normalised ratings.
np.divide(raw_data, divisors, out=raw_data)

columns = ["$X_{%d}$" % k for k in range(n)]
pd.DataFrame(data=raw_data, index=candidates, columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
Light GBM,0.506344,0.506344,0.501266,0.366926,0.54924
SVM,0.494303,0.494303,0.50086,0.388301,0.09819
Random Forest,0.498297,0.498297,0.500353,0.462805,0.100068
Pytorch ANN,0.50098,0.50098,0.497512,0.707389,0.823821


# Calculating the weighted normalized ratings

In [4]:
# Scale each normalised column by its attribute weight. The product is
# written back into `raw_data` in place, so running this cell a second time
# without re-running the normalisation cell applies the weights twice.
np.multiply(raw_data, weights, out=raw_data)
pd.DataFrame(data=raw_data, index=candidates, columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
Light GBM,0.101269,0.101269,0.100253,0.073385,0.109848
SVM,0.098861,0.098861,0.100172,0.07766,0.019638
Random Forest,0.099659,0.099659,0.100071,0.092561,0.020014
Pytorch ANN,0.100196,0.100196,0.099502,0.141478,0.164764


# Identifying the positive ideal solution (PIS) and the negative ideal solution (NIS)

In [5]:
# Column-wise extremes of the weighted ratings (first n columns).
col_max = raw_data[:, :n].max(axis=0)
col_min = raw_data[:, :n].min(axis=0)

# True where the column index is listed as a benefit attribute;
# every other column is treated as a cost attribute.
is_benefit = np.fromiter(
    (k in benefit_attributes for k in range(n)), dtype=bool, count=n
)

# PIS: best value per column (max for benefit, min for cost).
# NIS: worst value per column (min for benefit, max for cost).
a_pos = np.where(is_benefit, col_max, col_min).astype(float)
a_neg = np.where(is_benefit, col_min, col_max).astype(float)

pd.DataFrame(data=[a_pos, a_neg], index=["$A^*$", "$A^-$"], columns=columns)

Unnamed: 0,$X_{0}$,$X_{1}$,$X_{2}$,$X_{3}$,$X_{4}$
$A^*$,0.101269,0.101269,0.100253,0.073385,0.019638
$A^-$,0.098861,0.098861,0.099502,0.141478,0.164764


# Calculating Euclidean distances and similarity to the PIS

In [6]:
def _distance_to(reference):
    """Return the Euclidean distance from each row of `raw_data` to `reference`."""
    gaps = (row - reference for row in raw_data)
    return np.array([np.sqrt(gap @ gap) for gap in gaps], dtype=float)


sp = _distance_to(a_pos)   # distance of each candidate to the PIS
sn = _distance_to(a_neg)   # distance of each candidate to the NIS

# Relative closeness: share of the total distance that separates a candidate
# from the NIS, so a larger value means farther from the NIS relative to the PIS.
cs = sn / (sp + sn)

CTGPattern = pd.DataFrame(
    {"$S^*$": sp, "$S^-$": sn, "$C^*$": cs},
    index=candidates,
)

In [7]:
# Show the table of distances ($S^*$, $S^-$) and closeness ($C^*$) per candidate.
CTGPattern

Unnamed: 0,$S^*$,$S^-$,$C^*$
Light GBM,0.09021,0.087547,0.492511
SVM,0.005466,0.15854,0.96667
Random Forest,0.019315,0.152798,0.887778
Pytorch ANN,0.160316,0.001888,0.011642
