In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {
    "Accuracy" : [86.432, 79.024, 97.204, 96.608, 81.14, 88.928, 95.508, 97.144],
    "cross_val_acc" : [86.3, 79.2, 97.0, 96.3, 81.4, 89.2, 95.8, 97.1],
    "roc_auc" : [86.432, 86.819, 99.028, 99.058, 89.467, 95.763, 98.858, 99.241],
    "f1_score" : [86.461, 78.93, 97.201, 96.588, 81.154, 88.821, 95.498, 97.139],
    "diff_cv_acc" : [0.132, 0.176, 0.204, 0.308, 0.26, 0.272, 0.292, 0.044],
    "speed_fit" : [3.171, 0.262, 37.918, 37.452, 28.579, 68.417, 0.472, 7.757],
    "speed_cv" : [5.295, 2.444, 59.557, 30.596, 27.657, 60.938, 2.801, 30.596]
}

In [3]:
index = ["Decision tree", "Logistic Regression", "KNN", "Random Forest", "AdaBoost", "GradientBoost", "LGBM", "XGBM"]

In [4]:
df = pd.DataFrame(data, index = index)
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,86.432,86.3,86.432,86.461,0.132,3.171,5.295
Logistic Regression,79.024,79.2,86.819,78.93,0.176,0.262,2.444
KNN,97.204,97.0,99.028,97.201,0.204,37.918,59.557
Random Forest,96.608,96.3,99.058,96.588,0.308,37.452,30.596
AdaBoost,81.14,81.4,89.467,81.154,0.26,28.579,27.657
GradientBoost,88.928,89.2,95.763,88.821,0.272,68.417,60.938
LGBM,95.508,95.8,98.858,95.498,0.292,0.472,2.801
XGBM,97.144,97.1,99.241,97.139,0.044,7.757,30.596


### Normalization

In [5]:
columns = ["Accuracy", "cross_val_acc", "roc_auc", "f1_score", "diff_cv_acc",  "speed_fit", "speed_cv"]

In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_df = scaler.fit_transform(df)
df = pd.DataFrame(scaled_df, index = index, columns = columns)
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,-0.547104,-0.580858,-1.462375,-0.538424,-0.931866,-0.868118,-1.005567
Logistic Regression,-1.609059,-1.615113,-1.390748,-1.615988,-0.412852,-0.995452,-1.13476
KNN,0.997088,0.977808,0.868911,0.998295,-0.08257,0.652845,1.453326
Random Forest,0.91165,0.875839,0.874464,0.910585,1.144189,0.632447,0.140953
AdaBoost,-1.305725,-1.29464,-0.900653,-1.29777,0.577993,0.244053,0.007772
GradientBoost,-0.189297,-0.158416,0.26462,-0.200747,0.719542,1.987861,1.515907
LGBM,0.753962,0.803004,0.837447,0.754624,0.955457,-0.986259,-1.118583
XGBM,0.988487,0.992375,0.908333,0.989424,-1.969893,-0.667377,0.140953


### Weighted Normalization

In [7]:
weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]

In [8]:
for i in range(len(df.columns)):
    df[df.columns[i]] = df[df.columns[i]] * weights[i] 
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,-0.273552,-0.290429,-0.731187,-0.269212,-0.465933,-0.434059,-0.502783
Logistic Regression,-0.80453,-0.807556,-0.695374,-0.807994,-0.206426,-0.497726,-0.56738
KNN,0.498544,0.488904,0.434456,0.499148,-0.041285,0.326422,0.726663
Random Forest,0.455825,0.43792,0.437232,0.455293,0.572095,0.316223,0.070476
AdaBoost,-0.652863,-0.64732,-0.450326,-0.648885,0.288996,0.122027,0.003886
GradientBoost,-0.094648,-0.079208,0.13231,-0.100373,0.359771,0.993931,0.757953
LGBM,0.376981,0.401502,0.418724,0.377312,0.477729,-0.49313,-0.559291
XGBM,0.494243,0.496187,0.454167,0.494712,-0.984947,-0.333688,0.070476


### Ideal and Neg_Ideal Solution

In [9]:
criteria = ["Accuracy","cross_val_acc","roc_auc","f1_score", "diff_cv_acc", "speed_fit", "speed_cv" ]
benefit_criteria = ["Accuracy","cross_val_acc","roc_auc","f1_score"]
cost_criteria = ["diff_cv_acc", "speed_fit", "speed_cv"]

In [10]:
A_pos = []
for i in criteria:
    if i in benefit_criteria:
        A_pos.append(df[i].max())
    elif i in cost_criteria:
        A_pos.append(df[i].min())
A_pos = np.round(A_pos, decimals=3)
A_pos  

array([ 0.499,  0.496,  0.454,  0.499, -0.985, -0.498, -0.567])

In [11]:
A_neg = []
for i in criteria:
    if i in benefit_criteria:
        A_neg.append(df[i].min())
    elif i in cost_criteria:
        A_neg.append(df[i].max())
A_neg = np.round(A_neg, decimals=3)
A_neg        

array([-0.805, -0.808, -0.731, -0.808,  0.572,  0.994,  0.758])

### Separation From Positve Ideal Solution

In [12]:
pos_data = df.copy()
for i in range(len(df.columns)):
    pos_data[pos_data.columns[i]] = (pos_data[pos_data.columns[i]] - A_pos[i]) ** 2
pos_data

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,0.596837,0.6184706,1.404669,0.5901498,0.2694308,0.004088482,0.004123768
Logistic Regression,1.699189,1.69926,1.321061,1.708233,0.6061776,7.517552e-08,1.445857e-07
KNN,2.080799e-07,5.035336e-05,0.0003819855,2.182154e-08,0.8905977,0.6796721,1.673564
Random Forest,0.001864096,0.003373334,0.0002811734,0.001910341,2.424544,0.6629595,0.4063762
AdaBoost,1.326788,1.30718,0.817806,1.31764,1.623067,0.3844331,0.3259106
GradientBoost,0.3524185,0.3308641,0.1034844,0.3592483,1.808409,2.225857,1.755501
LGBM,0.01488864,0.008929842,0.001244422,0.014808,2.139575,2.371973e-05,5.942125e-05
XGBM,2.262648e-05,3.514721e-08,2.779717e-08,1.838588e-05,2.859392e-09,0.02699827,0.4063762


In [13]:
sep_ideal=[]
for i in range(len(df)):
    sep_ideal.append(np.sqrt(sum(pos_data.iloc[i])))
sep_ideal = np.round(sep_ideal, decimals=3)
sep_ideal

array([1.868, 2.652, 1.801, 1.871, 2.665, 2.634, 1.476, 0.658])

### Separation From Negative Ideal Solution

In [14]:
neg_data = df.copy()
for i in range(len(df.columns)):
    neg_data[neg_data.columns[i]] = (neg_data[neg_data.columns[i]] - A_neg[i]) ** 2
neg_data

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,0.2824367,0.2678797,3.5109e-08,0.2902924,1.077304,2.039352,1.589575
Logistic Regression,2.213134e-07,1.966945e-07,0.001269204,3.561481e-11,0.6059469,2.225246,1.756633
KNN,1.699227,1.68196,1.358287,1.708635,0.3761187,0.44566,0.0009819976
Random Forest,1.589679,1.552316,1.364765,1.595908,8.966039e-09,0.4593813,0.4726886
AdaBoost,0.02314575,0.02581812,0.07877773,0.02531762,0.0800911,0.7603374,0.5686883
GradientBoost,0.5045994,0.5311379,0.7453043,0.5007356,0.0450412,4.824197e-09,2.180669e-09
LGBM,1.397079,1.462895,1.321864,1.404964,0.008887105,2.211555,1.735257
XGBM,1.688033,1.700905,1.40462,1.697059,2.424082,1.762757,0.4726886


In [15]:
sep_neg_ideal=[]
for i in range(len(df)):
    sep_neg_ideal.append(np.sqrt(sum(neg_data.iloc[i])))
sep_neg_ideal = np.round(sep_neg_ideal, decimals=3)
sep_neg_ideal

array([2.355, 2.142, 2.696, 2.652, 1.25 , 1.525, 3.089, 3.339])

### Relative Closeness

In [16]:
C = []

for i in range(len(df)):
    c = sep_neg_ideal[i]/(sep_ideal[i] + sep_neg_ideal[i])
    C.append(c)
#C = np.round(C, decimals=2)
C

[0.5576604309732418,
 0.4468085106382978,
 0.5995107849677563,
 0.5863365023214682,
 0.31928480204342274,
 0.3666746814138014,
 0.6766703176341731,
 0.8353765323992995]

In [17]:
rank =  np.array([sorted(C, reverse = True).index(x) for x in C]) + [1]
rank
df["rank"] = rank
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv,rank
Decision tree,-0.273552,-0.290429,-0.731187,-0.269212,-0.465933,-0.434059,-0.502783,5
Logistic Regression,-0.80453,-0.807556,-0.695374,-0.807994,-0.206426,-0.497726,-0.56738,6
KNN,0.498544,0.488904,0.434456,0.499148,-0.041285,0.326422,0.726663,3
Random Forest,0.455825,0.43792,0.437232,0.455293,0.572095,0.316223,0.070476,4
AdaBoost,-0.652863,-0.64732,-0.450326,-0.648885,0.288996,0.122027,0.003886,8
GradientBoost,-0.094648,-0.079208,0.13231,-0.100373,0.359771,0.993931,0.757953,7
LGBM,0.376981,0.401502,0.418724,0.377312,0.477729,-0.49313,-0.559291,2
XGBM,0.494243,0.496187,0.454167,0.494712,-0.984947,-0.333688,0.070476,1


In [18]:
print(f"By the TOPSIS method, Your choice should be '{df.index[C.index(max(C))].capitalize()}'.")

By the TOPSIS method, Your choice should be 'Xgbm'.


In [28]:
new_df = df.sort_values('rank')
new_df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv,rank
XGBM,0.494243,0.496187,0.454167,0.494712,-0.984947,-0.333688,0.070476,1
LGBM,0.376981,0.401502,0.418724,0.377312,0.477729,-0.49313,-0.559291,2
KNN,0.498544,0.488904,0.434456,0.499148,-0.041285,0.326422,0.726663,3
Random Forest,0.455825,0.43792,0.437232,0.455293,0.572095,0.316223,0.070476,4
Decision tree,-0.273552,-0.290429,-0.731187,-0.269212,-0.465933,-0.434059,-0.502783,5
Logistic Regression,-0.80453,-0.807556,-0.695374,-0.807994,-0.206426,-0.497726,-0.56738,6
GradientBoost,-0.094648,-0.079208,0.13231,-0.100373,0.359771,0.993931,0.757953,7
AdaBoost,-0.652863,-0.64732,-0.450326,-0.648885,0.288996,0.122027,0.003886,8
