In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {
    "Accuracy" : [85.36, 79.84, 96.953, 96.447, 81.733, 89.767, 95.853, 97.107],
    "cross_val_acc" : [84.6, 79.0, 96.6, 95.7, 81.0, 89.1, 95.5, 96.8],
    "roc_auc" : [85.36, 87.506, 99.039, 98.995, 89.74, 96.1, 98.927, 99.176],
    "f1_score" : [85.387, 79.781, 96.954, 96.426, 81.763, 89.682, 95.843, 97.107],
    "diff_cv_acc" : [0.407, 0.84, 0.353, 0.747, 0.733, 0.667, 0.353, 0.307],
    "speed_fit" : [1.697, 0.167, 14.949, 20.749, 16.277, 38.91, 0.364, 4.699],
    "speed_cv" : [3.897, 2.175, 23.285, 17.1, 15.011, 33.689, 2.245, 17.585]
}

In [3]:
index = ["Decision tree", "Logistic Regression", "KNN", "Random Forest", "AdaBoost", "GradientBoost", "LGBM", "XGBM"]

In [4]:
columns = ["Accuracy", "cross_val_acc", "roc_auc", "f1_score", "diff_cv_acc",  "speed_fit", "speed_cv"]

In [5]:
df = pd.DataFrame(data, index = index)
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,85.36,84.6,85.36,85.387,0.407,1.697,3.897
Logistic Regression,79.84,79.0,87.506,79.781,0.84,0.167,2.175
KNN,96.953,96.6,99.039,96.954,0.353,14.949,23.285
Random Forest,96.447,95.7,98.995,96.426,0.747,20.749,17.1
AdaBoost,81.733,81.0,89.74,81.763,0.733,16.277,15.011
GradientBoost,89.767,89.1,96.1,89.682,0.667,38.91,33.689
LGBM,95.853,95.5,98.927,95.843,0.353,0.364,2.245
XGBM,97.107,96.8,99.176,97.107,0.307,4.699,17.585


In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
scaler = StandardScaler()

In [8]:
scaled_df = scaler.fit_transform(df)

In [9]:
df = pd.DataFrame(scaled_df, index = index, columns = columns)
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,-0.741882,-0.748606,-1.643088,-0.735514,-0.711419,-0.838297,-1.000979
Logistic Regression,-1.55725,-1.55674,-1.251101,-1.563339,1.429638,-0.960107,-1.16551
KNN,0.970539,0.983109,0.855507,0.972557,-0.978434,0.21675,0.851473
Random Forest,0.895797,0.853231,0.84747,0.894589,0.96978,0.678512,0.260519
AdaBoost,-1.277632,-1.268121,-0.843041,-1.270662,0.900554,0.322477,0.060923
GradientBoost,-0.090917,-0.099213,0.318672,-0.101282,0.574204,2.124385,1.845537
LGBM,0.808057,0.824369,0.835049,0.808499,-0.978434,-0.944423,-1.158822
XGBM,0.993287,1.011971,0.880531,0.995151,-1.20589,-0.599296,0.306859


### Weighted Normalization

In [10]:
weights = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]

In [11]:
for i in range(len(df.columns)):
    df[df.columns[i]] = df[df.columns[i]] * weights[i] 
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,-0.370941,-0.374303,-0.821544,-0.367757,-0.35571,-0.419149,-0.50049
Logistic Regression,-0.778625,-0.77837,-0.625551,-0.781669,0.714819,-0.480054,-0.582755
KNN,0.48527,0.491555,0.427754,0.486279,-0.489217,0.108375,0.425737
Random Forest,0.447899,0.426615,0.423735,0.447294,0.48489,0.339256,0.13026
AdaBoost,-0.638816,-0.63406,-0.42152,-0.635331,0.450277,0.161239,0.030461
GradientBoost,-0.045458,-0.049606,0.159336,-0.050641,0.287102,1.062192,0.922769
LGBM,0.404028,0.412184,0.417525,0.404249,-0.489217,-0.472212,-0.579411
XGBM,0.496644,0.505986,0.440266,0.497575,-0.602945,-0.299648,0.15343


### Ideal and Neg_Ideal Solution

In [12]:
criteria = ["Accuracy","cross_val_acc","roc_auc","f1_score", "diff_cv_acc", "speed_fit", "speed_cv" ]
benefit_criteria = ["Accuracy","cross_val_acc","roc_auc","f1_score"]
cost_criteria = ["diff_cv_acc", "speed_fit", "speed_cv"]

In [13]:
A_pos = []
for i in criteria:
    if i in benefit_criteria:
        A_pos.append(df[i].max())
    elif i in cost_criteria:
        A_pos.append(df[i].min())
A_pos = np.round(A_pos, decimals=3)
A_pos  

array([ 0.497,  0.506,  0.44 ,  0.498, -0.603, -0.48 , -0.583])

In [14]:
A_neg = []
for i in criteria:
    if i in benefit_criteria:
        A_neg.append(df[i].min())
    elif i in cost_criteria:
        A_neg.append(df[i].max())
A_neg = np.round(A_neg, decimals=3)
A_neg        

array([-0.779, -0.778, -0.822, -0.782,  0.715,  1.062,  0.923])

### Separation From Positve Ideal Solution

In [15]:
pos_data = df.copy()
for i in range(len(df.columns)):
    pos_data[pos_data.columns[i]] = (pos_data[pos_data.columns[i]] - A_pos[i]) ** 2
pos_data

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,0.7533214,0.7749335,1.591493,0.7495352,0.06115247,0.003702877,0.006807958
Logistic Regression,1.627219,1.649606,1.135398,1.637554,1.736647,2.870146e-09,6.00026e-08
KNN,0.0001375996,0.0002086684,0.0001499767,0.000137388,0.01294662,0.3461849,1.017549
Random Forest,0.002410941,0.006301929,0.0002645502,0.002571049,1.183505,0.6711802,0.5087391
AdaBoost,1.290078,1.299738,0.7422172,1.284439,1.109393,0.411187,0.3763348
GradientBoost,0.294261,0.3086985,0.07877236,0.3010067,0.7922815,2.378357,2.267339
LGBM,0.008643736,0.008801376,0.0005051441,0.008789181,0.01294662,6.065953e-05,1.288147e-05
XGBM,1.270839e-07,2.07272e-10,7.057201e-08,1.80359e-07,3.023688e-09,0.0325269,0.5423284


In [16]:
sep_ideal=[]
for i in range(len(df)):
    sep_ideal.append(np.sqrt(sum(pos_data.iloc[i])))
sep_ideal = np.round(sep_ideal, decimals=3)
sep_ideal

array([1.985, 2.79 , 1.174, 1.541, 2.552, 2.534, 0.199, 0.758])

### Separation From Negative Ideal Solution

In [17]:
neg_data = df.copy()
for i in range(len(df.columns)):
    neg_data[neg_data.columns[i]] = (neg_data[neg_data.columns[i]] - A_neg[i]) ** 2
neg_data

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv
Decision tree,0.1665122,0.1629712,2.081041e-07,0.1715973,1.146419,2.193802,2.026323
Logistic Regression,1.405793e-07,1.369072e-07,0.03859239,1.093114e-07,3.277147e-08,2.377929,2.267298
KNN,1.598378,1.611769,1.561884,1.608531,1.450138,0.909401,0.2472709
Random Forest,1.50528,1.451098,1.551856,1.511165,0.05295053,0.522359,0.6284375
AdaBoost,0.01965156,0.02071861,0.160384,0.02151187,0.07007814,0.8113711,0.7966252
GradientBoost,0.5380835,0.5305572,0.9630201,0.5348863,0.1830967,3.702693e-08,5.352665e-08
LGBM,1.399556,1.416539,1.536421,1.407188,1.450138,2.353805,2.257239
XGBM,1.627266,1.648619,1.593315,1.637313,1.736979,1.854085,0.5922387


In [18]:
sep_neg_ideal=[]
for i in range(len(df)):
    sep_neg_ideal.append(np.sqrt(sum(neg_data.iloc[i])))
sep_neg_ideal = np.round(sep_neg_ideal, decimals=3)
sep_neg_ideal

array([2.422, 2.164, 2.998, 2.688, 1.379, 1.658, 3.438, 3.27 ])

### Relative Closeness

In [19]:
C = []

for i in range(len(df)):
    c = sep_neg_ideal[i]/(sep_ideal[i] + sep_neg_ideal[i])
    C.append(c)
#C = np.round(C, decimals=2)
C

[0.5495802132970274,
 0.436818732337505,
 0.7186001917545541,
 0.6356112556159849,
 0.3508013228186212,
 0.3955152671755725,
 0.9452845751993402,
 0.8118172790466732]

In [20]:
rank =  np.array([sorted(C, reverse = True).index(x) for x in C]) + [1]
rank
df["rank"] = rank
df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv,rank
Decision tree,-0.370941,-0.374303,-0.821544,-0.367757,-0.35571,-0.419149,-0.50049,5
Logistic Regression,-0.778625,-0.77837,-0.625551,-0.781669,0.714819,-0.480054,-0.582755,6
KNN,0.48527,0.491555,0.427754,0.486279,-0.489217,0.108375,0.425737,3
Random Forest,0.447899,0.426615,0.423735,0.447294,0.48489,0.339256,0.13026,4
AdaBoost,-0.638816,-0.63406,-0.42152,-0.635331,0.450277,0.161239,0.030461,8
GradientBoost,-0.045458,-0.049606,0.159336,-0.050641,0.287102,1.062192,0.922769,7
LGBM,0.404028,0.412184,0.417525,0.404249,-0.489217,-0.472212,-0.579411,1
XGBM,0.496644,0.505986,0.440266,0.497575,-0.602945,-0.299648,0.15343,2


In [21]:
print(f"By the TOPSIS method, Your choice should be '{df.index[C.index(max(C))].capitalize()}'.")

By the TOPSIS method, Your choice should be 'Lgbm'.


In [24]:
new_df = df.sort_values('rank')
new_df

Unnamed: 0,Accuracy,cross_val_acc,roc_auc,f1_score,diff_cv_acc,speed_fit,speed_cv,rank
LGBM,0.404028,0.412184,0.417525,0.404249,-0.489217,-0.472212,-0.579411,1
XGBM,0.496644,0.505986,0.440266,0.497575,-0.602945,-0.299648,0.15343,2
KNN,0.48527,0.491555,0.427754,0.486279,-0.489217,0.108375,0.425737,3
Random Forest,0.447899,0.426615,0.423735,0.447294,0.48489,0.339256,0.13026,4
Decision tree,-0.370941,-0.374303,-0.821544,-0.367757,-0.35571,-0.419149,-0.50049,5
Logistic Regression,-0.778625,-0.77837,-0.625551,-0.781669,0.714819,-0.480054,-0.582755,6
GradientBoost,-0.045458,-0.049606,0.159336,-0.050641,0.287102,1.062192,0.922769,7
AdaBoost,-0.638816,-0.63406,-0.42152,-0.635331,0.450277,0.161239,0.030461,8
