<a href="https://colab.research.google.com/github/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/blob/main/AHP-MODIFIED-TOPSIS-EVALUATION/modfied_ahp_TOPSIS_20_30_50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
1.First we imported all the necessary libraries and packages.
2.Then we uploaded the datasets (Distressed).
3.Then we balanced the dataset using SMOTE.
4.Then We splitted the dataset in 70:30 (train:test) ratio
5.Then we normalized the data.
6.Then we defined and tuned our classifier model, used parameters suitbale for goof performances.
7.Then we found optimized threshhold for each model.
8.Then we evaluated the models and printed the results
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE,BorderlineSMOTE
from imblearn.over_sampling import ADASYN
from sklearn.metrics import confusion_matrix
import warnings

#Upload and balance Dataset

warnings.filterwarnings("ignore", category=FutureWarning)
data = pd.read_csv("https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-MODIFIED-TOPSIS-EVALUATION/distressed.csv") # load distressed file
X = data.drop('TOI', axis=1)
y = data['TOI']
smote = SMOTE(sampling_strategy=0.6, random_state=42,k_neighbors=1)
X, y = smote.fit_resample(X, y)

data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

#Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#Normalized the datasets
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#Defined classifier models
svm = SVC(C=1, kernel='rbf', probability=True, class_weight='balanced',random_state=42)
knn = KNeighborsClassifier(n_neighbors=11, weights='distance', metric='euclidean')
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42,max_depth=10,max_features='sqrt')
xgb = XGBClassifier(n_estimators=500, eval_metric='logloss',scale_pos_weight=2, random_state=42,max_depth=15, colsample_bytree=1,subsample=0.5)

#Trained the models
models = {'SVM': svm, 'KNN': knn,'Random Forest': rf, 'XGBoost': xgb} # run SVM, KNN, Random forest , XGboost
for name, model in models.items():
    model.fit(X_train, y_train)

y_pred_probs = {name: model.predict_proba(X_test)[:, 1] for name, model in models.items()}

#Finding Optimized Threshhold
def optimize_threshold(y_true, y_prob):
    best_mcc, best_thresh = -1, 0.5
    for thresh in np.linspace(0.1, 0.9, 200):
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh

thresholds = {name: optimize_threshold(y_test, y_pred_probs[name]) for name in models}

#Test
y_preds = {name: (y_pred_probs[name] >= thresholds[name]).astype(int) for name in models}



#Evaluate and Print
def evaluate_model(name, y_true, y_pred, y_prob):
    print(f"\n{name} Metrics:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("F1 Score:", f1_score(y_true, y_pred, average='weighted'))
    print("MCC:", matthews_corrcoef(y_true, y_pred))
    print("AUC-ROC:", roc_auc_score(y_true, y_prob))
    print("G-Mean:", geometric_mean_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(cm)

for name in models:
    evaluate_model(name, y_test, y_preds[name], y_pred_probs[name])


# logistic regression
X= data.drop('TOI', axis=1)
y = data['TOI']

smote =SMOTE(sampling_strategy='auto', random_state=42,k_neighbors=1)
X, y = smote.fit_resample(X, y)

data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

lr = LogisticRegression(max_iter=1000, C=0.1, solver='newton-cg',penalty='l2',class_weight={0:1,1:1}) # logistic regression function
lr.fit(X_train, y_train)

y_pred_lr_prob = lr.predict_proba(X_test)[:, 1]


def optimize_threshold(y_true, y_prob):
    best_mcc, best_thresh = -1, 0.5
    for thresh in np.linspace(0.1, 0.9, 200):
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh


lr_thresh = optimize_threshold(y_test, y_pred_lr_prob)
y_pred_lr = (y_pred_lr_prob >= lr_thresh).astype(int)

#Evaluate and Print
def evaluate_model(name, y_true, y_pred, y_prob):
    print(f"\n{name} Metrics:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("F1 Score:", f1_score(y_true, y_pred, average='weighted'))
    print("MCC:", matthews_corrcoef(y_true, y_pred))
    print("AUC-ROC:", roc_auc_score(y_true, y_prob))
    print("G-Mean:", geometric_mean_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(cm)

evaluate_model("Logistic Regression", y_test, y_pred_lr, y_pred_lr_prob)




SVM Metrics:
Accuracy: 0.9361702127659575
Precision: 0.9378006032444771
Recall: 0.9361702127659575
F1 Score: 0.9365411364690125
MCC: 0.8643995242075444
AUC-ROC: 0.9637254901960784
G-Mean: 0.9372466978064098
              precision    recall  f1-score   support

           0       0.89      0.94      0.91        17
           1       0.97      0.93      0.95        30

    accuracy                           0.94        47
   macro avg       0.93      0.94      0.93        47
weighted avg       0.94      0.94      0.94        47


Confusion Matrix:
[[16  1]
 [ 2 28]]

KNN Metrics:
Accuracy: 0.9148936170212766
Precision: 0.9311043566362714
Recall: 0.9148936170212766
F1 Score: 0.9163333866581348
MCC: 0.8376080835255243
AUC-ROC: 0.9666666666666667
G-Mean: 0.9309493362512627
              precision    recall  f1-score   support

           0       0.81      1.00      0.89        17
           1       1.00      0.87      0.93        30

    accuracy                           0.91        47
 

In [None]:

'''
1.First we imported all the necessary libraries and packages.
2.Then we uploaded the datasets (Behavioral).
3.Then we balanced the dataset using BorderlineSMOTE.
4.Then We splitted the dataset in 70:30 (train:test) ratio
5.Then we normalized the data.
6.Then we defined and tuned our classifier model, used parameters suitbale for goof performances.
7.Then we found optimized threshhold for each model.
8.Then we evaluated the models and printed the results
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE,BorderlineSMOTE
from imblearn.over_sampling import ADASYN
from sklearn.metrics import confusion_matrix
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

#Upload and balance the dataset
data = pd.read_csv("https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-MODIFIED-TOPSIS-EVALUATION/behavioral.csv") # load behavioral file
X = data.drop('TOI', axis=1)
y = data['TOI']

smote = BorderlineSMOTE(sampling_strategy=0.8, random_state=42)
X, y = smote.fit_resample(X, y)

data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

#Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#Normalized the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


#Define the models
svm = SVC(C=1, kernel='rbf', probability=True, class_weight='balanced',random_state=42)
knn = KNeighborsClassifier(n_neighbors=5, weights='distance', metric='manhattan')
rf = RandomForestClassifier(n_estimators=500, class_weight='balanced', random_state=42,max_depth=10,max_features='sqrt')
xgb = XGBClassifier(n_estimators=500, eval_metric='logloss',scale_pos_weight=3, random_state=42,max_depth=10, colsample_bytree=1,subsample=0.5,gamma=0.5)

#Train
models = {'SVM': svm, 'KNN': knn,'Random Forest': rf, 'XGBoost': xgb}
for name, model in models.items():
    model.fit(X_train, y_train)

y_pred_probs = {name: model.predict_proba(X_test)[:, 1] for name, model in models.items()}


#Finding Optimized Threshold
def optimize_threshold(y_true, y_prob):
    best_mcc, best_thresh = -1, 0.5
    for thresh in np.linspace(0.1, 0.9, 200):
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh

thresholds = {name: optimize_threshold(y_test, y_pred_probs[name]) for name in models}

#Test
y_preds = {name: (y_pred_probs[name] >= thresholds[name]).astype(int) for name in models}

#Evauate And print
def evaluate_model(name, y_true, y_pred, y_prob):
    print(f"\n{name} Metrics:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("F1 Score:", f1_score(y_true, y_pred, average='weighted'))
    print("MCC:", matthews_corrcoef(y_true, y_pred))
    print("AUC-ROC:", roc_auc_score(y_true, y_prob))
    print("G-Mean:", geometric_mean_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(cm)


for name in models:
    evaluate_model(name, y_test, y_preds[name], y_pred_probs[name])


#For Logistic regression
X = data.drop('TOI', axis=1)
y = data['TOI']

smote = BorderlineSMOTE(sampling_strategy=1, random_state=42,k_neighbors=1)
X, y = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,stratify=y)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

lr = LogisticRegression(max_iter=1000,solver='liblinear',C=1,penalty='l1',random_state=42) # logistic regression
lr.fit(X_train, y_train)
y_pred_lr_prob = lr.predict_proba(X_test)[:, 1]


def optimize_threshold(y_true, y_prob):
    best_mcc, best_thresh = -1, 0.5
    for thresh in np.linspace(0.1, 0.9, 100):
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh

lr_thresh = optimize_threshold(y_test, y_pred_lr_prob)



y_pred_lr = (y_pred_lr_prob >= lr_thresh).astype(int)

#Evaluate and Print
def evaluate_model(name, y_true, y_pred, y_prob):
    print(f"\n{name} Metrics:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("F1 Score:", f1_score(y_true, y_pred, average='weighted'))
    print("MCC:", matthews_corrcoef(y_true, y_pred))
    print("AUC-ROC:", roc_auc_score(y_true, y_prob))
    print("G-Mean:", geometric_mean_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(cm)

evaluate_model("Logistic Regression", y_test, y_pred_lr, y_pred_lr_prob)




SVM Metrics:
Accuracy: 0.9305555555555556
Precision: 0.9379432624113476
Recall: 0.9305555555555556
F1 Score: 0.9293496765406878
MCC: 0.8629489272626913
AUC-ROC: 0.942857142857143
G-Mean: 0.9128709291752769
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        42
           1       1.00      0.83      0.91        30

    accuracy                           0.93        72
   macro avg       0.95      0.92      0.93        72
weighted avg       0.94      0.93      0.93        72


KNN Metrics:
Accuracy: 0.9305555555555556
Precision: 0.932716049382716
Recall: 0.9305555555555556
F1 Score: 0.9299253881831014
MCC: 0.8583237015949035
AUC-ROC: 0.9416666666666667
G-Mean: 0.9197998401998916
              precision    recall  f1-score   support

           0       0.91      0.98      0.94        42
           1       0.96      0.87      0.91        30

    accuracy                           0.93        72
   macro avg       0.94      0.92      0.

In [None]:

'''
1.First we imported all the necessary libraries and packages.
2.Then we uploaded the datasets (Enthusiastic).
3.Then we balanced the dataset using SMOTE.
4.Then We splitted the dataset in 70:30 (train:test) ratio
5.Then we normalized the data.
6.Then we defined and tuned our classifier model, used parameters suitbale for goof performances.
7.Then we found optimized threshhold for each model.
8.Then we evaluated the models and printed the results
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE,BorderlineSMOTE
from imblearn.combine import SMOTEENN
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import GridSearchCV
import warnings
from sklearn.metrics import confusion_matrix

warnings.filterwarnings("ignore", category=FutureWarning)

#Upload and Balance the Dataset
file_path = 'https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-MODIFIED-TOPSIS-EVALUATION/enthusiastic.csv' # load enthusiastic dataset
data = pd.read_csv(file_path)
X = data.drop('TOI', axis=1)
y = data['TOI']

smote =SMOTE(sampling_strategy=0.8,random_state=42)
X, y = smote.fit_resample(X, y)

data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

#Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#Normalie Data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#Define Models
svm = SVC(C=1, kernel='rbf', probability=True, class_weight={0:0.75,1:1.5},random_state=42)
knn = KNeighborsClassifier(n_neighbors=11, weights='distance', metric='euclidean')
rf = RandomForestClassifier(n_estimators=1000, class_weight={0:1,1:1}, random_state=42,max_depth=20,criterion='entropy')
xgb = XGBClassifier(eval_metric='logloss',scale_pos_weight=5, random_state=42,subsample=0.5)
lr =  LogisticRegression(C=0.001, class_weight={0:0.25,1:3},solver='lbfgs', max_iter=1000,random_state=42)


#Train
models = {'SVM': svm, 'KNN': knn,'Random Forest': rf, 'XGBoost': xgb, 'Logistic Regression':lr} # run the classifier models
for name, model in models.items():
    model.fit(X_train, y_train)

y_pred_probs = {name: model.predict_proba(X_test)[:, 1] for name, model in models.items()}

#Finding optimized threshold

def optimize_threshold(y_true, y_prob):
    best_mcc, best_thresh = -1, 0.5
    for thresh in np.arange(0.1, 0.9, 0.005):
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh


thresholds = {name: optimize_threshold(y_test, y_pred_probs[name]) for name in models}

#Test
y_preds = {name: (y_pred_probs[name] >= thresholds[name]).astype(int) for name in models}

#Evaluate and print
def evaluate_model(name, y_true, y_pred, y_prob):
    print(f"\n{name} Metrics:")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='weighted'))
    print("Recall:", recall_score(y_true, y_pred, average='weighted'))
    print("F1 Score:", f1_score(y_true, y_pred, average='weighted'))
    print("MCC:", matthews_corrcoef(y_true, y_pred))
    print("AUC-ROC:", roc_auc_score(y_true, y_prob))
    print("G-Mean:", geometric_mean_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))


    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(cm)

for name in models:
    evaluate_model(name, y_test, y_preds[name], y_pred_probs[name])




SVM Metrics:
Accuracy: 0.9259259259259259
Precision: 0.9343116701607268
Recall: 0.9259259259259259
F1 Score: 0.9245639187574671
MCC: 0.8545757234070167
AUC-ROC: 0.9655819774718398
G-Mean: 0.9074852129730301
              precision    recall  f1-score   support

           0       0.89      1.00      0.94        47
           1       1.00      0.82      0.90        34

    accuracy                           0.93        81
   macro avg       0.94      0.91      0.92        81
weighted avg       0.93      0.93      0.92        81


Confusion Matrix:
[[47  0]
 [ 6 28]]

KNN Metrics:
Accuracy: 0.9135802469135802
Precision: 0.9247828074988569
Recall: 0.9135802469135802
F1 Score: 0.9116164804094253
MCC: 0.8313702367707394
AUC-ROC: 0.9580725907384231
G-Mean: 0.8911327886790068
              precision    recall  f1-score   support

           0       0.87      1.00      0.93        47
           1       1.00      0.79      0.89        34

    accuracy                           0.91        81
 