In [27]:
'''
1. First we imported all the necessary libraries and packages.
2. Then we loaded the dataset (Distressed).
3. Then we balanced the dataset using SMOTE.
4. Then we split the dataset in a 70:30 (train:test) ratio.
5. Then we standardized the features.
6. Then we defined and tuned our classifier models, using parameters suitable for good performance.
7. Then we found the optimized threshold for each model.
8. Then we evaluated the models and printed the results.
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN
import warnings

# Suppress FutureWarning messages for the rest of the run.
warnings.filterwarnings("ignore", category=FutureWarning)
# Load the Distressed dataset and drop exact duplicate rows.
file_path="https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-TOPSIS-EVALUATION/Distressed_20.csv"
data = pd.read_csv(file_path)
data.drop_duplicates(inplace=True)
# 'TOI' is the target column; every other column is used as a feature.
X = data.drop('TOI', axis=1)
y = data['TOI']


# Resample the whole dataset with SMOTE.
# NOTE(review): resampling runs before train_test_split, so the held-out
# split below is drawn from the resampled data — confirm this is intended.
smote = SMOTE(sampling_strategy=0.7, random_state=42,k_neighbors=1)
X, y = smote.fit_resample(X, y)


# Rebuild a single shuffled frame holding the resampled features and target.
# NOTE(review): data_balanced is not referenced again in the visible code;
# the split below uses X and y directly.
data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)


# 70:30 train/test split with a fixed seed (no stratify argument here).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Candidate classifiers with hand-picked hyperparameters.
svm = SVC(C=1,probability=True,kernel='linear',class_weight="balanced",random_state=1)
knn = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan')
rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42,max_depth=10,max_features='sqrt',min_samples_split=5)
xgb = XGBClassifier(n_estimators=5000,eval_metric='logloss', random_state=42,max_depth=10,colsample_bytree=1,subsample=0.3,gamma=0.3, scale_pos_weight=0.6)


# Fit every model on the same scaled training split.
models = {'SVM': svm, 'KNN': knn,'Random Forest': rf, 'XGBoost': xgb}
for name, model in models.items():
    model.fit(X_train, y_train)

# Keep column 1 of predict_proba for each model
# (presumably the TOI == 1 class — confirm against model.classes_).
y_pred_probs = {name: model.predict_proba(X_test)[:, 1] for name, model in models.items()}


def optimize_threshold(y_true, y_prob, thresholds=None):
    """Pick the score cut-off that maximises the Matthews correlation coefficient.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed unchanged to ``matthews_corrcoef``.
    y_prob : array-like
        Scores that are compared against each candidate cut-off with ``>=``.
    thresholds : iterable of float, optional
        Candidate cut-offs, tried in order. Defaults to
        ``np.linspace(0.6, 0.9, 100)``, the grid that used to be hard-coded.

    Returns
    -------
    float
        The first candidate that reaches the highest MCC. When no candidate
        scores above -1 (or ``thresholds`` is empty) the fallback 0.5 is
        returned; note that 0.5 is outside the default grid.
    """
    if thresholds is None:
        thresholds = np.linspace(0.6, 0.9, 100)
    # Accept plain sequences too: `list >= float` would raise TypeError.
    y_prob = np.asarray(y_prob)
    best_mcc, best_thresh = -1, 0.5
    for thresh in thresholds:
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        # Strict '>' keeps the earliest candidate when several tie.
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh


# One tuned cut-off per model, then that model's scores binarised with it.
# NOTE(review): the cut-offs are selected using y_test, the same labels the
# models are scored on afterwards.
thresholds = dict(
    (name, optimize_threshold(y_test, probs))
    for name, probs in y_pred_probs.items()
)
y_preds = {
    name: (probs >= thresholds[name]).astype(int)
    for name, probs in y_pred_probs.items()
}


def evaluate_model(name, y_true, y_pred, y_prob):
    """Print a metrics report for one classifier.

    Prints a header containing ``name``, then accuracy, weighted
    precision/recall/F1, MCC, AUC-ROC (computed from ``y_prob``), G-Mean and
    finally the full classification report. Nothing is returned.
    """
    # Each metric is computed lazily, immediately before its line is printed.
    report_lines = (
        ("Accuracy:", lambda: accuracy_score(y_true, y_pred)),
        ("Precision:", lambda: precision_score(y_true, y_pred, average='weighted')),
        ("Recall:", lambda: recall_score(y_true, y_pred, average='weighted')),
        ("F1 Score:", lambda: f1_score(y_true, y_pred, average='weighted')),
        ("MCC:", lambda: matthews_corrcoef(y_true, y_pred)),
        ("AUC-ROC:", lambda: roc_auc_score(y_true, y_prob)),
        ("G-Mean:", lambda: geometric_mean_score(y_true, y_pred)),
    )
    print(f"\n{name} Metrics:")
    for label, compute in report_lines:
        print(label, compute())
    print(classification_report(y_true, y_pred))

# Report every fitted model against the shared test labels.
for name in models:
    evaluate_model(
        name=name,
        y_true=y_test,
        y_pred=y_preds[name],
        y_prob=y_pred_probs[name],
    )

# Reload the Distressed dataset for the logistic-regression run. This rebinds
# data, X, y, the train/test splits and scaler used by the models above.
# NOTE(review): unlike the earlier load in this cell, duplicates are not
# dropped here — confirm whether that difference is intended.
data = pd.read_csv("https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-TOPSIS-EVALUATION/Distressed_20.csv")
X = data.drop('TOI', axis=1)
y = data['TOI']

# Resample with SMOTE, this time with sampling_strategy='auto'.
# NOTE(review): resampling again precedes the train/test split.
smote = SMOTE(sampling_strategy='auto', random_state=42,k_neighbors=1)
X, y = smote.fit_resample(X, y)

# NOTE(review): data_balanced is built and shuffled but not used afterwards
# in the visible code.
data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
# 70:30 train/test split with a fixed seed (no stratify argument here).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# L1-penalised logistic regression with equal class weights.
lr = LogisticRegression(max_iter=1000,solver='liblinear',penalty='l1',C=0.05, class_weight={0:1,1:1})
lr.fit(X_train, y_train)

# Column 1 of predict_proba (presumably the TOI == 1 class — confirm against lr.classes_).
y_pred_lr_prob = lr.predict_proba(X_test)[:, 1]


def optimize_threshold(y_true, y_prob, thresholds=None):
    """Return the score cut-off with the highest Matthews correlation coefficient.

    This redefinition replaces any ``optimize_threshold`` defined earlier in
    the session.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed unchanged to ``matthews_corrcoef``.
    y_prob : array-like
        Scores that are compared against each candidate cut-off with ``>=``.
    thresholds : iterable of float, optional
        Candidate cut-offs, tried in order. Defaults to
        ``np.linspace(0.1, 0.9, 200)``, the grid that used to be hard-coded.

    Returns
    -------
    float
        The first candidate that reaches the highest MCC, or 0.5 when no
        candidate scores above -1 (or ``thresholds`` is empty).
    """
    if thresholds is None:
        thresholds = np.linspace(0.1, 0.9, 200)
    # Accept plain sequences too: `list >= float` would raise TypeError.
    y_prob = np.asarray(y_prob)
    best_mcc, best_thresh = -1, 0.5
    for thresh in thresholds:
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        # Strict '>' keeps the earliest candidate when several tie.
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh


# Tune the logistic-regression cut-off, then binarise its scores with it.
# NOTE(review): the cut-off is selected using y_test, the same labels the
# model is scored on afterwards.
lr_thresh = optimize_threshold(y_true=y_test, y_prob=y_pred_lr_prob)
y_pred_lr = np.greater_equal(y_pred_lr_prob, lr_thresh).astype(int)


def evaluate_model(name, y_true, y_pred, y_prob):
    """Print the evaluation report for a single model.

    Output order: header with ``name``, accuracy, weighted precision, weighted
    recall, weighted F1, MCC, AUC-ROC (from ``y_prob``), G-Mean, then the
    classification report. Returns ``None``.
    """
    def _emit(label, scorer, scores, **options):
        # Compute one metric against y_true and print it on its own line.
        print(label, scorer(y_true, scores, **options))

    weighted = {'average': 'weighted'}

    print(f"\n{name} Metrics:")
    _emit("Accuracy:", accuracy_score, y_pred)
    _emit("Precision:", precision_score, y_pred, **weighted)
    _emit("Recall:", recall_score, y_pred, **weighted)
    _emit("F1 Score:", f1_score, y_pred, **weighted)
    _emit("MCC:", matthews_corrcoef, y_pred)
    _emit("AUC-ROC:", roc_auc_score, y_prob)
    _emit("G-Mean:", geometric_mean_score, y_pred)
    print(classification_report(y_true, y_pred))

# Report the logistic-regression results on the test split.
evaluate_model(
    name="Logistic Regression",
    y_true=y_test,
    y_pred=y_pred_lr,
    y_prob=y_pred_lr_prob,
)



SVM Metrics:
Accuracy: 0.8484848484848485
Precision: 0.8626262626262626
Recall: 0.8484848484848485
F1 Score: 0.8484848484848485
MCC: 0.7111111111111111
AUC-ROC: 0.8999999999999999
G-Mean: 0.8520128672302584
              precision    recall  f1-score   support

           0       0.78      0.93      0.85        15
           1       0.93      0.78      0.85        18

    accuracy                           0.85        33
   macro avg       0.86      0.86      0.85        33
weighted avg       0.86      0.85      0.85        33


KNN Metrics:
Accuracy: 0.8787878787878788
Precision: 0.8856951871657753
Recall: 0.8787878787878788
F1 Score: 0.8790106951871658
MCC: 0.7638428387783323
AUC-ROC: 0.9333333333333333
G-Mean: 0.8819171036881969
              precision    recall  f1-score   support

           0       0.82      0.93      0.88        15
           1       0.94      0.83      0.88        18

    accuracy                           0.88        33
   macro avg       0.88      0.88      

In [30]:
'''
1. First we imported all the necessary libraries and packages.
2. Then we loaded the dataset (Behavioral).
3. Then we balanced the dataset using BorderlineSMOTE.
4. Then we split the dataset in a 70:30 (train:test) ratio.
5. Then we standardized the features.
6. Then we defined and tuned our classifier models, using parameters suitable for good performance.
7. Then we found the optimized threshold for each model.
8. Then we evaluated the models and printed the results.
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE,BorderlineSMOTE
from imblearn.over_sampling import ADASYN
import warnings

# Suppress FutureWarning messages for the rest of the run.
warnings.filterwarnings("ignore", category=FutureWarning)
# Load the Behavioral dataset; 'TOI' is the target, all other columns are features.
data = pd.read_csv("https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-TOPSIS-EVALUATION/behavioral_30.csv")
X = data.drop('TOI', axis=1)
y = data['TOI']

# Resample the whole dataset with BorderlineSMOTE.
# NOTE(review): resampling runs before train_test_split, so the held-out
# split below is drawn from the resampled data — confirm this is intended.
smote = BorderlineSMOTE(sampling_strategy=0.8, random_state=42)
X, y = smote.fit_resample(X, y)

# NOTE(review): data_balanced is built and shuffled but not used afterwards
# in the visible code; the split below uses X and y directly.
data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
# Stratified 70:30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,stratify=y)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Candidate classifiers with hand-picked hyperparameters.
svm = SVC(C=1, kernel='linear', probability=True, class_weight='balanced',random_state=42)
knn = KNeighborsClassifier(n_neighbors=3, weights='distance', metric='manhattan')
rf = RandomForestClassifier(n_estimators=500, class_weight='balanced', random_state=42,max_depth=15,max_features='sqrt')
xgb = XGBClassifier(n_estimators=100, eval_metric='logloss',scale_pos_weight=2, random_state=42,max_depth=15, colsample_bytree=1,subsample=0.2,gamma=1)


# Fit every model on the same scaled training split.
models = {'SVM': svm, 'KNN': knn,'Random Forest': rf, 'XGBoost': xgb}
for name, model in models.items():
    model.fit(X_train, y_train)

# Keep column 1 of predict_proba for each model
# (presumably the TOI == 1 class — confirm against model.classes_).
y_pred_probs = {name: model.predict_proba(X_test)[:, 1] for name, model in models.items()}

def optimize_threshold(y_true, y_prob, thresholds=None):
    """Pick the score cut-off that maximises the Matthews correlation coefficient.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed unchanged to ``matthews_corrcoef``.
    y_prob : array-like
        Scores that are compared against each candidate cut-off with ``>=``.
    thresholds : iterable of float, optional
        Candidate cut-offs, tried in order. Defaults to
        ``np.linspace(0.1, 0.9, 200)``, the grid that used to be hard-coded.

    Returns
    -------
    float
        The first candidate that reaches the highest MCC, or 0.5 when no
        candidate scores above -1 (or ``thresholds`` is empty).
    """
    if thresholds is None:
        thresholds = np.linspace(0.1, 0.9, 200)
    # Accept plain sequences too: `list >= float` would raise TypeError.
    y_prob = np.asarray(y_prob)
    best_mcc, best_thresh = -1, 0.5
    for thresh in thresholds:
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        # Strict '>' keeps the earliest candidate when several tie.
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh

# Tune a cut-off for each model, then turn its scores into 0/1 predictions.
# NOTE(review): the cut-offs are selected using y_test, the same labels the
# models are scored on afterwards.
thresholds = {
    label: optimize_threshold(y_test, scores)
    for label, scores in y_pred_probs.items()
}
y_preds = {
    label: np.greater_equal(y_pred_probs[label], cutoff).astype(int)
    for label, cutoff in thresholds.items()
}


def evaluate_model(name, y_true, y_pred, y_prob):
    """Print headline metrics and the classification report for one model.

    Precision, recall and F1 use ``average='weighted'``; AUC-ROC is computed
    from ``y_prob`` while every other metric uses ``y_pred``. Returns ``None``.
    """
    print(f"\n{name} Metrics:")

    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)

    precision = precision_score(y_true, y_pred, average='weighted')
    print("Precision:", precision)

    recall = recall_score(y_true, y_pred, average='weighted')
    print("Recall:", recall)

    f1 = f1_score(y_true, y_pred, average='weighted')
    print("F1 Score:", f1)

    mcc = matthews_corrcoef(y_true, y_pred)
    print("MCC:", mcc)

    auc = roc_auc_score(y_true, y_prob)
    print("AUC-ROC:", auc)

    g_mean = geometric_mean_score(y_true, y_pred)
    print("G-Mean:", g_mean)

    report = classification_report(y_true, y_pred)
    print(report)


# Print the report for each fitted model in turn.
for name in models:
    evaluate_model(
        name=name,
        y_true=y_test,
        y_pred=y_preds[name],
        y_prob=y_pred_probs[name],
    )



# Reload the Behavioral dataset for the logistic-regression run. This rebinds
# data, X, y, the train/test splits and scaler used by the models above.
data = pd.read_csv("https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-TOPSIS-EVALUATION/behavioral_30.csv")
X = data.drop('TOI', axis=1)
y = data['TOI']

# Resample with BorderlineSMOTE, this time with sampling_strategy=1 and k_neighbors=1.
# NOTE(review): resampling runs before train_test_split, so the held-out
# split below is drawn from the resampled data — confirm this is intended.
smote =BorderlineSMOTE(sampling_strategy=1, random_state=42,k_neighbors=1)
X, y = smote.fit_resample(X, y)

# Stratified 70:30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,stratify=y)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# L1-penalised logistic regression.
lr = LogisticRegression(max_iter=1000, solver='liblinear',C=1,penalty='l1',random_state=42)
lr.fit(X_train, y_train)
# Column 1 of predict_proba (presumably the TOI == 1 class — confirm against lr.classes_).
y_pred_lr_prob = lr.predict_proba(X_test)[:, 1]


def optimize_threshold(y_true, y_prob, thresholds=None):
    """Return the score cut-off with the highest Matthews correlation coefficient.

    This redefinition replaces any ``optimize_threshold`` defined earlier in
    the session.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed unchanged to ``matthews_corrcoef``.
    y_prob : array-like
        Scores that are compared against each candidate cut-off with ``>=``.
    thresholds : iterable of float, optional
        Candidate cut-offs, tried in order. Defaults to
        ``np.linspace(0.1, 0.9, 100)``, the grid that used to be hard-coded.

    Returns
    -------
    float
        The first candidate that reaches the highest MCC, or 0.5 when no
        candidate scores above -1 (or ``thresholds`` is empty).
    """
    if thresholds is None:
        thresholds = np.linspace(0.1, 0.9, 100)
    # Accept plain sequences too: `list >= float` would raise TypeError.
    y_prob = np.asarray(y_prob)
    best_mcc, best_thresh = -1, 0.5
    for thresh in thresholds:
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        # Strict '>' keeps the earliest candidate when several tie.
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh

# Tune the logistic-regression cut-off, then binarise its scores with it.
# NOTE(review): the cut-off is selected using y_test, the same labels the
# model is scored on afterwards.
lr_thresh = optimize_threshold(y_true=y_test, y_prob=y_pred_lr_prob)
y_pred_lr = np.greater_equal(y_pred_lr_prob, lr_thresh).astype(int)


def evaluate_model(name, y_true, y_pred, y_prob):
    """Print a metrics report for one classifier.

    Prints a header containing ``name``, then accuracy, weighted
    precision/recall/F1, MCC, AUC-ROC (computed from ``y_prob``), G-Mean and
    the classification report. Nothing is returned.
    """
    # Label -> deferred computation; dict order fixes the print order, and
    # each metric is evaluated right before its line is printed.
    metrics = {
        "Accuracy:": lambda: accuracy_score(y_true, y_pred),
        "Precision:": lambda: precision_score(y_true, y_pred, average='weighted'),
        "Recall:": lambda: recall_score(y_true, y_pred, average='weighted'),
        "F1 Score:": lambda: f1_score(y_true, y_pred, average='weighted'),
        "MCC:": lambda: matthews_corrcoef(y_true, y_pred),
        "AUC-ROC:": lambda: roc_auc_score(y_true, y_prob),
        "G-Mean:": lambda: geometric_mean_score(y_true, y_pred),
    }
    print(f"\n{name} Metrics:")
    for label in metrics:
        print(label, metrics[label]())
    print(classification_report(y_true, y_pred))

# Report the logistic-regression results on the test split.
evaluate_model(
    name="Logistic Regression",
    y_true=y_test,
    y_pred=y_pred_lr,
    y_prob=y_pred_lr_prob,
)


SVM Metrics:
Accuracy: 0.8627450980392157
Precision: 0.8638344226579521
Recall: 0.8627450980392157
F1 Score: 0.8629574847347061
MCC: 0.7244616763479587
AUC-ROC: 0.8835403726708073
G-Mean: 0.8633316946034312
              precision    recall  f1-score   support

           0       0.89      0.86      0.87        28
           1       0.83      0.87      0.85        23

    accuracy                           0.86        51
   macro avg       0.86      0.86      0.86        51
weighted avg       0.86      0.86      0.86        51


KNN Metrics:
Accuracy: 0.8431372549019608
Precision: 0.8540305010893247
Recall: 0.8431372549019608
F1 Score: 0.8433785822021116
MCC: 0.696597765719191
AUC-ROC: 0.9021739130434783
G-Mean: 0.8469895538599198
              precision    recall  f1-score   support

           0       0.92      0.79      0.85        28
           1       0.78      0.91      0.84        23

    accuracy                           0.84        51
   macro avg       0.85      0.85      0

In [31]:
'''
1. First we imported all the necessary libraries and packages.
2. Then we loaded the dataset (Enthusiastic).
3. Then we balanced the dataset using SMOTE.
4. Then we split the dataset in a 70:30 (train:test) ratio.
5. Then we standardized the features.
6. Then we defined and tuned our classifier models, using parameters suitable for good performance.
7. Then we found the optimized threshold for each model.
8. Then we evaluated the models and printed the results.
'''

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score
from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE,BorderlineSMOTE
from imblearn.combine import SMOTEENN
from imblearn.under_sampling import NearMiss
from imblearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import GridSearchCV
import warnings
from sklearn.metrics import confusion_matrix

# Suppress FutureWarning messages for the rest of the run.
warnings.filterwarnings("ignore", category=FutureWarning)
# Load the Enthusiastic dataset; 'TOI' is the target, all other columns are features.
file_path = 'https://raw.githubusercontent.com/FaisalAbid11/Permutation-Entropy-vs-Modified-TOPSIS/refs/heads/main/AHP-TOPSIS-EVALUATION/enthusiastic_50.csv'
data = pd.read_csv(file_path)
X = data.drop('TOI', axis=1)
y = data['TOI']


# Resample the whole dataset with SMOTE (sampling_strategy=1).
# NOTE(review): resampling runs before train_test_split, so the held-out
# split below is drawn from the resampled data — confirm this is intended.
smote =SMOTE(sampling_strategy=1,random_state=42)
X, y = smote.fit_resample(X, y)

# NOTE(review): data_balanced is built and shuffled but not used afterwards
# in the visible code; the split below uses X and y directly.
data_balanced = pd.DataFrame(X, columns=X.columns)
data_balanced['TOI'] = y
data_balanced = data_balanced.sample(frac=1, random_state=42).reset_index(drop=True)



# Stratified 70:30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42,stratify=y)
# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



# Candidate classifiers with hand-picked hyperparameters; in this cell the
# logistic regression is trained alongside the other models.
lr =  LogisticRegression(C=1, class_weight={0:0.25,1:2},solver='lbfgs', max_iter=1000,random_state=42)
svm = SVC(C=0.1, kernel='rbf', probability=True, class_weight='balanced',gamma=1,random_state=42)
knn = KNeighborsClassifier(n_neighbors=5, weights='distance', metric='manhattan')
rf = RandomForestClassifier(n_estimators=500, class_weight={0:3,1:1}, random_state=42,max_depth=20,criterion='entropy',min_samples_split=20)
xgb = XGBClassifier(eval_metric='logloss',scale_pos_weight=5, random_state=42)

# Fit every model on the same scaled training split.
models = {'SVM': svm, 'KNN': knn, 'Logistic Regression': lr, 'Random Forest': rf, 'XGBoost': xgb}
for name, model in models.items():
    model.fit(X_train, y_train)

# Keep column 1 of predict_proba for each model
# (presumably the TOI == 1 class — confirm against model.classes_).
y_pred_probs = {name: model.predict_proba(X_test)[:, 1] for name, model in models.items()}


def optimize_threshold(y_true, y_prob, thresholds=None):
    """Pick the score cut-off that maximises the Matthews correlation coefficient.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, passed unchanged to ``matthews_corrcoef``.
    y_prob : array-like
        Scores that are compared against each candidate cut-off with ``>=``.
    thresholds : iterable of float, optional
        Candidate cut-offs, tried in order. Defaults to
        ``np.arange(0.1, 0.9, 0.005)``, the grid that used to be hard-coded.

    Returns
    -------
    float
        The first candidate that reaches the highest MCC, or 0.5 when no
        candidate scores above -1 (or ``thresholds`` is empty).
    """
    if thresholds is None:
        thresholds = np.arange(0.1, 0.9, 0.005)
    # Accept plain sequences too: `list >= float` would raise TypeError.
    y_prob = np.asarray(y_prob)
    best_mcc, best_thresh = -1, 0.5
    for thresh in thresholds:
        y_pred_adjusted = (y_prob >= thresh).astype(int)
        mcc = matthews_corrcoef(y_true, y_pred_adjusted)
        # Strict '>' keeps the earliest candidate when several tie.
        if mcc > best_mcc:
            best_mcc, best_thresh = mcc, thresh
    return best_thresh


# Per-model tuned cut-offs, followed by the resulting 0/1 predictions.
# NOTE(review): the cut-offs are selected using y_test, the same labels the
# models are scored on afterwards.
thresholds = dict(
    (key, optimize_threshold(y_test, scores))
    for key, scores in y_pred_probs.items()
)
y_preds = dict(
    (key, (y_pred_probs[key] >= cutoff).astype(int))
    for key, cutoff in thresholds.items()
)



def evaluate_model(name, y_true, y_pred, y_prob):
    """Print the evaluation report and confusion matrix for a single model.

    Output order: header with ``name``, accuracy, binary-averaged precision,
    recall and F1, MCC, AUC-ROC (from ``y_prob``), G-Mean, the classification
    report and finally the confusion matrix. Returns ``None``.
    """
    def _emit(label, scorer, scores, **options):
        # Compute one metric against y_true and print it on its own line.
        print(label, scorer(y_true, scores, **options))

    binary = {'average': 'binary'}

    print(f"\n{name} Metrics:")
    _emit("Accuracy:", accuracy_score, y_pred)
    _emit("Precision:", precision_score, y_pred, **binary)
    _emit("Recall:", recall_score, y_pred, **binary)
    _emit("F1 Score:", f1_score, y_pred, **binary)
    _emit("MCC:", matthews_corrcoef, y_pred)
    _emit("AUC-ROC:", roc_auc_score, y_prob)
    _emit("G-Mean:", geometric_mean_score, y_pred)
    print(classification_report(y_true, y_pred))

    # The matrix is computed before its heading is printed.
    matrix = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix:")
    print(matrix)

# Print the report for each of the five fitted models.
for name in models:
    evaluate_model(
        name=name,
        y_true=y_test,
        y_pred=y_preds[name],
        y_prob=y_pred_probs[name],
    )



SVM Metrics:
Accuracy: 0.8778625954198473
Precision: 0.8888888888888888
Recall: 0.8615384615384616
F1 Score: 0.875
MCC: 0.7560067164845063
AUC-ROC: 0.8833333333333333
G-Mean: 0.8775894086434557
              precision    recall  f1-score   support

           0       0.87      0.89      0.88        66
           1       0.89      0.86      0.88        65

    accuracy                           0.88       131
   macro avg       0.88      0.88      0.88       131
weighted avg       0.88      0.88      0.88       131


Confusion Matrix:
[[59  7]
 [ 9 56]]

KNN Metrics:
Accuracy: 0.8625954198473282
Precision: 0.8405797101449275
Recall: 0.8923076923076924
F1 Score: 0.8656716417910447
MCC: 0.726658042293779
AUC-ROC: 0.9241258741258741
G-Mean: 0.8623164985025763
              precision    recall  f1-score   support

           0       0.89      0.83      0.86        66
           1       0.84      0.89      0.87        65

    accuracy                           0.86       131
   macro avg   