In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import (accuracy_score, f1_score, recall_score, precision_score,  roc_curve, auc, confusion_matrix, matthews_corrcoef)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (GradientBoostingClassifier, RandomForestClassifier, HistGradientBoostingClassifier)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from keras.models import Sequential
from keras.layers import  Input, Dense
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold
from numpy import interp
import joblib

In [None]:
#Load and define the data
#Use forward slashes throughout: a backslash inside a normal string literal (such as '\K') is an
#invalid escape sequence that newer Python versions warn about.
DATA_DIR = 'C:/Users/Rasmu/OneDrive/DTU/Kunstig intelligens og Data/7. semestre/Bachelor/Machine-Learning-For-Network-Traffic-Analysis/Data/combined_data'
balanced_data = pd.read_csv(f'{DATA_DIR}/final_data_balanced.csv')
imbalanced_data = pd.read_csv(f'{DATA_DIR}/final_data_imbalanced.csv')

#Columns that hold the targets and therefore must not be used as features
TARGET_COLUMNS = ['label', 'detailed_label']
#Feature subset used for the "selected features" experiments
SELECTED_FEATURES = ['conn_state_RSTOS0', 'orig_pkts', 'resp_bytes', 'resp_pkts', 'orig_bytes']

#The 4 datasets
#1) balanced data, all features
X = balanced_data.drop(columns=TARGET_COLUMNS)
y = balanced_data['label']
#2) imbalanced data, all features
X_imbalanced = imbalanced_data.drop(columns=TARGET_COLUMNS)
y_imbalanced = imbalanced_data['label']
#3) balanced data, selected features only
X_selected_ba = balanced_data[SELECTED_FEATURES]
y_selected_ba = balanced_data['label']
#4) imbalanced data, selected features only
X_selected_im = imbalanced_data[SELECTED_FEATURES]
y_selected_im = imbalanced_data['label']

#Define the models based on the best hyperparameters from the hyperoptimization.ipynb file
#NOTE(review): none of the estimators is given a random_state, so the tree-based models are not
#reproducible from run to run even though the CV splitter is seeded.
ml_models = [
    DecisionTreeClassifier(criterion='entropy', max_depth=15, max_features='sqrt', min_samples_leaf=1, min_samples_split=5),
    RandomForestClassifier(criterion='gini', max_depth=15, max_features='log2', min_samples_leaf=2, min_samples_split=3, n_estimators=58),
    GaussianNB(),
    GradientBoostingClassifier(learning_rate=0.036847485689015684, max_depth=10, max_features='log2', min_samples_leaf=1, min_samples_split=3, n_estimators=144, subsample=0.9221065703631557),
    KNeighborsClassifier(algorithm='auto', n_neighbors=4, p=2, weights='uniform'),
    HistGradientBoostingClassifier(l2_regularization=0.7111495324380178, learning_rate=0.09095010461397154, max_bins=247, max_depth=15, max_leaf_nodes=42, min_samples_leaf=8),
]

#Display names in the same order as ml_models (the MLP is trained separately and comes last)
models_names = ['Decision Tree', 'Random Forest', 'Naive Bayes', 'Gradient Boosting', 'KNN', 'Hist Gradient Boosting', 'MLP']

#Define the result lists
#Every name below starts out as its own, independent empty list
results, std = [], []
average_roc_auc, average_std_roc_auc = [], []
#One score list and one std list per metric
accuracy_scores, accuracy_std = [], []
f1_scores, f1_std = [], []
precision_scores, precision_std = [], []
recall_scores, recall_std = [], []
MCC_values, MCC_std = [], []

In [None]:
#Train the models on the balanced dataset

#Shared false-positive-rate grid (100 evenly spaced points in [0, 1]) for the interpolated ROC curves
mean_fpr = np.linspace(start=0, stop=1, num=100)

#Ensure that the folds are stratified
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

#Plotting helper function
def plot_confusion_matrix_with_std(avg_cm, std_cm, model_name):
    """Show two annotated heatmaps side by side: avg_cm (left) and std_cm (right).

    avg_cm and std_cm are handed to seaborn's heatmap unchanged; model_name only
    appears in the two panel titles. The figure is displayed, nothing is returned.
    """
    predicted_ticks = ['Pred 0', 'Pred 1']
    true_ticks = ['True 0', 'True 1']
    #(matrix, colour map, title) for the left and the right panel
    panels = (
        (avg_cm, 'Blues', f'Mean Confusion Matrix for {model_name}'),
        (std_cm, 'Oranges', f'STD of Mean Confusion Matrix for {model_name}'),
    )

    plt.figure(figsize=(10, 5))
    for position, (matrix, colour_map, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        sns.heatmap(matrix, annot=True, fmt='.2f', cmap=colour_map, xticklabels=predicted_ticks, yticklabels=true_ticks)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title(title)
    plt.tight_layout()
    plt.show()

#Models to store model specific data
model_mean_fprs = {}
model_mean_tprs = {}
model_std_tprs = {}
model_auc = {}

fold = 1 #initialize fold counter

#Cross-validate every scikit-learn model: fit on the training folds, score on the held-out fold,
#save the fitted estimator of each fold and collect the ROC / confusion-matrix statistics
for model in ml_models:
    model_name = model.__class__.__name__
    tpr_list = []
    roc_auc_list = []
    cm_list = []
    model_fold_metrics = [] #one [accuracy, f1, precision, recall, mcc] row per fold of this model only

    for fold, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1]

        #Calculate the different metrics
        cm = confusion_matrix(y_test, y_pred)
        cm_list.append(cm)
        fpr, tpr, _ = roc_curve(y_test, y_prob)
        roc_auc = auc(fpr, tpr)
        roc_auc_list.append(roc_auc)
        #Interpolate onto the shared FPR grid so the folds can be averaged point by point
        tpr_interpolated = np.interp(mean_fpr, fpr, tpr)
        tpr_interpolated[0] = 0.0
        tpr_list.append(tpr_interpolated)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        mcc = matthews_corrcoef(y_test, y_pred)
        accuracy_scores.append(accuracy)
        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)
        MCC_values.append(mcc)

        fold_metrics = [accuracy, f1, precision, recall, mcc]
        results.append(fold_metrics)
        model_fold_metrics.append(fold_metrics)

        #Save the model
        model_filename = f"{model_name}_fold_{fold}.joblib"
        joblib.dump(model, model_filename)

    fold = 1 #leave the counter at 1 for the code that follows

    #Standard deviation of each metric across the folds of THIS model only. The global score lists
    #mix the folds of every model trained so far, so they cannot be used for a per-model spread.
    std.append(np.std(model_fold_metrics, axis=0).tolist())

    #Calculating and showing the mean and standard deviation confusion matrix
    mean_tpr = np.mean(tpr_list, axis=0)
    std_tpr = np.std(tpr_list, axis=0)
    mean_tpr[-1] = 1.0
    model_mean_fprs[model_name] = mean_fpr
    model_mean_tprs[model_name] = mean_tpr
    model_std_tprs[model_name] = std_tpr
    model_auc[model_name] = (np.mean(roc_auc_list), np.std(roc_auc_list))
    avg_cm = np.mean(cm_list, axis=0)
    std_cm = np.std(cm_list, axis=0)
    plot_confusion_matrix_with_std(avg_cm, std_cm, model_name)

#List for the MLP model
mlp_tpr_list = []
mlp_roc_auc_list = []
mlp_cm_list = []
fold_accuracies = []
history_data = []
mlp_fold_metrics = [] #one [accuracy, f1, precision, recall, mcc] row per MLP fold

#MLP model with hyperparameters from hyperoptimization.ipynb
#Folds are numbered explicitly from 1 so the file names do not depend on a counter left over from earlier code
for fold, (train_index, val_index) in enumerate(kf.split(X, y), start=1):
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]
    input_dim = X_train.shape[1]
    #A fresh network is built for every fold so no weights carry over between folds
    mlp_model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dropout(0.05),
        Dense(64, activation='relu'),
        Dropout(0.05),
        Dense(16, activation='relu'),
        Dropout(0.05),
        Dense(4, activation='relu'),
        Dropout(0.05),
        Dense(1, activation='sigmoid')
    ])

    optimizer = Adam(learning_rate=0.0007707278591463597)
    mlp_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    #NOTE(review): the validation fold both drives early stopping and is the fold the metrics are reported on
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = mlp_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val),
                  callbacks=[early_stopping], verbose=1)

    history_data.append(history.history)

    #Predict once; the hard labels are the sigmoid outputs thresholded at 0.5
    y_prob = mlp_model.predict(X_val).ravel()
    y_pred = (y_prob > 0.5).astype(int)
    mlp_model.summary()

    #Calculate the different metrics
    cm = confusion_matrix(y_val, y_pred)
    mlp_cm_list.append(cm)
    fpr, tpr, _ = roc_curve(y_val, y_prob)
    roc_auc = auc(fpr, tpr)
    mlp_roc_auc_list.append(roc_auc)
    tpr_interpolated = np.interp(mean_fpr, fpr, tpr)
    tpr_interpolated[0] = 0.0
    mlp_tpr_list.append(tpr_interpolated)
    accuracy = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    mcc = matthews_corrcoef(y_val, y_pred)
    accuracy_scores.append(accuracy)
    f1_scores.append(f1)
    precision_scores.append(precision)
    recall_scores.append(recall)
    MCC_values.append(mcc)
    fold_metrics = [accuracy, f1, precision, recall, mcc]
    results.append(fold_metrics)
    mlp_fold_metrics.append(fold_metrics)

    #Save the models
    #Dump the network trained in this fold. `model` still refers to the last scikit-learn estimator
    #from the loop over ml_models, so it must not be saved under the MLP file name.
    model_filename = f"MLP_fold_{fold}.joblib"
    joblib.dump(mlp_model, model_filename)

#Standard deviation of each metric across the MLP folds only
std.append(np.std(mlp_fold_metrics, axis=0).tolist())

#Confusion matrix for MLP
#Element-wise mean / std over the per-fold confusion matrices
avg_mlp_cm, std_mlp_cm = np.mean(mlp_cm_list, axis=0), np.std(mlp_cm_list, axis=0)
plot_confusion_matrix_with_std(avg_mlp_cm, std_mlp_cm, "MLP")

#Mean ROC curve of the MLP on the shared FPR grid, forced to end in (1, 1)
mean_mlp_tpr, std_mlp_tpr = np.mean(mlp_tpr_list, axis=0), np.std(mlp_tpr_list, axis=0)
mean_mlp_tpr[-1] = 1.0
mean_mlp_auc, std_mlp_auc = np.mean(mlp_roc_auc_list), np.std(mlp_roc_auc_list)

#ROC curve for all models
plt.figure(figsize=(10, 8))
for model_name, fpr_grid in model_mean_fprs.items():
    tpr_mean = model_mean_tprs[model_name]
    tpr_spread = model_std_tprs[model_name]
    auc_mean, auc_spread = model_auc[model_name]
    plt.plot(fpr_grid, tpr_mean, label=f"{model_name} (AUC = {auc_mean:.4f} ± {auc_spread:.4f})")
    #Shaded band: one standard deviation around the mean curve
    plt.fill_between(fpr_grid, tpr_mean - tpr_spread, tpr_mean + tpr_spread, alpha=0.2)
plt.plot(mean_fpr, mean_mlp_tpr, label=f"MLP (AUC = {mean_mlp_auc:.4f} ± {std_mlp_auc:.4f})", color='blue')
plt.fill_between(mean_fpr, mean_mlp_tpr - std_mlp_tpr, mean_mlp_tpr + std_mlp_tpr, color='blue', alpha=0.2)
#Diagonal reference line
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Mean ROC Curve with Standard Deviation for Each Model')
plt.legend(loc="lower right")
plt.show()


#Create the table with the results
models_names = ['Decision Tree', 'Random Forest', 'Naive Bayes', 'Gradient Boosting', 'KNN', 'Hist Gradient Boosting', 'MLP']
metric_names = ['accuracy', 'f1', 'precision', 'recall', 'MCC']
n_folds = kf.get_n_splits()

#`results` holds one [accuracy, f1, precision, recall, mcc] row per fold, appended model by model in
#the order of `models_names`. Only the most recent n_models * n_folds rows are used, so rows left
#over from an earlier run of the training loops cannot shift the model boundaries.
n_rows = len(models_names) * n_folds
if len(results) < n_rows:
    raise ValueError(f"Expected {n_rows} result rows ({len(models_names)} models x {n_folds} folds), found {len(results)}")
fold_metrics = np.asarray(results[-n_rows:], dtype=float).reshape(len(models_names), n_folds, len(metric_names))

#Per-model slices: n_folds rows of [accuracy, f1, precision, recall, mcc] each
DT_results, RF_results, GNB_results, GB_results, KNN_results, HGB_results, MLP_results = fold_metrics.tolist()

#Mean and standard deviation of every metric ACROSS the folds of each model (axis 1 is the fold axis).
#Averaging a single fold row instead would mix the five different metrics together.
result_data = fold_metrics.mean(axis=1).tolist()
results_std = fold_metrics.std(axis=1).tolist()
DT_std, RF_std, GNB_std, GB_std, KNN_std, HGB_std, MLP_std = results_std

result_df = pd.DataFrame(result_data, columns=metric_names, index=models_names)
result_df_std = pd.DataFrame(results_std, columns=metric_names, index=models_names)
#Both frames share the row order of models_names, so means and stds can be paired row by row
combined_df = pd.DataFrame(
    [[f"{mean:.4f} ± {spread:.4f}" for mean, spread in zip(mean_row, std_row)]
     for mean_row, std_row in zip(result_data, results_std)],
    columns=result_df.columns,
    index=result_df.index,
)
print(combined_df)

#MLP model accuracy and loss plot
#Initialize lists to store loss and accuracy for each fold
loss_per_fold, val_loss_per_fold = [], []
accuracy_per_fold, val_accuracy_per_fold = [], []

#Extract the loss and accuracy for each fold
#(history key, list that receives that key's per-epoch curve)
curve_targets = (
    ('loss', loss_per_fold),
    ('val_loss', val_loss_per_fold),
    ('accuracy', accuracy_per_fold),
    ('val_accuracy', val_accuracy_per_fold),
)
for history in history_data:
    for curve_key, per_fold_curves in curve_targets:
        per_fold_curves.append(history[curve_key])

#Calculate the values for the accuracy and loss plots
def pad_list(data, max_len):
    """Return a new list: data followed by enough NaN entries to reach max_len.

    If data already has max_len or more items, nothing is appended.
    The input list itself is not modified.
    """
    missing = max_len - len(data)
    filler = [np.nan for _ in range(missing)]
    return data + filler
#Longest training run; shorter runs are padded with NaN up to this length
max_len = max(len(fold_history['loss']) for fold_history in history_data)

#Equal-length per-fold curves for every recorded quantity
padded_curves = {
    curve_key: [pad_list(fold_history[curve_key], max_len) for fold_history in history_data]
    for curve_key in ('loss', 'val_loss', 'accuracy', 'val_accuracy')
}
loss_per_fold = padded_curves['loss']
val_loss_per_fold = padded_curves['val_loss']
accuracy_per_fold = padded_curves['accuracy']
val_accuracy_per_fold = padded_curves['val_accuracy']

#Per-epoch mean / std across folds; the nan-aware versions ignore the padding
mean_loss, std_loss = np.nanmean(loss_per_fold, axis=0), np.nanstd(loss_per_fold, axis=0)
mean_val_loss, std_val_loss = np.nanmean(val_loss_per_fold, axis=0), np.nanstd(val_loss_per_fold, axis=0)
mean_accuracy, std_accuracy = np.nanmean(accuracy_per_fold, axis=0), np.nanstd(accuracy_per_fold, axis=0)
mean_val_accuracy, std_val_accuracy = np.nanmean(val_accuracy_per_fold, axis=0), np.nanstd(val_accuracy_per_fold, axis=0)

#Plot Loss and Plot Accuracy
#Both figures share one layout: training curve in blue, validation curve in orange,
#each with a shaded band of one standard deviation around the mean
epochs = range(1, len(mean_loss) + 1)
figure_specs = (
    (
        'Training and Validation Loss (Mean ± Std)',
        'Loss',
        ((mean_loss, std_loss, 'Training Loss', 'blue'),
         (mean_val_loss, std_val_loss, 'Validation Loss', 'orange')),
    ),
    (
        'Training and Validation Accuracy (Mean ± Std)',
        'Accuracy',
        ((mean_accuracy, std_accuracy, 'Training Accuracy', 'blue'),
         (mean_val_accuracy, std_val_accuracy, 'Validation Accuracy', 'orange')),
    ),
)
for figure_title, y_axis_label, curves in figure_specs:
    plt.figure(figsize=(12, 6))
    for curve_mean, curve_std, curve_label, curve_color in curves:
        plt.plot(epochs, curve_mean, label=curve_label, color=curve_color)
        plt.fill_between(epochs, curve_mean - curve_std, curve_mean + curve_std, color=curve_color, alpha=0.2)
    plt.title(figure_title, fontsize=14)
    plt.xlabel('Epochs', fontsize=12)
    plt.ylabel(y_axis_label, fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.show()

In [None]:
#Test the models on the imbalanced dataset
#Define the result lists
#Every accumulator is reset to its own fresh, empty list
results = []
average_roc_auc, average_std_roc_auc = [], []
accuracy_scores, accuracy_std = [], []
f1_scores, f1_std = [], []
precision_scores, precision_std = [], []
recall_scores, recall_std = [], []
MCC_values, MCC_std = [], []

#Define data
x_test, y_test = X_imbalanced, y_imbalanced

#ROC curve plotting settings
fpr_mean = np.linspace(start=0, stop=1, num=100)
plt.figure(figsize=(10, 8))

#Model types and folds
#The names are the prefixes of the saved "<name>_fold_<k>.joblib" files
model_types = ['DecisionTreeClassifier', 'RandomForestClassifier', 'GaussianNB', 'GradientBoostingClassifier', 'KNeighborsClassifier', 'HistGradientBoostingClassifier', 'MLP']
folds_per_model = 5
models = []
#One empty slot per (model type, fold) pair
std = [list() for _ in range(len(model_types) * folds_per_model)]
#Confusion matrices collected per model type
conf_matrices = dict((name, []) for name in model_types)

#Load models from files
#NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load model files
#that were written by this notebook.
models = []
missing_model_files = []
for model_type in model_types:
    for fold in range(folds_per_model):
        model_filename = f"{model_type.replace(' ', '')}_fold_{fold + 1}.joblib"
        try:
            loaded_model = joblib.load(model_filename)
        except FileNotFoundError:
            print(f"Warning: Model file not found: {model_filename}")
            missing_model_files.append(model_filename)
            #Placeholder keeps the flat list aligned: index = model index * folds_per_model + fold
            models.append(None)
        else:
            models.append(loaded_model)

#A None placeholder cannot be evaluated, so fail here with the complete list of missing files
#instead of later with an unrelated AttributeError on None.
if missing_model_files:
    raise FileNotFoundError("Missing model files: " + ", ".join(missing_model_files))

#Test each model
#One [accuracy, f1, precision, recall, mcc] standard deviation vector per model type, in model_types order
std = []
for i, model_type in enumerate(model_types):
    tprs = []
    aucs = []
    #Running sum of this model type's confusion matrices
    conf_matrix_sum = np.zeros((2, 2))
    model_fold_metrics = [] #metric rows of this model type only, one per saved fold model

    for fold in range(folds_per_model):
        current_model_index = i * folds_per_model + fold
        model = models[current_model_index]
        if hasattr(model, 'predict_proba'):
            y_pred = model.predict(x_test)
            y_prob = model.predict_proba(x_test)[:, 1]
        else:
            #Models without predict_proba (e.g. a Keras network with a sigmoid output) return the
            #positive-class probability from predict(); threshold it at 0.5 for the hard labels
            y_prob = np.asarray(model.predict(x_test)).ravel()
            y_pred = (y_prob > 0.5).astype(int)

        #Calculate the different metrics
        fpr, tpr, _ = roc_curve(y_test, y_prob)
        roc_auc = auc(fpr, tpr)
        #Interpolate onto the shared FPR grid and pin the curve to start at (0, 0)
        tpr_interpolated = np.interp(fpr_mean, fpr, tpr)
        tpr_interpolated[0] = 0.0
        tprs.append(tpr_interpolated)
        aucs.append(roc_auc)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        mcc = matthews_corrcoef(y_test, y_pred)
        accuracy_scores.append(accuracy)
        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)
        MCC_values.append(mcc)
        fold_metrics = [accuracy, f1, precision, recall, mcc]
        results.append(fold_metrics)
        model_fold_metrics.append(fold_metrics)
        conf_matrix = confusion_matrix(y_test, y_pred)
        conf_matrices[model_type].append(conf_matrix)
        conf_matrix_sum += conf_matrix

    #Spread of each metric across the fold models of this type. The standard deviation of a single
    #scalar score is always 0, so it has to be taken over the fold rows.
    std.append(np.std(model_fold_metrics, axis=0).tolist())

    # Calculate values for ROC curve
    mean_tpr = np.mean(tprs, axis=0)
    std_tpr = np.std(tprs, axis=0)
    mean_auc = np.mean(aucs)
    std_auc = np.std(aucs)
    plt.plot(fpr_mean, mean_tpr, label=f'{model_type} (AUC = {mean_auc:.2f} ± {std_auc:.2f})')
    plt.fill_between(fpr_mean, mean_tpr - std_tpr, mean_tpr + std_tpr, alpha=0.2)

#ROC plot
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for All Models')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()

#Plot Mean and Standard Deviation Confusion Matrices
class_ticks = ['Benign', 'Malicious']
for model_type, conf_matrices_list in conf_matrices.items():
    #Stack the per-fold matrices and reduce over the fold axis
    conf_matrices_array = np.array(conf_matrices_list)
    mean_conf_matrix = conf_matrices_array.mean(axis=0)
    std_conf_matrix = conf_matrices_array.std(axis=0)

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    #(matrix, colour map, title) for the left and the right panel
    panels = (
        (mean_conf_matrix, 'Blues', f'Mean Confusion Matrix for {model_type}'),
        (std_conf_matrix, 'Oranges', f'Standard Deviation Confusion Matrix for {model_type}'),
    )
    for panel_axis, (matrix, colour_map, panel_title) in zip(axes, panels):
        sns.heatmap(matrix, annot=True, fmt='.2f', cmap=colour_map, xticklabels=class_ticks, yticklabels=class_ticks, ax=panel_axis)
        panel_axis.set_title(panel_title)
        panel_axis.set_xlabel('Predicted')
        panel_axis.set_ylabel('True')
    plt.tight_layout()
    plt.show()

#Create the table with the results
models_names = ['Decision Tree', 'Random Forest', 'Naive Bayes', 'Gradient Boosting', 'KNN', 'Hist Gradient Boosting', 'MLP']
metric_names = ['accuracy', 'f1', 'precision', 'recall', 'MCC']
n_folds = folds_per_model

#`results` holds one [accuracy, f1, precision, recall, mcc] row per saved fold model, appended model
#type by model type in the order of `models_names`. Only the most recent n_models * n_folds rows
#are used, so rows left over from an earlier run cannot shift the model boundaries.
n_rows = len(models_names) * n_folds
if len(results) < n_rows:
    raise ValueError(f"Expected {n_rows} result rows ({len(models_names)} models x {n_folds} folds), found {len(results)}")
fold_metrics = np.asarray(results[-n_rows:], dtype=float).reshape(len(models_names), n_folds, len(metric_names))

#Per-model slices: n_folds rows of [accuracy, f1, precision, recall, mcc] each
DT_results, RF_results, GNB_results, GB_results, KNN_results, HGB_results, MLP_results = fold_metrics.tolist()

#Mean and standard deviation of every metric ACROSS the fold models of each type (axis 1 is the fold axis).
#Averaging a single fold row instead would mix the five different metrics together.
result_data = fold_metrics.mean(axis=1).tolist()
results_std = fold_metrics.std(axis=1).tolist()
DT_std, RF_std, GNB_std, GB_std, KNN_std, HGB_std, MLP_std = results_std

result_df = pd.DataFrame(result_data, columns=metric_names, index=models_names)
result_df_std = pd.DataFrame(results_std, columns=metric_names, index=models_names)
#Both frames share the row order of models_names, so means and stds can be paired row by row
combined_df = pd.DataFrame(
    [[f"{mean:.4f} ± {spread:.4f}" for mean, spread in zip(mean_row, std_row)]
     for mean_row, std_row in zip(result_data, results_std)],
    columns=result_df.columns,
    index=result_df.index,
)
print(combined_df)

In [None]:
#Train the models on the selected features from the balanced dataset
#Define the result lists
#Every name below is reset to its own, independent empty list
results, std = [], []
average_roc_auc, average_std_roc_auc = [], []
#One score list and one std list per metric
accuracy_scores, accuracy_std = [], []
f1_scores, f1_std = [], []
precision_scores, precision_std = [], []
recall_scores, recall_std = [], []
MCC_values, MCC_std = [], []

#Define the models based on the best hyperparameters from the hyperoptimization.ipynb file
ml_models = [
    DecisionTreeClassifier(
        criterion='gini',
        max_depth=None,
        max_features='sqrt',
        min_samples_leaf=1,
        min_samples_split=2,
    ),
    RandomForestClassifier(
        criterion='gini',
        max_depth=15,
        max_features='sqrt',
        min_samples_leaf=1,
        min_samples_split=9,
        n_estimators=177,
    ),
    GaussianNB(),
    GradientBoostingClassifier(
        learning_rate=0.10356349942209475,
        max_depth=15,
        max_features='log2',
        min_samples_leaf=8,
        min_samples_split=5,
        n_estimators=148,
        subsample=0.6861413832808716,
    ),
    KNeighborsClassifier(
        algorithm='kd_tree',
        n_neighbors=19,
        p=1,
        weights='distance',
    ),
    HistGradientBoostingClassifier(
        l2_regularization=0.7111495324380178,
        learning_rate=0.09095010461397154,
        max_bins=247,
        max_depth=15,
        max_leaf_nodes=42,
        min_samples_leaf=8,
    ),
]

#Define data
#From here on X and y refer to the selected-feature view of the balanced data
X, y = X_selected_ba, y_selected_ba

#Shared false-positive-rate grid (100 evenly spaced points in [0, 1]) for the interpolated ROC curves
mean_fpr = np.linspace(start=0, stop=1, num=100)

#Ensure that the folds are stratified
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

#Plotting helper function
def plot_confusion_matrix_with_std(avg_cm, std_cm, model_name):
    """Show two annotated heatmaps side by side: avg_cm (left) and std_cm (right).

    avg_cm and std_cm are handed to seaborn's heatmap unchanged; model_name only
    appears in the two panel titles. The figure is displayed, nothing is returned.
    """
    predicted_ticks = ['Pred 0', 'Pred 1']
    true_ticks = ['True 0', 'True 1']
    #(matrix, colour map, title) for the left and the right panel
    panels = (
        (avg_cm, 'Blues', f'Mean Confusion Matrix for {model_name}'),
        (std_cm, 'Oranges', f'STD of Mean Confusion Matrix for {model_name}'),
    )

    plt.figure(figsize=(10, 5))
    for position, (matrix, colour_map, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        sns.heatmap(matrix, annot=True, fmt='.2f', cmap=colour_map, xticklabels=predicted_ticks, yticklabels=true_ticks)
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.title(title)
    plt.tight_layout()
    plt.show()


#Models to store model specific data
model_mean_fprs = {}
model_mean_tprs = {}
model_std_tprs = {}
model_auc = {}

fold = 1 #initialize fold counter

#Cross-validate every scikit-learn model: fit on the training folds, score on the held-out fold,
#save the fitted estimator of each fold and collect the ROC / confusion-matrix statistics
for model in ml_models:
    model_name = model.__class__.__name__
    tpr_list = []
    roc_auc_list = []
    cm_list = []
    model_fold_metrics = [] #one [accuracy, f1, precision, recall, mcc] row per fold of this model only

    for fold, (train_index, test_index) in enumerate(kf.split(X, y), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:, 1]

        #Calculate the different metrics
        cm = confusion_matrix(y_test, y_pred)
        cm_list.append(cm)
        fpr, tpr, _ = roc_curve(y_test, y_prob)
        roc_auc = auc(fpr, tpr)
        roc_auc_list.append(roc_auc)
        #Interpolate onto the shared FPR grid so the folds can be averaged point by point
        tpr_interpolated = np.interp(mean_fpr, fpr, tpr)
        tpr_interpolated[0] = 0.0
        tpr_list.append(tpr_interpolated)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        mcc = matthews_corrcoef(y_test, y_pred)
        accuracy_scores.append(accuracy)
        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)
        MCC_values.append(mcc)

        fold_metrics = [accuracy, f1, precision, recall, mcc]
        results.append(fold_metrics)
        model_fold_metrics.append(fold_metrics)

        #Save the model
        model_filename = f"{model_name}_fold_{fold}.joblib"
        joblib.dump(model, model_filename)

    fold = 1 #leave the counter at 1 for the code that follows

    #Standard deviation of each metric across the folds of THIS model only. The global score lists
    #mix the folds of every model trained so far, so they cannot be used for a per-model spread.
    std.append(np.std(model_fold_metrics, axis=0).tolist())

    #Calculating and showing the mean and standard deviation confusion matrix
    mean_tpr = np.mean(tpr_list, axis=0)
    std_tpr = np.std(tpr_list, axis=0)
    mean_tpr[-1] = 1.0
    model_mean_fprs[model_name] = mean_fpr
    model_mean_tprs[model_name] = mean_tpr
    model_std_tprs[model_name] = std_tpr
    model_auc[model_name] = (np.mean(roc_auc_list), np.std(roc_auc_list))
    avg_cm = np.mean(cm_list, axis=0)
    std_cm = np.std(cm_list, axis=0)
    plot_confusion_matrix_with_std(avg_cm, std_cm, model_name)


#List for the MLP model
mlp_tpr_list = []
mlp_roc_auc_list = []
mlp_cm_list = []
fold_accuracies = []
history_data = []
mlp_fold_metrics = [] #one [accuracy, f1, precision, recall, mcc] row per MLP fold

#MLP model with hyperparameters from hyperoptimization.ipynb
#Folds are numbered explicitly from 1 so the file names do not depend on a counter left over from earlier code
for fold, (train_index, val_index) in enumerate(kf.split(X, y), start=1):
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]
    input_dim = X_train.shape[1]
    #A fresh network is built for every fold so no weights carry over between folds
    mlp_2_model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dropout(0.01),
        Dense(128, activation='relu'),
        Dropout(0.01),
        Dense(32, activation='relu'),
        Dropout(0.01),
        Dense(4, activation='relu'),
        Dropout(0.01),
        Dense(1, activation='sigmoid')
    ])

    optimizer = Adam(learning_rate=0.0006915151366011638)
    mlp_2_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    #NOTE(review): the validation fold both drives early stopping and is the fold the metrics are reported on
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = mlp_2_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val),
                  callbacks=[early_stopping], verbose=1)

    history_data.append(history.history)

    #Predict once; the hard labels are the sigmoid outputs thresholded at 0.5
    y_prob = mlp_2_model.predict(X_val).ravel()
    y_pred = (y_prob > 0.5).astype(int)
    mlp_2_model.summary()

    #Calculate the different metrics
    cm = confusion_matrix(y_val, y_pred)
    mlp_cm_list.append(cm)
    fpr, tpr, _ = roc_curve(y_val, y_prob)
    roc_auc = auc(fpr, tpr)
    mlp_roc_auc_list.append(roc_auc)
    tpr_interpolated = np.interp(mean_fpr, fpr, tpr)
    tpr_interpolated[0] = 0.0
    mlp_tpr_list.append(tpr_interpolated)
    accuracy = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    mcc = matthews_corrcoef(y_val, y_pred)
    accuracy_scores.append(accuracy)
    f1_scores.append(f1)
    precision_scores.append(precision)
    recall_scores.append(recall)
    MCC_values.append(mcc)
    fold_metrics = [accuracy, f1, precision, recall, mcc]
    results.append(fold_metrics)
    mlp_fold_metrics.append(fold_metrics)

    #Dump the network trained in this fold. `model` still refers to the last scikit-learn estimator
    #from the loop over ml_models, so it must not be saved under the MLP file name.
    model_filename = f"MLP_fold_{fold}.joblib"
    joblib.dump(mlp_2_model, model_filename)

#Standard deviation of each metric across the MLP folds only
std.append(np.std(mlp_fold_metrics, axis=0).tolist())

#Confusion matrix for MLP
#Element-wise mean / std over the per-fold confusion matrices
avg_mlp_cm, std_mlp_cm = np.mean(mlp_cm_list, axis=0), np.std(mlp_cm_list, axis=0)
plot_confusion_matrix_with_std(avg_mlp_cm, std_mlp_cm, "MLP")

#Mean ROC curve of the MLP on the shared FPR grid, forced to end in (1, 1)
mean_mlp_tpr, std_mlp_tpr = np.mean(mlp_tpr_list, axis=0), np.std(mlp_tpr_list, axis=0)
mean_mlp_tpr[-1] = 1.0
mean_mlp_auc, std_mlp_auc = np.mean(mlp_roc_auc_list), np.std(mlp_roc_auc_list)

#ROC curve for all models
plt.figure(figsize=(10, 8))
for model_name, fpr_grid in model_mean_fprs.items():
    tpr_mean = model_mean_tprs[model_name]
    tpr_spread = model_std_tprs[model_name]
    auc_mean, auc_spread = model_auc[model_name]
    plt.plot(fpr_grid, tpr_mean, label=f"{model_name} (AUC = {auc_mean:.4f} ± {auc_spread:.4f})")
    #Shaded band: one standard deviation around the mean curve
    plt.fill_between(fpr_grid, tpr_mean - tpr_spread, tpr_mean + tpr_spread, alpha=0.2)
plt.plot(mean_fpr, mean_mlp_tpr, label=f"MLP (AUC = {mean_mlp_auc:.4f} ± {std_mlp_auc:.4f})", color='blue')
plt.fill_between(mean_fpr, mean_mlp_tpr - std_mlp_tpr, mean_mlp_tpr + std_mlp_tpr, color='blue', alpha=0.2)
#Diagonal reference line
plt.plot([0, 1], [0, 1], 'k--', lw=2)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Mean ROC Curve with Standard Deviation for Each Model')
plt.legend(loc="lower right")
plt.show()


#Create the table with the results
models_names = ['Decision Tree', 'Random Forest', 'Naive Bayes', 'Gradient Boosting', 'KNN', 'Hist Gradient Boosting', 'MLP']
metric_names = ['accuracy', 'f1', 'precision', 'recall', 'MCC']
n_folds = kf.get_n_splits()

#`results` holds one [accuracy, f1, precision, recall, mcc] row per fold, appended model by model in
#the order of `models_names`. Only the most recent n_models * n_folds rows are used, so rows left
#over from an earlier run of the training loops cannot shift the model boundaries.
n_rows = len(models_names) * n_folds
if len(results) < n_rows:
    raise ValueError(f"Expected {n_rows} result rows ({len(models_names)} models x {n_folds} folds), found {len(results)}")
fold_metrics = np.asarray(results[-n_rows:], dtype=float).reshape(len(models_names), n_folds, len(metric_names))

#Per-model slices: n_folds rows of [accuracy, f1, precision, recall, mcc] each
DT_results, RF_results, GNB_results, GB_results, KNN_results, HGB_results, MLP_results = fold_metrics.tolist()

#Mean and standard deviation of every metric ACROSS the folds of each model (axis 1 is the fold axis).
#Averaging a single fold row instead would mix the five different metrics together.
result_data = fold_metrics.mean(axis=1).tolist()
results_std = fold_metrics.std(axis=1).tolist()
DT_std, RF_std, GNB_std, GB_std, KNN_std, HGB_std, MLP_std = results_std

result_df = pd.DataFrame(result_data, columns=metric_names, index=models_names)
result_df_std = pd.DataFrame(results_std, columns=metric_names, index=models_names)
#Both frames share the row order of models_names, so means and stds can be paired row by row
combined_df = pd.DataFrame(
    [[f"{mean:.4f} ± {spread:.4f}" for mean, spread in zip(mean_row, std_row)]
     for mean_row, std_row in zip(result_data, results_std)],
    columns=result_df.columns,
    index=result_df.index,
)
print(combined_df)

#MLP model accuracy and loss plot
def pad_list(data, max_len):
    """Return `data` as a list, right-padded with NaN up to `max_len` entries.

    Parameters:
        data: sequence of per-epoch values for one fold.
        max_len: target length; sequences already this long (or longer) are returned unpadded.

    Returns:
        A new list containing the values of `data` followed by NaN padding.
    """
    #Copy into a list first so that tuples/arrays are concatenated with the padding rather than added to it
    values = list(data)
    return values + [np.nan] * (max_len - len(values))

#Calculate the values for the accuracy and loss plots
#Every fold is padded to the longest recorded run so the curves can be stacked;
#the NaN padding is ignored by nanmean/nanstd below.
#(The lists are built directly here; a separate per-fold append loop would only be overwritten.)
max_len = max(len(history['loss']) for history in history_data)
loss_per_fold = [pad_list(history['loss'], max_len) for history in history_data]
val_loss_per_fold = [pad_list(history['val_loss'], max_len) for history in history_data]
accuracy_per_fold = [pad_list(history['accuracy'], max_len) for history in history_data]
val_accuracy_per_fold = [pad_list(history['val_accuracy'], max_len) for history in history_data]

#Per-epoch mean and standard deviation across folds
mean_loss = np.nanmean(loss_per_fold, axis=0)
std_loss = np.nanstd(loss_per_fold, axis=0)
mean_val_loss = np.nanmean(val_loss_per_fold, axis=0)
std_val_loss = np.nanstd(val_loss_per_fold, axis=0)
mean_accuracy = np.nanmean(accuracy_per_fold, axis=0)
std_accuracy = np.nanstd(accuracy_per_fold, axis=0)
mean_val_accuracy = np.nanmean(val_accuracy_per_fold, axis=0)
std_val_accuracy = np.nanstd(val_accuracy_per_fold, axis=0)

#Shared plotting routine for the loss and accuracy figures
def plot_mean_std_curves(epochs, train_mean, train_std, val_mean, val_std, metric_name):
    """Plot mean training and validation curves with shaded mean ± std bands.

    Parameters:
        epochs: x-axis values, one per epoch.
        train_mean, train_std: per-epoch mean and standard deviation of the training metric.
        val_mean, val_std: per-epoch mean and standard deviation of the validation metric.
        metric_name: label used in the legend, title and y-axis (e.g. 'Loss').

    Creates a new figure and shows it; returns nothing.
    """
    plt.figure(figsize=(12, 6))
    plt.plot(epochs, train_mean, label=f'Training {metric_name}', color='blue')
    plt.fill_between(epochs, train_mean - train_std, train_mean + train_std, color='blue', alpha=0.2)
    plt.plot(epochs, val_mean, label=f'Validation {metric_name}', color='orange')
    plt.fill_between(epochs, val_mean - val_std, val_mean + val_std, color='orange', alpha=0.2)
    plt.title(f'Training and Validation {metric_name} (Mean ± Std)', fontsize=14)
    plt.xlabel('Epochs', fontsize=12)
    plt.ylabel(metric_name, fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.show()

#Epochs are numbered from 1 up to the length of the averaged curves
epochs = range(1, len(mean_loss) + 1)

#Plot Loss
plot_mean_std_curves(epochs, mean_loss, std_loss, mean_val_loss, std_val_loss, 'Loss')

#Plot Accuracy
plot_mean_std_curves(epochs, mean_accuracy, std_accuracy, mean_val_accuracy, std_val_accuracy, 'Accuracy')

In [None]:
#Test the models on the selected features from the imbalanced dataset
#Model types and folds
model_types = ['DecisionTreeClassifier', 'RandomForestClassifier', 'GaussianNB', 'GradientBoostingClassifier', 'KNeighborsClassifier', 'HistGradientBoostingClassifier', 'MLP']
folds_per_model = 5

#Define test set
x_test = X_selected_im
y_test = y_selected_im

#Containers for the metrics collected during evaluation (each one starts as its own empty list)
results, average_roc_auc, average_std_roc_auc = [], [], []
accuracy_scores, accuracy_std = [], []
f1_scores, f1_std = [], []
precision_scores, precision_std = [], []
recall_scores, recall_std = [], []
MCC_values, MCC_std = [], []

#One empty slot per (model type, fold) pair, plus a list of confusion matrices per model type
models = []
std = [list() for _ in range(len(model_types) * folds_per_model)]
conf_matrices = {name: list() for name in model_types}

#ROC curve plotting settings: common false-positive-rate grid and the figure the curves are drawn on
fpr_mean = np.linspace(0, 1, 100)
plt.figure(figsize=(10, 8))

#Load models from files
#File names follow "<model type without spaces>_fold_<1-based fold>.joblib", grouped by model type
model_filenames = [
    f"{name.replace(' ', '')}_fold_{fold_number}.joblib"
    for name in model_types
    for fold_number in range(1, folds_per_model + 1)
]

models = []
for model_filename in model_filenames:
    try:
        models.append(joblib.load(model_filename))
    except FileNotFoundError:
        #Keep a None placeholder so the position of every later model stays the same
        print(f"Warning: Model file not found: {model_filename}")
        models.append(None)

#Test each model
#For every model type, evaluate each fold's model on the test set, collect the metrics,
#and draw the mean ROC curve (± std across folds) on the current figure.
#NOTE(review): assumes every loaded model (including the MLP) exposes predict and predict_proba - confirm for the saved MLP
for i, model_type in enumerate(model_types):
    tprs = []
    aucs = []
    fold_metrics = []
    #Running sum of this model's confusion matrices (not read again within this cell)
    conf_matrix_sum = np.zeros((2, 2))

    for fold in range(folds_per_model):
        current_model_index = i * folds_per_model + fold
        model = models[current_model_index]
        if model is None:
            #The loader stores None when a file is missing; fail with a clear message
            #instead of an AttributeError on None
            raise RuntimeError(f"No model loaded for {model_type} fold {fold + 1}; cannot evaluate it")
        y_pred = model.predict(x_test)
        y_prob = model.predict_proba(x_test)[:, 1]

        #Calculate the different metrics
        fpr, tpr, _ = roc_curve(y_test, y_prob)
        roc_auc = auc(fpr, tpr)
        #Resample the curve onto the common FPR grid so the folds can be averaged
        tprs.append(np.interp(fpr_mean, fpr, tpr))
        aucs.append(roc_auc)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        mcc = matthews_corrcoef(y_test, y_pred)
        accuracy_scores.append(accuracy)
        f1_scores.append(f1)
        precision_scores.append(precision)
        recall_scores.append(recall)
        MCC_values.append(mcc)
        results.append([accuracy, f1, precision, recall, mcc])
        fold_metrics.append([accuracy, f1, precision, recall, mcc])
        conf_matrix = confusion_matrix(y_test, y_pred)
        conf_matrices[model_type].append(conf_matrix)
        conf_matrix_sum += conf_matrix

    #Standard deviation of each metric across this model's folds, stored at the model's index.
    #(np.std of a single scalar metric is always 0 and np.std(y_pred) measures the spread of
    #the predicted labels, so the spread has to be taken over the folds instead.)
    std[i].extend(np.std(fold_metrics, axis=0).tolist())

    #Calculate values for ROC curve
    mean_tpr = np.mean(tprs, axis=0)
    std_tpr = np.std(tprs, axis=0)
    mean_auc = np.mean(aucs)
    std_auc = np.std(aucs)
    plt.plot(fpr_mean, mean_tpr, label=f'{model_type} (AUC = {mean_auc:.2f} ± {std_auc:.2f})')
    plt.fill_between(fpr_mean, mean_tpr - std_tpr, mean_tpr + std_tpr, alpha=0.2)

#ROC plot
#Finish the current figure: chance-level diagonal, axis labels, title, legend and grid
roc_ax = plt.gca()
roc_ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curves for All Models')
roc_ax.legend(loc='lower right')
roc_ax.grid(True)
plt.show()

#Plot Mean and Standard Deviation Confusion Matrices
#One figure per model type: element-wise mean over the folds on the left, element-wise std on the right
class_labels = ['Benign', 'Malicious']
for model_type, conf_matrices_list in conf_matrices.items():
    conf_matrices_array = np.array(conf_matrices_list)
    mean_conf_matrix = conf_matrices_array.mean(axis=0)
    std_conf_matrix = conf_matrices_array.std(axis=0)

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    panels = [
        (axes[0], mean_conf_matrix, 'Blues', f'Mean Confusion Matrix for {model_type}'),
        (axes[1], std_conf_matrix, 'Oranges', f'Standard Deviation Confusion Matrix for {model_type}'),
    ]
    for panel_ax, matrix, colormap, panel_title in panels:
        sns.heatmap(matrix, annot=True, fmt='.2f', cmap=colormap, xticklabels=class_labels, yticklabels=class_labels, ax=panel_ax)
        panel_ax.set_title(panel_title)
        panel_ax.set_xlabel('Predicted')
        panel_ax.set_ylabel('True')

    plt.tight_layout()
    plt.show()

#Create the table with the results
models_names = ['Decision Tree', 'Random Forest', 'Naive Bayes', 'Gradient Boosting', 'KNN', 'Hist Gradient Boosting', 'MLP']
metric_columns = ['accuracy', 'f1', 'precision', 'recall', 'MCC']

#results holds one [accuracy, f1, precision, recall, MCC] row per (model, fold),
#grouped by model in the order of model_types, which matches models_names above
DT_results, RF_results, GNB_results, GB_results, KNN_results, HGB_results, MLP_results = [
    results[k * folds_per_model:(k + 1) * folds_per_model] for k in range(len(models_names))
]
per_model_results = [DT_results, RF_results, GNB_results, GB_results, KNN_results, HGB_results, MLP_results]

#Mean and standard deviation of every metric across the folds of each model.
#axis=0 averages over folds (rows) and keeps the metrics (columns) separate; averaging a
#single fold row would instead mix accuracy, f1, precision, recall and MCC together.
result_data = [np.mean(fold_rows, axis=0).tolist() for fold_rows in per_model_results]
DT_std, RF_std, GNB_std, GB_std, KNN_std, HGB_std, MLP_std = [
    np.std(fold_rows, axis=0).tolist() for fold_rows in per_model_results
]
#Same model order as result_data so that each "±" value belongs to the mean next to it
results_std = [DT_std, RF_std, GNB_std, GB_std, KNN_std, HGB_std, MLP_std]

result_df = pd.DataFrame(result_data, columns=metric_columns, index=models_names)
result_df_std = pd.DataFrame(results_std, columns=metric_columns, index=models_names)

#Format every cell as "mean ± std" with four decimals
combined_rows = [
    [f"{mean:.4f} ± {spread:.4f}" for mean, spread in zip(result_df.loc[name], result_df_std.loc[name])]
    for name in result_df.index
]
combined_df = pd.DataFrame(combined_rows, columns=result_df.columns, index=result_df.index)
print(combined_df)
