In [107]:
from Metaheuristicas.fitness_functions import *


In [108]:
X, y = load_and_preprocess_data(filename='Resources/SeisBenchV1_v1_1.json')


In [109]:
from Metaheuristicas.Genetico import genetic_algorithm

mutation = 0.1
crossover = 0.9

In [110]:
import pandas as pd
from IPython.display import display, clear_output

# Step 1: Initialize empty DataFrames for each classifier with metrics as columns
metrics = ["Accuracy", "Precision", "Recall", "F1 Score", "AUC"]

naive_bayes_df = pd.DataFrame(columns=metrics, index=["Mutual Information", "X2", "Relief"])
random_forest_df = pd.DataFrame(columns=metrics, index=["Mutual Information", "X2", "Relief"])
neural_network_df = pd.DataFrame(columns=metrics, index=["Mutual Information", "X2", "Relief"])

# Display all tables function
def display_tables():
    clear_output(wait=True)
    print("Naive Bayes Results")
    display(naive_bayes_df)
    print("Random Forest Results")
    display(random_forest_df)
    print("Neural Network Results")
    display(neural_network_df)

In [111]:
def add_result(classifier, fitness_function, accuracy, precision, recall, f1_score, auc):
    new_data = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score,
        "AUC": auc
    }

    if classifier == "Naive Bayes":
        global naive_bayes_df
        naive_bayes_df.loc[fitness_function] = new_data
    elif classifier == "Random Forest":
        global random_forest_df
        random_forest_df.loc[fitness_function] = new_data
    elif classifier == "Neural Network":
        global neural_network_df
        neural_network_df.loc[fitness_function] = new_data

In [112]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
def confusion_matrix_heatmap(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

# Split

In [113]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

#dataset split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


# Models

In [114]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
NB = GaussianNB()
DT = RandomForestClassifier()

# Feature Selection

In [115]:
# print the features avaiable
X.columns

Index(['f1_t_mean', 'f2_t_std', 'f3_t_var', 'f4_t_entropy', 'f5_t_kurtosis',
       'f6_t_multiscaleEntropy', 'f7_t_time2peak', 'f8_t_rms',
       'f9_t_peak2peak', 'f10_t_peak2rms', 'f11_t_energy', 'f12_t_zcr',
       'f13_t_PeaksAboveRMSDensity_fun', 'f14_f_peaks_pos_1',
       'f15_f_90_percent_energy', 'f16_f_entropy', 'f17_f_mean', 'f18_f_std',
       'f19_f_var', 'f20_f_energy', 'f21_f_kurtosis',
       'f22_f_multiscaleEntropy', 'f23_f_peak_1020_value',
       'f24_f_peak_1020_pos', 'f25_f_peak_2030_value', 'f26_f_peak_2030_pos',
       'f27_f_rms', 'f28_f_peak2rms', 'f29_f_power',
       'f30_f_PeaksAboveRMSDensity_fun', 'f31_f_peaks_val_2',
       'f32_f_peaks_pos_2', 'f33_f_peaks_val_3', 'f34_f_peaks_pos_3',
       'f35_w_f_maxval_A6', 'f36_w_f_maxval_D1', 'f37_w_f_maxval_D2',
       'f38_w_f_maxval_D3', 'f39_w_f_maxval_D4', 'f40_w_f_maxval_D5',
       'f41_w_f_maxval_D6', 'f42_w_f_maxpos_A6', 'f43_w_f_maxpos_D2',
       'f44_w_f_maxpos_D3', 'f45_w_f_maxpos_D4', 'f46_w_f_maxp

In [116]:
def seleccionar_caracteristicas(df, indices):
    prefijos = [f"f{n}_" for n in indices]
    columnas_seleccionadas = [col for col in df.columns if any(col.startswith(prefijo) for prefijo in prefijos)]
    return df[columnas_seleccionadas]


In [117]:
# def seleccionar_caracteristicas(df, indices):
#     return [col for col in df.columns if any(col.startswith(f"f{n}_") for n in indices)]


## GA

In [118]:
GaMiFtIndices = [2, 3, 5, 6, 7, 9, 11, 12, 14, 15, 17, 18, 19, 20, 21, 22, 23, 25, 27, 30, 32, 34, 36, 37]
GaX2FtIndices = [9, 11, 12, 13, 16, 19, 21, 23, 28, 30, 34,38, 39, 40, 48, 55, 58, 59, 60, 61, 65, 66,70, 71, 72, 74, 75, 76, 77, 84]
GaReliefFFtIndices = [2, 4, 9, 11, 14, 16, 17, 19, 21, 23, 25, 27,38, 39, 42, 49, 55, 58, 62, 64, 65, 66, 67,69, 71, 72, 76, 78, 81]

In [119]:
X_train_GaMiFt = seleccionar_caracteristicas(X_train,GaMiFtIndices)
X_train_GaX2Ft = seleccionar_caracteristicas(X_train, GaX2FtIndices)
X_train_GaReliefFFt = seleccionar_caracteristicas(X_train, GaReliefFFtIndices)


## CS

In [120]:
CsMiFtIndices = [3, 5, 6, 10, 11, 12, 13, 14, 15, 17, 18, 21,23, 24, 25, 26, 28, 30, 31, 32, 33, 34, 35]
CsX2FtIndices = [1, 7, 10, 19, 20, 24, 26, 27, 30, 32, 34, 37,38, 42, 58, 60, 61, 64, 65, 67, 68, 69, 72, 77]
CsReliefFFtIndices = [4, 6, 7, 10, 13, 15, 19, 22, 23, 29, 33, 39,42, 50, 55, 57, 58, 59, 62, 63, 64, 65, 67]

In [121]:
X_train_CsMiFt = seleccionar_caracteristicas(X_train,CsMiFtIndices)
X_train_CsX2Ft = seleccionar_caracteristicas(X_train, CsX2FtIndices)
X_train_CsReliefFFt = seleccionar_caracteristicas(X_train, CsReliefFFtIndices)


# Functions

In [122]:
from sklearn.model_selection import KFold, train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

In [123]:
def evaluate_naive_bayes_with_kfold(X_train, y_train, k=10, holdout = None, y_test = None):
    

    # Lists to store metrics during cross-validation
    accuracies, precisions, recalls, f1s, aucs = [], [], [], [], []

    if k > 1:
        kf = KFold(n_splits=k, shuffle=True, random_state=42)
        # Perform k-Fold Cross-Validation
        for train_index, val_index in kf.split(X_train):
            X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
            y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
    
            # Run it with Naive Bayes
            NB.fit(X_train_fold, y_train_fold)
            y_pred = NB.predict(X_val_fold)
    
            # Calculate metrics
            accuracy = accuracy_score(y_val_fold, y_pred)
            precision = precision_score(y_val_fold, y_pred)
            recall = recall_score(y_val_fold, y_pred)
            f1 = f1_score(y_val_fold, y_pred)
            auc = roc_auc_score(y_val_fold, y_pred)
    
            # Append metrics to lists
            accuracies.append(accuracy)
            precisions.append(precision)
            recalls.append(recall)
            f1s.append(f1)
            aucs.append(auc)
    
            # Calculate mean and std for each metric
        mean_accuracy = np.mean(accuracies)
        mean_precision = np.mean(precisions)
        mean_recall = np.mean(recalls)
        mean_f1 = np.mean(f1s)
        mean_auc = np.mean(aucs)
        std_accuracy = np.std(accuracies)
        std_precision = np.std(precisions)
        std_recall = np.std(recalls)
        std_f1 = np.std(f1s)
        std_auc = np.std(aucs)
        # Add results to DataFrame
        add_result("Naive Bayes", "Mutual Information", mean_accuracy, mean_precision, mean_recall, mean_f1, mean_auc)
    
            # Print metrics
        print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
        print(f"Precision: {mean_precision:.4f} ± {std_precision:.4f}")
        print(f"Recall: {mean_recall:.4f} ± {std_recall:.4f}")
        print(f"F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
        print(f"AUC: {mean_auc:.4f} ± {std_auc:.4f}")
    
            
           
    
        NBMIScores = [accuracies, precisions, recalls, f1s, aucs]
        NBMISTD = [std_accuracy, std_precision, std_recall, std_f1, std_auc]
        return NBMIScores, NBMISTD
    else:
        # Run it with Naive Bayes
        NB.fit(X_train, y_train)
        y_pred = NB.predict(holdout)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred)

        # Append metrics to lists
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
        aucs.append(auc)

        mean_accuracy = np.mean(accuracies)
        mean_precision = np.mean(precisions)
        mean_recall = np.mean(recalls)
        mean_f1 = np.mean(f1s)
        mean_auc = np.mean(aucs)
        std_accuracy = np.std(accuracies)
        std_precision = np.std(precisions)
        std_recall = np.std(recalls)
        std_f1 = np.std(f1s)
        std_auc = np.std(aucs)

            # Print metrics
        print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
        print(f"Precision: {mean_precision:.4f} ± {std_precision:.4f}")
        print(f"Recall: {mean_recall:.4f} ± {std_recall:.4f}")
        print(f"F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
        print(f"AUC: {mean_auc:.4f} ± {std_auc:.4f}")


In [124]:
def evaluate_random_forest_with_kfold(X_train, y_train, k=10,holdout = None, y_test = None):

    # Lists to store metrics during cross-validation
    accuracies, precisions, recalls, f1s, aucs = [], [], [], [], []

    # Perform k-Fold Cross-Validation
    if k > 1:
        kf = KFold(n_splits=k, shuffle=True, random_state=42)

        for train_index, val_index in kf.split(X_train):
            X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
            y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
    
            # Run it with Naive Bayes
            DT.fit(X_train_fold, y_train_fold)
            y_pred = DT.predict(X_val_fold)
    
            # Calculate metrics
            accuracy = accuracy_score(y_val_fold, y_pred)
            precision = precision_score(y_val_fold, y_pred)
            recall = recall_score(y_val_fold, y_pred)
            f1 = f1_score(y_val_fold, y_pred)
            auc = roc_auc_score(y_val_fold, y_pred)
    
            # Append metrics to lists
            accuracies.append(accuracy)
            precisions.append(precision)
            recalls.append(recall)
            f1s.append(f1)
            aucs.append(auc)
    
            # Calculate mean and std for each metric
        mean_accuracy = np.mean(accuracies)
        mean_precision = np.mean(precisions)
        mean_recall = np.mean(recalls)
        mean_f1 = np.mean(f1s)
        mean_auc = np.mean(aucs)
        std_accuracy = np.std(accuracies)
        std_precision = np.std(precisions)
        std_recall = np.std(recalls)
        std_f1 = np.std(f1s)
        std_auc = np.std(aucs)
    
            # Print metrics
        print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
        print(f"Precision: {mean_precision:.4f} ± {std_precision:.4f}")
        print(f"Recall: {mean_recall:.4f} ± {std_recall:.4f}")
        print(f"F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
        print(f"AUC: {mean_auc:.4f} ± {std_auc:.4f}")
    
            # Add results to DataFrame
        add_result("Naive Bayes", "Mutual Information", mean_accuracy, mean_precision, mean_recall, mean_f1, mean_auc)
            # Display all tables
    
        RfMIScores = [accuracies, precisions, recalls, f1s, aucs]
        RfMISTD = [std_accuracy, std_precision, std_recall, std_f1, std_auc]
        return RfMIScores, RfMISTD
    else:
        # Run it with random forest
        DT.fit(X_train, y_train)
        y_pred = DT.predict(holdout)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred)

        # Append metrics to lists
        accuracies.append(accuracy)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)
        aucs.append(auc)

        mean_accuracy = np.mean(accuracies)
        mean_precision = np.mean(precisions)
        mean_recall = np.mean(recalls)
        mean_f1 = np.mean(f1s)
        mean_auc = np.mean(aucs)
        std_accuracy = np.std(accuracies)
        std_precision = np.std(precisions)
        std_recall = np.std(recalls)
        std_f1 = np.std(f1s)
        std_auc = np.std(aucs)

            # Print metrics
        print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
        print(f"Precision: {mean_precision:.4f} ± {std_precision:.4f}")
        print(f"Recall: {mean_recall:.4f} ± {std_recall:.4f}")
        print(f"F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
        print(f"AUC: {mean_auc:.4f} ± {std_auc:.4f}")
    


In [125]:
def evaluate_neural_network_with_kfold(X_train, y_train, k=10, holdout = None,y_test = None):

    # Lists to store metrics during cross-validation
    accuracies, precisions, recalls, f1s, aucs = [], [], [], [], []

    # Perform k-Fold Cross-Validation
    if k > 1:
        kf = KFold(n_splits=k, shuffle=True, random_state=42)

        for train_index, val_index in kf.split(X_train):
            X_train_fold, X_val_fold = X_train.iloc[train_index], X_train.iloc[val_index]
            y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
    
            # Define a new model in each fold
            input_dim = X_train_fold.shape[1]
            model = Sequential([
                Input(shape=(input_dim,)),
                Dense(64, activation='relu'),
                Dense(32, activation='relu'),
                Dense(1, activation='sigmoid')
            ])
            model.compile(optimizer=Adam(learning_rate=0.001), loss=BinaryCrossentropy(), metrics=['accuracy'])
    
            # Train the model on the fold
            model.fit(X_train_fold, y_train_fold, epochs=100, batch_size=32, verbose=0)
    
            # Evaluate on validation fold
            y_pred_prob = model.predict(X_val_fold)
            y_pred = (y_pred_prob > 0.5).astype(int)
    
            # Calculate metrics
            accuracies.append(accuracy_score(y_val_fold, y_pred))
            precisions.append(precision_score(y_val_fold, y_pred))
            recalls.append(recall_score(y_val_fold, y_pred))
            f1s.append(f1_score(y_val_fold, y_pred))
            aucs.append(roc_auc_score(y_val_fold, y_pred_prob))
    
        mean_accuracy = np.mean(accuracies)
        mean_precision = np.mean(precisions)
        mean_recall = np.mean(recalls)
        mean_f1 = np.mean(f1s)
        mean_auc = np.mean(aucs)
        std_accuracy = np.std(accuracies)
        std_precision = np.std(precisions)
        std_recall = np.std(recalls)
        std_f1 = np.std(f1s)
        std_auc = np.std(aucs)
    
            # Print metrics
        print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
        print(f"Precision: {mean_precision:.4f} ± {std_precision:.4f}")
        print(f"Recall: {mean_recall:.4f} ± {std_recall:.4f}")
        print(f"F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
        print(f"AUC: {mean_auc:.4f} ± {std_auc:.4f}")
    
        # # Print metrics
        # print(f"Accuracy: {avg_acc:.4f}")
        # print(f"Precision: {avg_prec:.4f}")
        # print(f"Recall: {avg_rec:.4f}")
        # print(f"F1 Score: {avg_f1:.4f}")
        # print(f"AUC: {avg_auc:.4f}")
    
        # Add results to DataFrame
        add_result("Neural Network", "Mutual Information", mean_accuracy, mean_precision, mean_recall, mean_f1, mean_auc)
    
    
    
        NN_MIScores = [accuracies, precisions, recalls, f1s, aucs]
        NN_MISTD = [std_accuracy, std_precision, std_recall, std_f1, std_auc]
        return NN_MIScores, NN_MISTD
    else:
        # Define a new model
        input_dim = X_train.shape[1]
        model = Sequential([
            Input(shape=(input_dim,)),
            Dense(64, activation='relu'),
            Dense(32, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=Adam(learning_rate=0.001), loss=BinaryCrossentropy(), metrics=['accuracy'])

        # Train the model on the entire dataset
        model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

        # Evaluate on validation fold
        y_pred_prob = model.predict(holdout)
        y_pred = (y_pred_prob > 0.5).astype(int)

        # Calculate metrics
        accuracies.append(accuracy_score(y_test, y_pred))
        precisions.append(precision_score(y_test, y_pred))
        recalls.append(recall_score(y_test, y_pred))
        f1s.append(f1_score(y_test, y_pred))
        aucs.append(roc_auc_score(y_test, y_pred_prob))

        mean_accuracy = np.mean(accuracies)
        mean_precision = np.mean(precisions)
        mean_recall = np.mean(recalls)
        mean_f1 = np.mean(f1s)
        mean_auc = np.mean(aucs)
        std_accuracy = np.std(accuracies)
        std_precision = np.std(precisions)
        std_recall = np.std(recalls)
        std_f1 = np.std(f1s)
        std_auc = np.std(aucs)

            # Print metrics
        print(f"Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")
        print(f"Precision: {mean_precision:.4f} ± {std_precision:.4f}")
        print(f"Recall: {mean_recall:.4f} ± {std_recall:.4f}")
        print(f"F1 Score: {mean_f1:.4f} ± {std_f1:.4f}")
        print(f"AUC: {mean_auc:.4f} ± {std_auc:.4f}")
        


# Evaluate 

## Genetic Algorithm

### GA Mutual Information

In [76]:
# Naive Bayes 
ResultsNBGAMI, STDResultsNBGAMI= evaluate_naive_bayes_with_kfold(X_train_GaMiFt, y_train, k=10)

Accuracy: 0.9603 ± 0.0197
Precision: 0.7453 ± 0.1685
Recall: 0.8481 ± 0.1084
F1 Score: 0.7856 ± 0.1273
AUC: 0.9093 ± 0.0570


In [77]:
#Random Forest
ResultsRFGAMI, STDResultsRFGMI= evaluate_random_forest_with_kfold(X_train_GaMiFt, y_train, k=10)

Accuracy: 0.9650 ± 0.0181
Precision: 0.8266 ± 0.1479
Recall: 0.7362 ± 0.1615
F1 Score: 0.7740 ± 0.1407
AUC: 0.8610 ± 0.0826


In [78]:
#Neural Network
ResultsNNGAMI, STDResultsNNGAMI= evaluate_neural_network_with_kfold(X_train_GaMiFt, y_train, k=10)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Accuracy: 0.9662 ± 0.0177
Precision: 0.8682 ± 0.1404
Recall: 0.7489 ± 0.1449
F1 Score: 0.7922 ± 0.1138
AUC: 0.9818 ± 0.0183


### GA X2

In [79]:
# Naive Bayes
ResultsNBGAX2, STDResultsNBGAX2= evaluate_naive_bayes_with_kfold(X_train_GaX2Ft, y_train, k=10)


Accuracy: 0.9522 ± 0.0258
Precision: 0.6829 ± 0.1423
Recall: 0.9002 ± 0.1494
F1 Score: 0.7702 ± 0.1323
AUC: 0.9289 ± 0.0787


In [80]:
#Random Forest
ResultsRFGAX2, STDResultsRFGAX2= evaluate_random_forest_with_kfold(X_train_GaX2Ft, y_train, k=10)


Accuracy: 0.9638 ± 0.0191
Precision: 0.8309 ± 0.1243
Recall: 0.7275 ± 0.1509
F1 Score: 0.7719 ± 0.1324
AUC: 0.8567 ± 0.0774


In [81]:
#Neural Network
ResultsNNGAX2, STDResultsNNGAX2= evaluate_neural_network_with_kfold(X_train_GaX2Ft, y_train, k=10)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Accuracy: 0.9697 ± 0.0190
Precision: 0.8317 ± 0.1356
Recall: 0.8124 ± 0.1205
F1 Score: 0.8182 ± 0.1192
AUC: 0.9838 ± 0.0135


### GA ReliefF


In [82]:
# Naive Bayes
ResultsNBGAReliefF, STDResultsNBGAReliefF= evaluate_naive_bayes_with_kfold(X_train_GaReliefFFt, y_train, k=10)

Accuracy: 0.9428 ± 0.0279
Precision: 0.6436 ± 0.1580
Recall: 0.8577 ± 0.1474
F1 Score: 0.7277 ± 0.1405
AUC: 0.9044 ± 0.0780


In [83]:
#Random Forest
ResultsRFGAReliefF, STDResultsRFGAReliefF= evaluate_random_forest_with_kfold(X_train_GaReliefFFt, y_train, k=10)


Accuracy: 0.9615 ± 0.0165
Precision: 0.8175 ± 0.1531
Recall: 0.6996 ± 0.1626
F1 Score: 0.7487 ± 0.1496
AUC: 0.8427 ± 0.0830


In [84]:
#Neural Network
ResultsNNGAReliefF, STDResultsNNGAReliefF= evaluate_neural_network_with_kfold(X_train_GaReliefFFt, y_train, k=10)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Accuracy: 0.9627 ± 0.0047
Precision: 0.8127 ± 0.1249
Recall: 0.7532 ± 0.1649
F1 Score: 0.7700 ± 0.1065
AUC: 0.9780 ± 0.0249


## CS

### CS Mutual Information

In [85]:
# Naive Bayes
ResultsNBCSMI, STDResultsNBCSMI= evaluate_naive_bayes_with_kfold(X_train_CsMiFt, y_train, k=10)


Accuracy: 0.9592 ± 0.0234
Precision: 0.7281 ± 0.1583
Recall: 0.8913 ± 0.1352
F1 Score: 0.7930 ± 0.1300
AUC: 0.9283 ± 0.0704


In [86]:
#Random Forest
ResultsRFCsMI, STDResultsRFCsMI= evaluate_random_forest_with_kfold(X_train_CsMiFt, y_train, k=10)


Accuracy: 0.9673 ± 0.0146
Precision: 0.8474 ± 0.1225
Recall: 0.7287 ± 0.1666
F1 Score: 0.7791 ± 0.1432
AUC: 0.8585 ± 0.0844


In [87]:
#Neural Network
ResultsNNCsMI, STDResultsNNCsMI= evaluate_neural_network_with_kfold(X_train_CsMiFt, y_train, k=10)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Accuracy: 0.9697 ± 0.0190
Precision: 0.8526 ± 0.1346
Recall: 0.8104 ± 0.1371
F1 Score: 0.8203 ± 0.1101
AUC: 0.9841 ± 0.0171


### CS X2

In [88]:
# Naive Bayes
ResultsNBCSX2, STDResultsNBCSX2= evaluate_naive_bayes_with_kfold(X_train_CsX2Ft, y_train, k=10)

Accuracy: 0.9545 ± 0.0211
Precision: 0.6925 ± 0.1180
Recall: 0.8784 ± 0.1489
F1 Score: 0.7699 ± 0.1194
AUC: 0.9199 ± 0.0759


In [89]:
#Random Forest
ResultsRFCsX2, STDResultsRFCsX2= evaluate_random_forest_with_kfold(X_train_CsX2Ft, y_train, k=10)


Accuracy: 0.9638 ± 0.0248
Precision: 0.8036 ± 0.1633
Recall: 0.7658 ± 0.1851
F1 Score: 0.7787 ± 0.1633
AUC: 0.8739 ± 0.0949


In [90]:
#Neural Network
ResultsNNCsX2, STDResultsNNCsX2= evaluate_neural_network_with_kfold(X_train_CsX2Ft, y_train, k=10)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
Accuracy: 0.9650 ± 0.0240
Precision: 0.8282 ± 0.1616
Recall: 0.7734 ± 0.1507
F1 Score: 0.7919 ± 0.1354
AUC: 0.9815 ± 0.0157


### CS ReliefF


In [91]:
# Naive Bayes
ResultsNBCsReliefF, STDResultsNBCsReliefF= evaluate_naive_bayes_with_kfold(X_train_CsReliefFFt, y_train, k=10)


Accuracy: 0.9475 ± 0.0277
Precision: 0.6622 ± 0.1544
Recall: 0.8566 ± 0.1471
F1 Score: 0.7421 ± 0.1401
AUC: 0.9064 ± 0.0796


In [92]:
#Random Forest
ResultsRFCsReliefF, STDResultsRFCsReliefF= evaluate_random_forest_with_kfold(X_train_CsReliefFFt, y_train, k=10)


Accuracy: 0.9615 ± 0.0149
Precision: 0.8193 ± 0.1579
Recall: 0.7191 ± 0.1221
F1 Score: 0.7619 ± 0.1247
AUC: 0.8518 ± 0.0630


In [93]:
#Neural Network
ResultsNNCsReliefF, STDResultsNNCsReliefF= evaluate_neural_network_with_kfold(X_train_CsReliefFFt, y_train, k=10)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Accuracy: 0.9673 ± 0.0146
Precision: 0.8214 ± 0.1279
Recall: 0.7754 ± 0.1803
F1 Score: 0.7901 ± 0.1453
AUC: 0.9833 ± 0.0175


# ALL FEATURES

In [94]:
# Naive Bayes
ResultsNBAll, STDResultsNBAll= evaluate_naive_bayes_with_kfold(X_train, y_train, k=10)

Accuracy: 0.9394 ± 0.0385
Precision: 0.6300 ± 0.1821
Recall: 0.9002 ± 0.1494
F1 Score: 0.7337 ± 0.1610
AUC: 0.9218 ± 0.0863


In [95]:
#Random Forest
ResultsRFAll, STDResultsRFAll= evaluate_random_forest_with_kfold(X_train, y_train, k=10)


Accuracy: 0.9673 ± 0.0187
Precision: 0.8509 ± 0.1043
Recall: 0.7648 ± 0.1249
F1 Score: 0.8026 ± 0.1062
AUC: 0.8759 ± 0.0648


In [96]:
#Neural Network
ResultsNNAll, STDResultsNNAll= evaluate_neural_network_with_kfold(X_train, y_train, k=10)   


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Accuracy: 0.9708 ± 0.0167
Precision: 0.8587 ± 0.1263
Recall: 0.8188 ± 0.1548
F1 Score: 0.8254 ± 0.1042
AUC: 0.9887 ± 0.0099


### Save Results



In [98]:
import pickle
import os

# Ensure the output directory exists
os.makedirs('Results', exist_ok=True)

# Group results logically
results = [
    # GAMI
    ResultsNBGAMI, ResultsRFGAMI, ResultsNNGAMI,
    # GAX2
    ResultsNBGAX2, ResultsRFGAX2, ResultsNNGAX2,
    # GAReliefF
    ResultsNBGAReliefF, ResultsRFGAReliefF, ResultsNNGAReliefF,
    # CsMI
    ResultsNBCSMI, ResultsRFCsMI, ResultsNNCsMI,
    # CsX2
    ResultsNBCSX2, ResultsRFCsX2, ResultsNNCsX2,
    # CsReliefF
    ResultsNBCsReliefF, ResultsRFCsReliefF, ResultsNNCsReliefF,
    # All features
    ResultsNBAll, ResultsRFAll, ResultsNNAll
]

# Save to file
with open('Results/Results.pkl', 'wb') as f:
    pickle.dump(results, f)


# Validation


run with holdout

In [132]:
X_test_GaMiFt = seleccionar_caracteristicas(X_test,GaMiFtIndices)
X_test_GaX2Ft = seleccionar_caracteristicas(X_test, GaX2FtIndices)
X_test_GaReliefFFt = seleccionar_caracteristicas(X_test, GaReliefFFtIndices)
X_test_CsMiFt = seleccionar_caracteristicas(X_test,CsMiFtIndices)
X_test_CsX2Ft = seleccionar_caracteristicas(X_test, CsX2FtIndices)
X_test_CsReliefFFt = seleccionar_caracteristicas(X_test, CsReliefFFtIndices)



## GA Mutal Information

In [133]:
evaluate_naive_bayes_with_kfold(X_train_GaMiFt, y_train, k=1, holdout = X_test_GaMiFt, y_test = y_test)


Accuracy: 0.9582 ± 0.0000
Precision: 0.6667 ± 0.0000
Recall: 0.7368 ± 0.0000
F1 Score: 0.7000 ± 0.0000
AUC: 0.8554 ± 0.0000


In [134]:
evaluate_random_forest_with_kfold(X_train_GaMiFt, y_train, k=1, holdout = X_test_GaMiFt, y_test = y_test)

Accuracy: 0.9652 ± 0.0000
Precision: 0.7647 ± 0.0000
Recall: 0.6842 ± 0.0000
F1 Score: 0.7222 ± 0.0000
AUC: 0.8346 ± 0.0000


In [135]:
evaluate_neural_network_with_kfold(X_train_GaMiFt, y_train, k=1, holdout = X_test_GaMiFt, y_test = y_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Accuracy: 0.9547 ± 0.0000
Precision: 0.6500 ± 0.0000
Recall: 0.6842 ± 0.0000
F1 Score: 0.6667 ± 0.0000
AUC: 0.9711 ± 0.0000


## GA X2

In [136]:
evaluate_naive_bayes_with_kfold(X_train_GaX2Ft, y_train, k=1, holdout = X_test_GaX2Ft, y_test = y_test)

Accuracy: 0.9512 ± 0.0000
Precision: 0.6000 ± 0.0000
Recall: 0.7895 ± 0.0000
F1 Score: 0.6818 ± 0.0000
AUC: 0.8761 ± 0.0000


In [137]:
evaluate_random_forest_with_kfold(X_train_GaX2Ft, y_train, k=1, holdout = X_test_GaX2Ft, y_test = y_test)

Accuracy: 0.9686 ± 0.0000
Precision: 0.8125 ± 0.0000
Recall: 0.6842 ± 0.0000
F1 Score: 0.7429 ± 0.0000
AUC: 0.8365 ± 0.0000


In [138]:
evaluate_neural_network_with_kfold(X_train_GaX2Ft, y_train, k=1, holdout = X_test_GaX2Ft, y_test = y_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Accuracy: 0.9721 ± 0.0000
Precision: 0.7895 ± 0.0000
Recall: 0.7895 ± 0.0000
F1 Score: 0.7895 ± 0.0000
AUC: 0.9847 ± 0.0000


## GA ReliefF


In [139]:
evaluate_naive_bayes_with_kfold(X_train_GaReliefFFt, y_train, k=1, holdout = X_test_GaReliefFFt, y_test = y_test)

Accuracy: 0.9512 ± 0.0000
Precision: 0.6190 ± 0.0000
Recall: 0.6842 ± 0.0000
F1 Score: 0.6500 ± 0.0000
AUC: 0.8272 ± 0.0000


In [140]:
evaluate_random_forest_with_kfold(X_train_GaReliefFFt, y_train, k=1, holdout = X_test_GaReliefFFt, y_test = y_test)

Accuracy: 0.9582 ± 0.0000
Precision: 0.7059 ± 0.0000
Recall: 0.6316 ± 0.0000
F1 Score: 0.6667 ± 0.0000
AUC: 0.8065 ± 0.0000


In [141]:
evaluate_neural_network_with_kfold(X_train_GaReliefFFt, y_train, k=1, holdout = X_test_GaReliefFFt, y_test = y_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Accuracy: 0.9512 ± 0.0000
Precision: 0.6667 ± 0.0000
Recall: 0.5263 ± 0.0000
F1 Score: 0.5882 ± 0.0000
AUC: 0.9703 ± 0.0000


## CS Mutual Information

In [142]:
evaluate_naive_bayes_with_kfold(X_train_CsMiFt, y_train, k=1, holdout = X_test_CsMiFt, y_test = y_test)

Accuracy: 0.9652 ± 0.0000
Precision: 0.7143 ± 0.0000
Recall: 0.7895 ± 0.0000
F1 Score: 0.7500 ± 0.0000
AUC: 0.8835 ± 0.0000


In [143]:
evaluate_random_forest_with_kfold(X_train_CsMiFt, y_train, k=1, holdout = X_test_CsMiFt, y_test = y_test)

Accuracy: 0.9686 ± 0.0000
Precision: 0.8125 ± 0.0000
Recall: 0.6842 ± 0.0000
F1 Score: 0.7429 ± 0.0000
AUC: 0.8365 ± 0.0000


In [144]:
evaluate_neural_network_with_kfold(X_train_CsMiFt, y_train, k=1, holdout = X_test_CsMiFt, y_test = y_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Accuracy: 0.9617 ± 0.0000
Precision: 0.7500 ± 0.0000
Recall: 0.6316 ± 0.0000
F1 Score: 0.6857 ± 0.0000
AUC: 0.9725 ± 0.0000


## CS X2

In [145]:
evaluate_naive_bayes_with_kfold(X_train_CsX2Ft, y_train, k=1, holdout = X_test_CsX2Ft, y_test = y_test)

Accuracy: 0.9582 ± 0.0000
Precision: 0.6667 ± 0.0000
Recall: 0.7368 ± 0.0000
F1 Score: 0.7000 ± 0.0000
AUC: 0.8554 ± 0.0000


In [146]:
evaluate_random_forest_with_kfold(X_train_CsX2Ft, y_train, k=1, holdout = X_test_CsX2Ft, y_test = y_test)

Accuracy: 0.9617 ± 0.0000
Precision: 0.7500 ± 0.0000
Recall: 0.6316 ± 0.0000
F1 Score: 0.6857 ± 0.0000
AUC: 0.8083 ± 0.0000


In [147]:
evaluate_neural_network_with_kfold(X_train_CsX2Ft, y_train, k=1, holdout = X_test_CsX2Ft, y_test = y_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy: 0.9582 ± 0.0000
Precision: 0.7059 ± 0.0000
Recall: 0.6316 ± 0.0000
F1 Score: 0.6667 ± 0.0000
AUC: 0.9654 ± 0.0000


## CS ReliefF

In [148]:
evaluate_naive_bayes_with_kfold(X_train_CsReliefFFt, y_train, k=1, holdout = X_test_CsReliefFFt, y_test = y_test)

Accuracy: 0.9582 ± 0.0000
Precision: 0.6667 ± 0.0000
Recall: 0.7368 ± 0.0000
F1 Score: 0.7000 ± 0.0000
AUC: 0.8554 ± 0.0000


In [149]:
evaluate_random_forest_with_kfold(X_train_CsReliefFFt, y_train, k=1, holdout = X_test_CsReliefFFt, y_test = y_test)

Accuracy: 0.9617 ± 0.0000
Precision: 0.7500 ± 0.0000
Recall: 0.6316 ± 0.0000
F1 Score: 0.6857 ± 0.0000
AUC: 0.8083 ± 0.0000


In [150]:
evaluate_neural_network_with_kfold(X_train_CsReliefFFt, y_train, k=1, holdout = X_test_CsReliefFFt, y_test = y_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy: 0.9582 ± 0.0000
Precision: 0.7059 ± 0.0000
Recall: 0.6316 ± 0.0000
F1 Score: 0.6667 ± 0.0000
AUC: 0.9747 ± 0.0000
