In [None]:
import json

import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_classif, chi2
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Necessary functions

### Fitness Functions

In [None]:
%pip install skrebate
from skrebate import ReliefF


def mutual_information_eval(solution, data, labels):
    # Convert NumPy array back to DataFrame
    data_df = pd.DataFrame(data)
    selected_data = data_df.iloc[:, solution == 1]
    if selected_data.shape[1] == 0:
        return -np.inf
    mi_scores = mutual_info_classif(selected_data, labels)
    return np.sum(mi_scores)


def chi2_eval(solution, data, labels):
    # Convert NumPy array back to DataFrame
    data_df = pd.DataFrame(data)
    selected_data = data_df.iloc[:, solution == 1]
    if selected_data.shape[1] == 0:
        return -np.inf
    chi2_scores, _ = chi2(selected_data, labels)
    return np.mean(chi2_scores)





def relieff_eval(solution, data, labels, n_neighbors=10):

    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)

    selected_features = data.iloc[:, solution.astype(bool)]

    if selected_features.shape[1] == 0:
        return -np.inf

    labels = np.array(labels)

    relief = ReliefF(n_neighbors=n_neighbors)
    relief.fit(selected_features.values, labels)

    relieff_score = relief.feature_importances_.mean()

    return relieff_score

def load_and_preprocess_data(filename='/home/SeisBenchV1_v1_1.json'):
    """
    Load and preprocess data from a JSON file.

    Parameters:
    - filename: Path to the JSON file.

    Returns:
    - X_scaled: Scaled feature matrix (DataFrame).
    - y: Target variable.
    """
    with open(filename) as file:
        data = json.load(file)
        data = pd.DataFrame(data)
        data.dropna(inplace=True)
        data.drop(data[data['Type'] == 'REGIONAL'].index, inplace=True)
        data.drop(data[data['Type'] == 'HB'].index, inplace=True)
        data.drop(data[data['Type'] == 'ICEQUAKE'].index, inplace=True)
        data.drop(data[data['Type'] == ''].index, inplace=True)

    label_encoder = LabelEncoder()
    data['Type'] = label_encoder.fit_transform(data['Type'])

    X = data.iloc[:, 1:]
    y = data['Type']

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    return pd.DataFrame(X_scaled, columns=X.columns), y









In [None]:
def add_result(classifier, fitness_function, accuracy, precision, recall, f1_score, auc):
    new_data = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score,
        "AUC": auc
    }

    if classifier == "Naive Bayes":
        global naive_bayes_df
        naive_bayes_df.loc[fitness_function] = new_data
    elif classifier == "Random Forest":
        global random_forest_df
        random_forest_df.loc[fitness_function] = new_data
    elif classifier == "Neural Network":
        global neural_network_df
        neural_network_df.loc[fitness_function] = new_data

### Save Results

In [None]:
from IPython.display import display, clear_output

# Step 1: Initialize empty DataFrames for each classifier with metrics as columns
metrics = ["Accuracy", "Precision", "Recall", "F1 Score", "AUC"]

naive_bayes_df = pd.DataFrame(columns=metrics, index=["Mutual Information", "X2", "Relief"])
random_forest_df = pd.DataFrame(columns=metrics, index=["Mutual Information", "X2", "Relief"])
neural_network_df = pd.DataFrame(columns=metrics, index=["Mutual Information", "X2", "Relief"])

# Display all tables function
def display_tables():
    clear_output(wait=True)
    print("Naive Bayes Results")
    display(naive_bayes_df)
    print("Random Forest Results")
    display(random_forest_df)
    print("Neural Network Results")
    display(neural_network_df)

# Genetic algorithm implementation

In [None]:
import numpy as np
from sklearn.feature_selection import mutual_info_classif
import random



# Function to calculate mutual information for a subset of features


def genetic_algorithm(X, y, population_size=42, num_parents=28, generations=100, mutation_rate=0.1, crossover_rate=0.8, fitness_function=mutual_information_eval):
    n_features = X.shape[1]

    # Initialize a random population of individuals (feature subsets)
    population = [np.random.choice([0, 1], size=n_features) for _ in range(population_size)]
    # population =population_size
    best_solution = None
    best_fitness = -float('inf')

    for generation in range(generations):
        print(f"Generation {generation + 1}/{generations}")
        # Evaluate the fitness of each individual in the population
        fitness_scores = []
        for individual in population:
            fitness = fitness_function(individual, X, y)
            fitness_scores.append(fitness)

            # Update the best solution found
            if fitness > best_fitness:
                best_fitness = fitness
                best_solution = individual.copy()

        # Selection: Select individuals based on their fitness (roulette wheel selection)
        fitness_sum = sum(fitness_scores)
        if fitness_sum == 0:
            probabilities = [1 / len(fitness_scores)] * len(fitness_scores)
        else:
            probabilities = [fitness / fitness_sum for fitness in fitness_scores]

        selected_population = random.choices(population, weights=probabilities, k=num_parents)

        # Crossover: Create new population using crossover
        new_population = []
        for i in range(0, population_size, 2):
            parent1 = selected_population[i % num_parents]
            parent2 = selected_population[(i + 1) % num_parents]

            if random.random() < crossover_rate:
                # Perform crossover (single-point crossover) while maintaining feature vector length
                crossover_point = random.randint(1, n_features - 1)
                child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
                child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
            else:
                child1, child2 = parent1.copy(), parent2.copy()

            new_population.append(child1)
            new_population.append(child2)

        # Mutation: Mutate the new population without modifying the length
        for individual in new_population:
            for feature in range(n_features):
                if random.random() < mutation_rate:
                    # Flip the bit for feature selection (1 becomes 0, 0 becomes 1)
                    individual[feature] = 1 - individual[feature]  # Flip the feature bit

        # Replace the old population with the new population
        population = new_population

    return best_solution, best_fitness








In [None]:
X, y = load_and_preprocess_data(filename='/home/SeisBenchV1_v1_1.json')


# Optimization

In [None]:
#Save the results from the optimizations
Mutual_Information = []
Chi2 = []
ReliefFList = []

In [None]:

# generations = 500
mutation=0.1
crossover=0.9

In [None]:
for n_generations in range(500, 1000, 50):
    best_solution, best_Gfitness = genetic_algorithm(X, y,mutation_rate=mutation,crossover_rate=crossover,fitness_function=mutual_information_eval,generations = n_generations )
    selected_features = X.columns[best_solution.astype(bool)].tolist()
    Mutual_Information.append([selected_features, best_Gfitness, n_generations])



Generation 1/500
Generation 2/500
Generation 3/500
Generation 4/500
Generation 5/500
Generation 6/500
Generation 7/500
Generation 8/500
Generation 9/500
Generation 10/500
Generation 11/500
Generation 12/500
Generation 13/500
Generation 14/500
Generation 15/500
Generation 16/500
Generation 17/500
Generation 18/500
Generation 19/500
Generation 20/500
Generation 21/500
Generation 22/500
Generation 23/500
Generation 24/500
Generation 25/500
Generation 26/500
Generation 27/500
Generation 28/500
Generation 29/500
Generation 30/500
Generation 31/500
Generation 32/500
Generation 33/500
Generation 34/500
Generation 35/500
Generation 36/500
Generation 37/500
Generation 38/500
Generation 39/500
Generation 40/500
Generation 41/500
Generation 42/500
Generation 43/500
Generation 44/500
Generation 45/500
Generation 46/500
Generation 47/500
Generation 48/500
Generation 49/500
Generation 50/500
Generation 51/500
Generation 52/500
Generation 53/500
Generation 54/500
Generation 55/500
Generation 56/500
G

In [None]:
for n_generations in range(500, 1000, 50):
    best_solution, best_Gfitness = genetic_algorithm(X, y,mutation_rate=mutation,crossover_rate=crossover,fitness_function=chi2_eval,generations = n_generations )
    selected_features = X.columns[best_solution.astype(bool)].tolist()
    Chi2.append([selected_features, best_Gfitness, n_generations])

In [11]:
for n_generations in range(500, 1000, 50):
    best_solution, best_Gfitness = genetic_algorithm(X, y,mutation_rate=mutation,crossover_rate=crossover,fitness_function=relieff_eval,generations = n_generations )
    selected_features = X.columns[best_solution.astype(bool)].tolist()
    ReliefFList.append([selected_features, best_Gfitness, n_generations])

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
Generation 601/650
Generation 602/650
Generation 603/650
Generation 604/650
Generation 605/650
Generation 606/650
Generation 607/650
Generation 608/650
Generation 609/650
Generation 610/650
Generation 611/650
Generation 612/650
Generation 613/650
Generation 614/650
Generation 615/650
Generation 616/650
Generation 617/650
Generation 618/650
Generation 619/650
Generation 620/650
Generation 621/650
Generation 622/650
Generation 623/650
Generation 624/650
Generation 625/650
Generation 626/650
Generation 627/650
Generation 628/650
Generation 629/650
Generation 630/650
Generation 631/650
Generation 632/650
Generation 633/650
Generation 634/650
Generation 635/650
Generation 636/650
Generation 637/650
Generation 638/650
Generation 639/650
Generation 640/650
Generation 641/650
Generation 642/650
Generation 643/650
Generation 644/650
Generation 645/650
Generation 646/650
Generation 647/650
Generation 648/650
Generation 64

In [12]:
import pickle
# with open('Mutual_Information.pkl', 'wb') as f:
#     pickle.dump(Mutual_Information, f)
# with open('Chi2.pkl', 'wb') as f:
#     pickle.dump(Chi2, f)
# with open('ReliefF.pkl', 'wb') as f:
#     pickle.dump(ReliefF, f)

In [13]:
Mutual_Information.sort(key=lambda x: x[1], reverse=True)
Chi2.sort(key=lambda x: x[1], reverse=True)
ReliefFList.sort(key=lambda x: x[1], reverse=True)

In [14]:
for i in Mutual_Information:
    print("Mutual Information: ",i[1], i[2])

Mutual Information:  2.777543066715758 950
Mutual Information:  2.7548592442883657 900
Mutual Information:  2.7296726169645935 750
Mutual Information:  2.726804963059743 800
Mutual Information:  2.7198261159774253 550
Mutual Information:  2.709151624001392 650
Mutual Information:  2.694973897590586 500
Mutual Information:  2.69406940514585 700
Mutual Information:  2.68765358769617 850
Mutual Information:  2.6446391193542915 600


In [15]:
for i in Chi2:
    print("Chi2: ",i[1], i[2])

Chi2:  13.832118550454123 750
Chi2:  12.452964588827 650
Chi2:  12.382896166498632 850
Chi2:  12.311137200338292 950
Chi2:  11.93763651064964 900
Chi2:  11.934168837137447 550
Chi2:  11.929854647358617 600
Chi2:  11.918739691364488 500
Chi2:  11.89582265132195 700
Chi2:  11.885025943951984 800


In [16]:
for i in ReliefFList:
    print("ReliefF List: ",i[1], i[2])

ReliefF List:  0.30195163642390593 550
ReliefF List:  0.30143552482117103 600
ReliefF List:  0.2973001286507523 850
ReliefF List:  0.29581614092035174 750
ReliefF List:  0.2957203604008711 650
ReliefF List:  0.29254720378537646 800
ReliefF List:  0.292336895904233 900
ReliefF List:  0.29021919085767417 500
ReliefF List:  0.2886704749087467 950
ReliefF List:  0.28413407807710245 700


In [17]:
print("Mutual Information selected Features: \n",len(Mutual_Information[0][0]))


Mutual Information selected Features: 
 57


In [18]:
print("Chi2 selected features: \n",len(Chi2[0][0]))

Chi2 selected features: 
 25


In [19]:
print("ReliefF selected features: \n",len(ReliefFList[0][0]))


ReliefF selected features: 
 34


## Base Results

In [20]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

### Naive Bayes

In [22]:


#Naive Bayes classifier with all features

cmodel = GaussianNB()
cmodel.fit(X_train, y_train)
y_pred = cmodel.predict(X_test)
y_pred_proba = cmodel.predict_proba(X_test)[:, 1]  # Probability estimates for AUC

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_proba)

In [23]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("AUC: ", auc)


Accuracy:  0.9477351916376306
Precision:  0.5769230769230769
Recall:  0.7894736842105263
F1:  0.6666666666666666
AUC:  0.9470738413197173


### Random Forest

In [24]:
from sklearn.ensemble import RandomForestClassifier
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)

random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
y_pred_proba = random_forest.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_proba)


In [25]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("AUC: ", auc)


Accuracy:  0.9651567944250871
Precision:  0.8
Recall:  0.631578947368421
F1:  0.7058823529411765
AUC:  0.9813432835820897


### Neural Network

In [26]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

In [27]:


# Crear y compilar el modelo
RN = Sequential()

# Definir la dimensión de entrada basada en las características seleccionadas
input_dim = X_train.shape[1]
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))
RN.add(Dense(units=32, activation='relu'))
RN.add(Dense(units=1, activation='sigmoid'))

# Compilar el modelo
RN.compile(optimizer=Adam(learning_rate=0.001),
              loss=BinaryCrossentropy(),
              metrics=['accuracy'])

# Entrenar el modelo
history = RN.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluar el modelo en el conjunto de prueba
loss, accuracy = RN.evaluate(X_test, y_test)

print(f'Test Accuracy: {accuracy}')

# Realizar predicciones en el conjunto de prueba
y_pred_prob = RN.predict(X_test)  # Predicciones como probabilidades
y_pred = (y_pred_prob > 0.5).astype(int)  # Convertir probabilidades a etiquetas (0 o 1)




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [28]:
# Calcular y mostrar las métricas
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))  # Usamos probabilidades para AUC
add_result("Neural Network", "Mutual Information", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred_prob))

Accuracy: 0.9651567944250871
Precision: 0.7368421052631579
Recall: 0.7368421052631579
F1: 0.7368421052631579
AUC: 0.9679890023566379


In [29]:
MI_Scores = []
Chi_Scores = []
ReliefF_Scores = []

# Genetic algorithm with optimized parameters (best fitness)

In [30]:
MIOpt= []
ChiOpt = []
ReliefOpt = []

In [31]:
best_solution, best_Gfitness = genetic_algorithm(X, y, mutation_rate=mutation, crossover_rate=crossover,fitness_function=mutual_information_eval, generations=Mutual_Information[0][2])


GAMIFeatures = X.columns[best_solution.astype(bool)].tolist()
MIOpt.append([GAMIFeatures, best_Gfitness])

Generation 1/950
Generation 2/950
Generation 3/950
Generation 4/950
Generation 5/950
Generation 6/950
Generation 7/950
Generation 8/950
Generation 9/950
Generation 10/950
Generation 11/950
Generation 12/950
Generation 13/950
Generation 14/950
Generation 15/950
Generation 16/950
Generation 17/950
Generation 18/950
Generation 19/950
Generation 20/950
Generation 21/950
Generation 22/950
Generation 23/950
Generation 24/950
Generation 25/950
Generation 26/950
Generation 27/950
Generation 28/950
Generation 29/950
Generation 30/950
Generation 31/950
Generation 32/950
Generation 33/950
Generation 34/950
Generation 35/950
Generation 36/950
Generation 37/950
Generation 38/950
Generation 39/950
Generation 40/950
Generation 41/950
Generation 42/950
Generation 43/950
Generation 44/950
Generation 45/950
Generation 46/950
Generation 47/950
Generation 48/950
Generation 49/950
Generation 50/950
Generation 51/950
Generation 52/950
Generation 53/950
Generation 54/950
Generation 55/950
Generation 56/950
G

In [32]:
print(MIOpt)

[[['f1_t_mean', 'f3_t_var', 'f4_t_entropy', 'f12_t_zcr', 'f13_t_PeaksAboveRMSDensity_fun', 'f14_f_peaks_pos_1', 'f15_f_90_percent_energy', 'f16_f_entropy', 'f20_f_energy', 'f21_f_kurtosis', 'f22_f_multiscaleEntropy', 'f23_f_peak_1020_value', 'f25_f_peak_2030_value', 'f26_f_peak_2030_pos', 'f27_f_rms', 'f28_f_peak2rms', 'f30_f_PeaksAboveRMSDensity_fun', 'f31_f_peaks_val_2', 'f34_f_peaks_pos_3', 'f35_w_f_maxval_A6', 'f37_w_f_maxval_D2', 'f38_w_f_maxval_D3', 'f39_w_f_maxval_D4', 'f40_w_f_maxval_D5', 'f41_w_f_maxval_D6', 'f42_w_f_maxpos_A6', 'f43_w_f_maxpos_D2', 'f44_w_f_maxpos_D3', 'f46_w_f_maxpos_D5', 'f47_w_f_maxpos_D6', 'f50_w_f_mean_D2', 'f52_w_f_mean_D4', 'f54_w_f_mean_D6', 'f55_w_t_meanEnergyAD', 'f57_w_t_PEC_D1', 'f58_w_t_PEC_D2', 'f59_w_t_PEC_D3', 'f60_w_t_PEC_D4', 'f61_w_t_PEC_D5', 'f63_w_t_rms_A6', 'f64_w_t_rms_D1', 'f65_w_t_rms_D2', 'f66_w_t_rms_D3', 'f67_w_t_rms_D4', 'f70_w_t_peak2peak_A6', 'f71_w_t_peak2peak_D1', 'f72_w_t_peak2peak_D2', 'f73_w_t_peak2peak_D3', 'f74_w_t_peak2p

In [33]:
print("Features selected by MI: ", GAMIFeatures)


Features selected by MI:  ['f1_t_mean', 'f3_t_var', 'f4_t_entropy', 'f12_t_zcr', 'f13_t_PeaksAboveRMSDensity_fun', 'f14_f_peaks_pos_1', 'f15_f_90_percent_energy', 'f16_f_entropy', 'f20_f_energy', 'f21_f_kurtosis', 'f22_f_multiscaleEntropy', 'f23_f_peak_1020_value', 'f25_f_peak_2030_value', 'f26_f_peak_2030_pos', 'f27_f_rms', 'f28_f_peak2rms', 'f30_f_PeaksAboveRMSDensity_fun', 'f31_f_peaks_val_2', 'f34_f_peaks_pos_3', 'f35_w_f_maxval_A6', 'f37_w_f_maxval_D2', 'f38_w_f_maxval_D3', 'f39_w_f_maxval_D4', 'f40_w_f_maxval_D5', 'f41_w_f_maxval_D6', 'f42_w_f_maxpos_A6', 'f43_w_f_maxpos_D2', 'f44_w_f_maxpos_D3', 'f46_w_f_maxpos_D5', 'f47_w_f_maxpos_D6', 'f50_w_f_mean_D2', 'f52_w_f_mean_D4', 'f54_w_f_mean_D6', 'f55_w_t_meanEnergyAD', 'f57_w_t_PEC_D1', 'f58_w_t_PEC_D2', 'f59_w_t_PEC_D3', 'f60_w_t_PEC_D4', 'f61_w_t_PEC_D5', 'f63_w_t_rms_A6', 'f64_w_t_rms_D1', 'f65_w_t_rms_D2', 'f66_w_t_rms_D3', 'f67_w_t_rms_D4', 'f70_w_t_peak2peak_A6', 'f71_w_t_peak2peak_D1', 'f72_w_t_peak2peak_D2', 'f73_w_t_peak2p

In [34]:
best_solution, best_Gfitness = genetic_algorithm(X, y, mutation_rate=mutation, crossover_rate=crossover,fitness_function=chi2_eval, generations=Chi2[0][2])

GAX2Features = X.columns[best_solution.astype(bool)].tolist()
ChiOpt.append([GAX2Features, best_Gfitness])

Generation 1/750
Generation 2/750
Generation 3/750
Generation 4/750
Generation 5/750
Generation 6/750
Generation 7/750
Generation 8/750
Generation 9/750
Generation 10/750
Generation 11/750
Generation 12/750
Generation 13/750
Generation 14/750
Generation 15/750
Generation 16/750
Generation 17/750
Generation 18/750
Generation 19/750
Generation 20/750
Generation 21/750
Generation 22/750
Generation 23/750
Generation 24/750
Generation 25/750
Generation 26/750
Generation 27/750
Generation 28/750
Generation 29/750
Generation 30/750
Generation 31/750
Generation 32/750
Generation 33/750
Generation 34/750
Generation 35/750
Generation 36/750
Generation 37/750
Generation 38/750
Generation 39/750
Generation 40/750
Generation 41/750
Generation 42/750
Generation 43/750
Generation 44/750
Generation 45/750
Generation 46/750
Generation 47/750
Generation 48/750
Generation 49/750
Generation 50/750
Generation 51/750
Generation 52/750
Generation 53/750
Generation 54/750
Generation 55/750
Generation 56/750
G

In [35]:
print(ChiOpt)

[[['f4_t_entropy', 'f7_t_time2peak', 'f9_t_peak2peak', 'f10_t_peak2rms', 'f14_f_peaks_pos_1', 'f16_f_entropy', 'f22_f_multiscaleEntropy', 'f28_f_peak2rms', 'f30_f_PeaksAboveRMSDensity_fun', 'f39_w_f_maxval_D4', 'f41_w_f_maxval_D6', 'f44_w_f_maxpos_D3', 'f46_w_f_maxpos_D5', 'f50_w_f_mean_D2', 'f55_w_t_meanEnergyAD', 'f57_w_t_PEC_D1', 'f58_w_t_PEC_D2', 'f59_w_t_PEC_D3', 'f62_w_t_PEC_D6', 'f65_w_t_rms_D2', 'f66_w_t_rms_D3', 'f71_w_t_peak2peak_D1', 'f72_w_t_peak2peak_D2', 'f75_w_t_peak2peak_D5', 'f79_w_t_peak2rms_D2', 'f83_w_t_peak2rms_D6'], 12.930711873955378]]


In [36]:
print("Features selected by Chi2: ", GAX2Features)

Features selected by Chi2:  ['f4_t_entropy', 'f7_t_time2peak', 'f9_t_peak2peak', 'f10_t_peak2rms', 'f14_f_peaks_pos_1', 'f16_f_entropy', 'f22_f_multiscaleEntropy', 'f28_f_peak2rms', 'f30_f_PeaksAboveRMSDensity_fun', 'f39_w_f_maxval_D4', 'f41_w_f_maxval_D6', 'f44_w_f_maxpos_D3', 'f46_w_f_maxpos_D5', 'f50_w_f_mean_D2', 'f55_w_t_meanEnergyAD', 'f57_w_t_PEC_D1', 'f58_w_t_PEC_D2', 'f59_w_t_PEC_D3', 'f62_w_t_PEC_D6', 'f65_w_t_rms_D2', 'f66_w_t_rms_D3', 'f71_w_t_peak2peak_D1', 'f72_w_t_peak2peak_D2', 'f75_w_t_peak2peak_D5', 'f79_w_t_peak2rms_D2', 'f83_w_t_peak2rms_D6']


In [37]:
best_solution, best_Gfitness = genetic_algorithm(X, y, mutation_rate=mutation, crossover_rate=crossover,fitness_function=chi2_eval, generations=ReliefFList[0][2])

GARFFeatures = X.columns[best_solution.astype(bool)].tolist()
ChiOpt.append([GARFFeatures, best_Gfitness])


Generation 1/550
Generation 2/550
Generation 3/550
Generation 4/550
Generation 5/550
Generation 6/550
Generation 7/550
Generation 8/550
Generation 9/550
Generation 10/550
Generation 11/550
Generation 12/550
Generation 13/550
Generation 14/550
Generation 15/550
Generation 16/550
Generation 17/550
Generation 18/550
Generation 19/550
Generation 20/550
Generation 21/550
Generation 22/550
Generation 23/550
Generation 24/550
Generation 25/550
Generation 26/550
Generation 27/550
Generation 28/550
Generation 29/550
Generation 30/550
Generation 31/550
Generation 32/550
Generation 33/550
Generation 34/550
Generation 35/550
Generation 36/550
Generation 37/550
Generation 38/550
Generation 39/550
Generation 40/550
Generation 41/550
Generation 42/550
Generation 43/550
Generation 44/550
Generation 45/550
Generation 46/550
Generation 47/550
Generation 48/550
Generation 49/550
Generation 50/550
Generation 51/550
Generation 52/550
Generation 53/550
Generation 54/550
Generation 55/550
Generation 56/550
G

In [38]:
print(ReliefOpt)

[]


In [39]:
print("Features selected by ReliefF: ", GARFFeatures)

Features selected by ReliefF:  ['f3_t_var', 'f13_t_PeaksAboveRMSDensity_fun', 'f14_f_peaks_pos_1', 'f20_f_energy', 'f22_f_multiscaleEntropy', 'f23_f_peak_1020_value', 'f27_f_rms', 'f28_f_peak2rms', 'f30_f_PeaksAboveRMSDensity_fun', 'f31_f_peaks_val_2', 'f35_w_f_maxval_A6', 'f37_w_f_maxval_D2', 'f41_w_f_maxval_D6', 'f43_w_f_maxpos_D2', 'f44_w_f_maxpos_D3', 'f47_w_f_maxpos_D6', 'f48_w_f_mean_A6', 'f51_w_f_mean_D3', 'f54_w_f_mean_D6', 'f58_w_t_PEC_D2', 'f59_w_t_PEC_D3', 'f65_w_t_rms_D2', 'f67_w_t_rms_D4', 'f68_w_t_rms_D5', 'f72_w_t_peak2peak_D2', 'f73_w_t_peak2peak_D3', 'f74_w_t_peak2peak_D4', 'f80_w_t_peak2rms_D3', 'f81_w_t_peak2rms_D4', 'f82_w_t_peak2rms_D5']


### Mutual Information Classifiers

#### Naive Bayes with Mutual Information

In [40]:
NB = GaussianNB()

In [41]:
NB.fit(X_train[GAMIFeatures], y_train)

y_pred = NB.predict(X_test[GAMIFeatures])

In [42]:

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))
add_result("Naive Bayes", "Mutual Information", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred))

Accuracy: 0.9512195121951219
Precision: 0.6
Recall: 0.7894736842105263
F1: 0.6818181818181819
AUC: 0.8760801256873527


### Random Forest with Mutual Information

In [43]:
DT = RandomForestClassifier()


In [44]:
DT.fit(X_train[GAMIFeatures], y_train)
y_pred = DT.predict(X_test[GAMIFeatures])


In [45]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))
add_result("Random Forest", "Mutual Information", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred))

Accuracy: 0.9686411149825784
Precision: 0.8125
Recall: 0.6842105263157895
F1: 0.742857142857143
AUC: 0.8365082482325216


### Neural Network with Mutual Information

In [48]:
# Construir el modelo
RN = Sequential()

# Dimensión de entrada
input_dim = X_train[GAMIFeatures].shape[1]
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))
RN.add(Dense(units=32, activation='relu'))
RN.add(Dense(units=1, activation='sigmoid'))

# Compilar el modelo
RN.compile(optimizer=Adam(learning_rate=0.001),
           loss=BinaryCrossentropy(),
           metrics=['accuracy'])

# Entrenar el modelo
history = RN.fit(X_train[GAMIFeatures], y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluar el modelo en el conjunto de prueba
loss, accuracy = RN.evaluate(X_test[GAMIFeatures], y_test)

print(f'Test Accuracy: {accuracy}')

# Realizar predicciones en el conjunto de prueba
y_pred_prob = RN.predict(X_test[GAMIFeatures])  # Predicciones como probabilidades
y_pred = (y_pred_prob > 0.5).astype(int)  # Convertir probabilidades a etiquetas (0 o 1)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [49]:
# Calcular y mostrar las métricas
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))  # Usamos probabilidades para AUC
add_result("Neural Network", "Mutual Information", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred_prob))

Accuracy: 0.9651567944250871
Precision: 0.7368421052631579
Recall: 0.7368421052631579
F1: 0.7368421052631579
AUC: 0.9797721916732128


### Chi2 Classifiers

#### Naive Bayes with chi2

In [50]:
NB.fit(X_train[GAX2Features], y_train)
y_pred = NB.predict(X_test[GAX2Features])


In [51]:

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))
add_result("Naive Bayes", "X2", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred))

Accuracy: 0.9547038327526133
Precision: 0.6363636363636364
Recall: 0.7368421052631579
F1: 0.6829268292682926
AUC: 0.8534956794972507


#### Random Forest with chi2

In [52]:
DT.fit(X_train[GAX2Features], y_train)
y_pred = DT.predict(X_test[GAX2Features])

In [53]:

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))
add_result("Random Forest", "X2", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred))

Accuracy: 0.9651567944250871
Precision: 0.7647058823529411
Recall: 0.6842105263157895
F1: 0.7222222222222222
AUC: 0.8346425765907304


#### Neural Network with chi2

In [55]:
# Número de características seleccionadas por GAX2Features
input_dim = X_train[GAX2Features].shape[1]

RN = Sequential()
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))  # Capa de entrada
RN.add(Dense(units=32, activation='relu'))  # Capa oculta
RN.add(Dense(units=1, activation='sigmoid'))  # Capa de salida

# Compilar el modelo
RN.compile(optimizer=Adam(learning_rate=0.001),
           loss=BinaryCrossentropy(),
           metrics=['accuracy'])

# Entrenar el modelo
history = RN.fit(X_train[GAX2Features], y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluar el modelo en el conjunto de prueba
loss, accuracy = RN.evaluate(X_test[GAX2Features], y_test)
print(f'Test Accuracy: {accuracy}')

# Realizar predicciones en el conjunto de prueba
y_pred_prob = RN.predict(X_test[GAX2Features])  # Predicciones como probabilidades
y_pred = (y_pred_prob > 0.5).astype(int)  # Convertir probabilidades a etiquetas binarias (0 o 1)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [56]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))
add_result("Neural Network", "X2", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred_prob))

Accuracy: 0.9651567944250871
Precision: 0.8
Recall: 0.631578947368421
F1: 0.7058823529411765
AUC: 0.9852710133542812


### ReliefF Classifiers

#### Naive Bayes with ReliefF

In [57]:
NB.fit(X_train[GARFFeatures], y_train)
y_pred = NB.predict(X_test[GARFFeatures])


In [58]:

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))
add_result("Naive Bayes", "Relief", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred))

Accuracy: 0.9581881533101045
Precision: 0.6521739130434783
Recall: 0.7894736842105263
F1: 0.7142857142857143
AUC: 0.8798114689709349


#### Random Forest with ReliefF


In [59]:
DT.fit(X_train[GARFFeatures], y_train)
y_pred = DT.predict(X_test[GARFFeatures])


In [60]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))
add_result("Random Forest", "Relief", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred))

Accuracy: 0.9651567944250871
Precision: 0.7647058823529411
Recall: 0.6842105263157895
F1: 0.7222222222222222
AUC: 0.8346425765907304


#### Neural Network with ReliefF

In [61]:
# Número de características seleccionadas por ReliefF
input_dim = X_train[GARFFeatures].shape[1]

RN = Sequential()
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))  # Capa de entrada
RN.add(Dense(units=32, activation='relu'))  # Capa oculta
RN.add(Dense(units=1, activation='sigmoid'))  # Capa de salida

# Compilar el modelo
RN.compile(optimizer=Adam(learning_rate=0.001),
           loss=BinaryCrossentropy(),
           metrics=['accuracy'])

# Entrenar el modelo
history = RN.fit(X_train[GARFFeatures], y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluar el modelo en el conjunto de prueba
loss, accuracy = RN.evaluate(X_test[GARFFeatures], y_test)
print(f'Test Accuracy: {accuracy}')

# Realizar predicciones en el conjunto de prueba
y_pred_prob = RN.predict(X_test[GARFFeatures])  # Predicciones como probabilidades
y_pred = (y_pred_prob > 0.5).astype(int)  # Convertir probabilidades a etiquetas binarias (0 o 1)


print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))
add_result("Neural Network", "Relief", accuracy_score(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), roc_auc_score(y_test, y_pred_prob))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

# Display results

In [62]:
display_tables()

Naive Bayes Results


Unnamed: 0,Accuracy,Precision,Recall,F1 Score,AUC
Mutual Information,0.95122,0.6,0.789474,0.681818,0.87608
X2,0.954704,0.636364,0.736842,0.682927,0.853496
Relief,0.958188,0.652174,0.789474,0.714286,0.879811


Random Forest Results


Unnamed: 0,Accuracy,Precision,Recall,F1 Score,AUC
Mutual Information,0.968641,0.8125,0.684211,0.742857,0.836508
X2,0.965157,0.764706,0.684211,0.722222,0.834643
Relief,0.965157,0.764706,0.684211,0.722222,0.834643


Neural Network Results


Unnamed: 0,Accuracy,Precision,Recall,F1 Score,AUC
Mutual Information,0.965157,0.736842,0.736842,0.736842,0.979772
X2,0.965157,0.8,0.631579,0.705882,0.985271
Relief,0.968641,0.8125,0.684211,0.742857,0.982129
