In [11]:
import json

import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_classif, chi2
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder, MinMaxScaler


def mutual_information_eval(solution, data, labels):
    # Convert NumPy array back to DataFrame
    data_df = pd.DataFrame(data)
    selected_data = data_df.iloc[:, solution == 1]
    if selected_data.shape[1] == 0:
        return -np.inf
    mi_scores = mutual_info_classif(selected_data, labels)
    return np.sum(mi_scores)


def chi2_eval(solution, data, labels):
    # Convert NumPy array back to DataFrame
    data_df = pd.DataFrame(data)
    selected_data = data_df.iloc[:, solution == 1]
    if selected_data.shape[1] == 0:
        return -np.inf
    chi2_scores, _ = chi2(selected_data, labels)
    return np.mean(chi2_scores)


import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors


def relieff_eval(solution, data, labels, n_neighbors=10):
    """
    Evaluate the quality of a feature subset using the ReliefF algorithm.

    Parameters:
    - solution: Binary array indicating the selected features.
    - data: Feature matrix.
    - labels: Target variable.
    - n_neighbors: Number of neighbors to consider.

    Returns:
    - relieff_score: The average ReliefF score for the selected features.
    """
    # Ensure data is a DataFrame
    if not isinstance(data, pd.DataFrame):
        data = pd.DataFrame(data)

    # Select features based on the solution
    selected_features = data.iloc[:, solution.astype(bool)].to_numpy()

    # Check if any features are selected
    if selected_features.shape[1] == 0:
        return -np.inf

    n_samples, n_features = selected_features.shape

    # Convert labels to NumPy array
    labels = np.array(labels)

    # Fit the nearest neighbors model
    nn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(selected_features)

    # Find nearest neighbors for all samples at once
    distances, indices = nn.kneighbors(selected_features)

    # Initialize the score array
    scores = np.zeros(n_features)

    # Efficiently compute differences
    for i in range(n_samples):
        # Neighbors for the current sample (excluding the sample itself)
        neighbors = indices[i, 1:]

        # Boolean mask for same-class and different-class neighbors
        same_class_mask = labels[i] == labels[neighbors]
        diff_class_mask = ~same_class_mask

        # Get the neighbors' features
        current_sample = selected_features[i, :]
        same_class_neighbors = selected_features[neighbors[same_class_mask], :]
        diff_class_neighbors = selected_features[neighbors[diff_class_mask], :]

        # Compute feature differences for both classes
        if same_class_neighbors.shape[0] > 0:
            diff_same_class = np.abs(current_sample - same_class_neighbors).sum(axis=0)
        else:
            diff_same_class = np.zeros(n_features)

        if diff_class_neighbors.shape[0] > 0:
            diff_diff_class = np.abs(current_sample - diff_class_neighbors).sum(axis=0)
        else:
            diff_diff_class = np.zeros(n_features)

        # Update scores
        scores += (diff_diff_class - diff_same_class)

    # Average the scores over the number of samples
    relieff_score = np.mean(scores / n_samples)
    return relieff_score
def load_and_preprocess_data(filename ='/home/testp/SeisBenchV1_v1_1.json'):

    with open(filename) as file:
        data = json.load(file)
        data = pd.DataFrame(data)
        data.dropna(inplace=True)
        data.drop(data[data['Type'] == 'REGIONAL'].index, inplace=True)
        data.drop(data[data['Type'] == 'HB'].index, inplace=True)
        data.drop(data[data['Type'] == 'ICEQUAKE'].index, inplace=True)
        data.drop(data[data['Type'] == ''].index, inplace=True)

    label_encoder = LabelEncoder()
    data['Type'] = label_encoder.fit_transform(data['Type'])

    X = data.iloc[:, 1:]
    y = data['Type']

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    return pd.DataFrame(X_scaled, columns=X.columns), y


In [12]:
import numpy as np


In [13]:
X, y = load_and_preprocess_data()



In [14]:
Mutual_Information = []
Chi2 = []
ReliefF = []

### Cuckoo Run


In [19]:
import json

from sklearn.feature_selection import mutual_info_classif
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from scipy.special import gamma
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier



# Función para realizar vuelos de Levy
def levy_flight(Lambda=1.5, size=1):
    sigma = (gamma(1 + Lambda) * np.sin(np.pi * Lambda / 2) /
             (gamma((1 + Lambda) / 2) * Lambda * 2 ** ((Lambda - 1) / 2))) ** (1 / Lambda)
    u = np.random.normal(0, sigma, size)
    v = np.random.normal(0, 1, size)
    step = u / abs(v) ** (1 / Lambda)
    return step


# Búsqueda de cucos con impresiones de seguimiento
def cuckoo_search(n, dim, iter_max, data, labels, pa=0.25, fitness_function=mutual_information_eval):
    # Initialize nests as random binary matrices (n nests, each with 'dim' features)
    nests = np.random.rand(n, dim) > 0.5

    # Calculate the initial fitness of all nests using mutual information
    fitness = np.array([fitness_function(nest.astype(int), data, labels) for nest in nests])

    # Main loop: iterate for 'iter_max' iterations
    for t in range(iter_max):
        # print(f"Iteración {t + 1}/{iter_max}")  # Display current iteration number

        # For each nest, perform the Cuckoo Search process
        for i in range(n):
            # Generate a new solution via Levy flight (binary mutation)
            step_size = levy_flight(size=dim) > 0.5  # Create a random step using Levy flight
            new_nest = np.logical_xor(nests[i], step_size).astype(int)  # Mutate the current nest

            # Evaluate the fitness of the newly generated nest
            new_fitness = fitness_function(new_nest, data, labels)
            # print(f"  Nido {i + 1} fitness: {fitness[i]:.4f} -> {new_fitness:.4f}")
            # Select a random different nest to compare the new solution against
            random_nest_index = np.random.choice([j for j in range(n) if j != i])

            # If the new nest has better fitness than the random nest, replace it
            if new_fitness > fitness[random_nest_index]:
                nests[random_nest_index] = new_nest  # Replace the random nest with the new one
                fitness[random_nest_index] = new_fitness  # Update fitness of the replaced nest
                # print(f"  Nido {random_nest_index + 1} mejorado a fitness {new_fitness:.4f}")

        # Abandon a fraction 'pa' of the worst nests and replace them with new random nests
        n_abandon = int(n * pa)  # Calculate the number of nests to abandon
        worst_nests_indices = np.argsort(fitness)[:n_abandon]  # Indices of the worst nests

        # Replace the worst nests with new random ones and recalculate their fitness
        nests[worst_nests_indices] = np.random.rand(n_abandon, dim) > 0.5
        fitness[worst_nests_indices] = np.array(
            [fitness_function(nest.astype(int), data, labels) for nest in nests[worst_nests_indices]]
        )
        # print(f"  {n_abandon} peores nidos abandonados y reemplazados por nuevos nidos.")

        # Display the best fitness found at the end of this iteration
        best_fitness = np.max(fitness)
        print(f"Mejor fitness al final de la iteración {t + 1}: {best_fitness:.4f}\n")

    # After all iterations, find the best solution (nest) with the highest fitness
    best_idx = np.argmax(fitness)  # Index of the best nest
    best_nest = nests[best_idx]  # The best nest (binary feature selection vector)
    best_fitness = fitness[best_idx]  # The best fitness value
    # print(f"Mejor nido encontrado:\n{best_nest}\nCon un fitness de: {best_fitness:.4f}")

    # Return the final set of nests, their fitness values, and the best solution
    return nests, fitness, best_nest, best_fitness

In [20]:
#https://sci-hub.se/10.1007/s11053-021-09823-7
n = 50
dim = 84

for i in range(100,550,50):
    nests, fitness_scores, best_nest, best_CKMIfitness = cuckoo_search(n, dim, i, X, y, fitness_function=mutual_information_eval)
    CKMIFeatures = X.columns[best_nest.astype(bool)].tolist()

    Mutual_Information.append([CKMIFeatures, best_CKMIfitness, i])



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Mejor fitness al final de la iteración 101: 2.7127

Mejor fitness al final de la iteración 102: 2.7127

Mejor fitness al final de la iteración 103: 2.7127

Mejor fitness al final de la iteración 104: 2.7127

Mejor fitness al final de la iteración 105: 2.7127

Mejor fitness al final de la iteración 106: 2.7127

Mejor fitness al final de la iteración 107: 2.7127

Mejor fitness al final de la iteración 108: 2.7127

Mejor fitness al final de la iteración 109: 2.7127

Mejor fitness al final de la iteración 110: 2.7127

Mejor fitness al final de la iteración 111: 2.7127

Mejor fitness al final de la iteración 112: 2.7127

Mejor fitness al final de la iteración 113: 2.7127

Mejor fitness al final de la iteración 114: 2.7127

Mejor fitness al final de la iteración 115: 2.7127

Mejor fitness al final de la iteración 116: 2.7127

Mejor fitness al final de la iteración 117: 2.7127

Mejor fitness al final de la iteración 118: 2.7127


In [21]:
for i in range(100,550,50):
    nests, fitness_scores, best_nest, best_CX2fitness = cuckoo_search(n, dim, i, X, y, fitness_function=chi2_eval)
    CKX2Features = X.columns[best_nest.astype(bool)].tolist()

    Chi2.append([CKX2Features, best_CX2fitness, i])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Mejor fitness al final de la iteración 101: 12.6450

Mejor fitness al final de la iteración 102: 12.6450

Mejor fitness al final de la iteración 103: 12.6450

Mejor fitness al final de la iteración 104: 12.6450

Mejor fitness al final de la iteración 105: 12.6450

Mejor fitness al final de la iteración 106: 12.6450

Mejor fitness al final de la iteración 107: 12.6450

Mejor fitness al final de la iteración 108: 12.6450

Mejor fitness al final de la iteración 109: 12.6450

Mejor fitness al final de la iteración 110: 12.6450

Mejor fitness al final de la iteración 111: 12.6450

Mejor fitness al final de la iteración 112: 12.6450

Mejor fitness al final de la iteración 113: 12.6450

Mejor fitness al final de la iteración 114: 12.6450

Mejor fitness al final de la iteración 115: 12.6450

Mejor fitness al final de la iteración 116: 12.6450

Mejor fitness al final de la iteración 117: 12.6450

Mejor fitness al final de la itera

In [22]:
for i in range(100,550,50):
    nests, fitness_scores, best_nest, best_CKRFfitness = cuckoo_search(n, dim, i, X, y, fitness_function=relieff_eval)
    CKRFFeatures = X.columns[best_nest.astype(bool)].tolist()

    ReliefF.append([CKRFFeatures, best_CKRFfitness, i])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Mejor fitness al final de la iteración 101: -0.4224

Mejor fitness al final de la iteración 102: -0.4224

Mejor fitness al final de la iteración 103: -0.4224

Mejor fitness al final de la iteración 104: -0.4224

Mejor fitness al final de la iteración 105: -0.4224

Mejor fitness al final de la iteración 106: -0.4224

Mejor fitness al final de la iteración 107: -0.4224

Mejor fitness al final de la iteración 108: -0.4224

Mejor fitness al final de la iteración 109: -0.4224

Mejor fitness al final de la iteración 110: -0.4224

Mejor fitness al final de la iteración 111: -0.4224

Mejor fitness al final de la iteración 112: -0.4224

Mejor fitness al final de la iteración 113: -0.4224

Mejor fitness al final de la iteración 114: -0.4224

Mejor fitness al final de la iteración 115: -0.4224

Mejor fitness al final de la iteración 116: -0.4224

Mejor fitness al final de la iteración 117: -0.4224

Mejor fitness al final de la itera

In [23]:
# # save the results
import pickle
# with open('SAMutual_Information.pkl', 'wb') as f:
#     pickle.dump(Mutual_Information, f)
# with open('SAChi2.pkl', 'wb') as f:
#     pickle.dump(Chi2, f)
# with open('SAReliefF.pkl', 'wb') as f:
#     pickle.dump(ReliefF, f)
#

In [25]:
#Open the pickle files
with open('/home/testp/CKMIFR.pkl', 'rb') as f:
    Mutual_Information = pickle.load(f)
with open('/home/testp/CKChi2FR.pkl', 'rb') as f:
    Chi2 = pickle.load(f)
with open('/home/testp/CKReliefFFR.pkl', 'rb') as f:
    ReliefF = pickle.load(f)


FileNotFoundError: [Errno 2] No such file or directory: '/home/testp/CKMIFR.pkl'

In [26]:
#sort the results with the best fitness for each method
Mutual_Information.sort(key=lambda x: x[1], reverse=True)
Chi2.sort(key=lambda x: x[1], reverse=True)
ReliefF.sort(key=lambda x: x[1], reverse=True)

In [27]:
#print all the scores in mutual information and the number of generations
for i in Mutual_Information:
    print("Mutual Information: ",i[1], i[2])

Mutual Information:  2.9011705486681283 500
Mutual Information:  2.8802543725200525 450
Mutual Information:  2.8684933527999306 400
Mutual Information:  2.86659836550009 250
Mutual Information:  2.8104297634080964 150
Mutual Information:  2.8097113189361336 350
Mutual Information:  2.8068926302278814 300
Mutual Information:  2.7947836577328187 150
Mutual Information:  2.7838368025861717 100
Mutual Information:  2.7637675606270857 200
Mutual Information:  2.725249606787376 100


In [28]:
print("Mutual Information: \n",Mutual_Information[0])
print("Chi2: \n",Chi2[0])
print("ReliefF: \n",ReliefF[0])

Mutual Information: 
 [['f1_t_mean', 'f2_t_std', 'f4_t_entropy', 'f8_t_rms', 'f9_t_peak2peak', 'f10_t_peak2rms', 'f14_f_peaks_pos_1', 'f15_f_90_percent_energy', 'f18_f_std', 'f20_f_energy', 'f21_f_kurtosis', 'f22_f_multiscaleEntropy', 'f23_f_peak_1020_value', 'f24_f_peak_1020_pos', 'f25_f_peak_2030_value', 'f26_f_peak_2030_pos', 'f28_f_peak2rms', 'f29_f_power', 'f30_f_PeaksAboveRMSDensity_fun', 'f31_f_peaks_val_2', 'f32_f_peaks_pos_2', 'f33_f_peaks_val_3', 'f34_f_peaks_pos_3', 'f35_w_f_maxval_A6', 'f36_w_f_maxval_D1', 'f37_w_f_maxval_D2', 'f39_w_f_maxval_D4', 'f40_w_f_maxval_D5', 'f41_w_f_maxval_D6', 'f42_w_f_maxpos_A6', 'f44_w_f_maxpos_D3', 'f46_w_f_maxpos_D5', 'f47_w_f_maxpos_D6', 'f50_w_f_mean_D2', 'f51_w_f_mean_D3', 'f54_w_f_mean_D6', 'f57_w_t_PEC_D1', 'f58_w_t_PEC_D2', 'f59_w_t_PEC_D3', 'f60_w_t_PEC_D4', 'f61_w_t_PEC_D5', 'f62_w_t_PEC_D6', 'f63_w_t_rms_A6', 'f64_w_t_rms_D1', 'f65_w_t_rms_D2', 'f67_w_t_rms_D4', 'f68_w_t_rms_D5', 'f69_w_t_rms_D6', 'f71_w_t_peak2peak_D1', 'f72_w_t_pe

In [29]:
print("Mutual Information: \n",len(Mutual_Information[0][0]))
print("Chi2: \n",len(Chi2[0][0]))
print("ReliefF: \n",len(ReliefF[0][0]))

Mutual Information: 
 58
Chi2: 
 23
ReliefF: 
 27


In [30]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split


In [31]:


#Naive Bayes classifier with all features

cmodel = GaussianNB()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
cmodel.fit(X_train, y_train)
y_pred = cmodel.predict(X_test)
y_pred_proba = cmodel.predict_proba(X_test)[:, 1]  # Probability estimates for AUC

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_proba)

In [32]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("AUC: ", auc)


Accuracy:  0.9432314410480349
Precision:  0.6086956521739131
Recall:  0.7777777777777778
F1:  0.6829268292682927
AUC:  0.9462875197472354


In [34]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("AUC: ", auc)

Accuracy:  0.9432314410480349
Precision:  0.6086956521739131
Recall:  0.7777777777777778
F1:  0.6829268292682927
AUC:  0.9462875197472354


In [35]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("AUC: ", auc)


Accuracy:  0.9432314410480349
Precision:  0.6086956521739131
Recall:  0.7777777777777778
F1:  0.6829268292682927
AUC:  0.9462875197472354


In [36]:
MI_Scores = []
Chi_Scores = []
ReliefF_Scores = []

### Naive Bayes with all scores of mutual information, chi2 and ReliefF

In [37]:
#Naive Bayes with all scores of mutual information
for MI in Mutual_Information:
    model = GaussianNB()
    X_train, X_test, y_train, y_test = train_test_split(X[MI[0]], y, test_size=0.2, random_state=42)
    cmodel.fit(X_train, y_train)
    y_pred = cmodel.predict(X_test)
    y_pred_proba = cmodel.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
    # # accuracy = accuracy_score(y_test, y_pred)
    # # precision = precision_score(y_test, y_pred)
    # # recall = recall_score(y_test, y_pred)
    # # f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_proba)
    MI_Scores.append([auc,MI[2]])

In [38]:
for Ch in Chi2:
    model = GaussianNB()
    X_train, X_test, y_train, y_test = train_test_split(X[Ch[0]], y, test_size=0.2, random_state=42)
    cmodel.fit(X_train, y_train)
    y_pred = cmodel.predict(X_test)
    y_pred_proba = cmodel.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
    # # accuracy = accuracy_score(y_test, y_pred)
    # # precision = precision_score(y_test, y_pred)
    # # recall = recall_score(y_test, y_pred)
    # # f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_proba)
    Chi_Scores.append([auc,Ch[2]])

In [39]:
for RF in ReliefF:
    model = GaussianNB()
    X_train, X_test, y_train, y_test = train_test_split(X[RF[0]], y, test_size=0.2, random_state=42)
    cmodel.fit(X_train, y_train)
    y_pred = cmodel.predict(X_test)
    y_pred_proba = cmodel.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
    # # accuracy = accuracy_score(y_test, y_pred)
    # # precision = precision_score(y_test, y_pred)
    # # recall = recall_score(y_test, y_pred)
    # # f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred_proba)
    ReliefF_Scores.append([auc,RF[2]])

In [40]:
MI_Scores.sort(key=lambda x: x[0], reverse=True)
Chi_Scores.sort(key=lambda x: x[0], reverse=True)
ReliefF_Scores.sort(key=lambda x: x[0], reverse=True)

In [41]:
for i in MI_Scores:
    print("Mutual Information: ",i[0], i[1])

Mutual Information:  0.9652448657187993 300
Mutual Information:  0.9626119010005265 500
Mutual Information:  0.9599789362822538 450
Mutual Information:  0.9560294892048447 400
Mutual Information:  0.9557661927330173 200
Mutual Information:  0.9547130068457083 150
Mutual Information:  0.9518167456556081 100
Mutual Information:  0.9476040021063717 100
Mutual Information:  0.9462875197472355 150
Mutual Information:  0.9460242232754081 350
Mutual Information:  0.9415481832543443 250


In [42]:
for i in Chi_Scores:
    print("Chi2: ",i[0], i[1])

Chi2:  0.9784096893101633 100
Chi2:  0.977751448130595 300
Chi2:  0.9726171669299631 250
Chi2:  0.9697209057398631 500
Chi2:  0.9681411269088994 450
Chi2:  0.9612954186413902 150
Chi2:  0.9583991574512902 400
Chi2:  0.9570826750921537 200
Chi2:  0.9476040021063717 350


In [43]:
for i in ReliefF_Scores:
    print("ReliefF: ",i[0], i[1])

ReliefF:  0.916535018430753 300
ReliefF:  0.9049499736703528 450
ReliefF:  0.894286466561348 400
ReliefF:  0.8865192206424433 250
ReliefF:  0.8601895734597157 500
ReliefF:  0.84333859926277 350
ReliefF:  0.8384676145339652 100
ReliefF:  0.8353080568720379 150
ReliefF:  0.7840968931016324 200


### Random Forest Classifier with all features

In [44]:
from sklearn.ensemble import RandomForestClassifier
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
y_pred_proba = random_forest.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred_proba)


In [45]:
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)
print("AUC: ", auc)


Accuracy:  0.9650655021834061
Precision:  0.8571428571428571
Recall:  0.6666666666666666
F1:  0.75
AUC:  0.9794628751974724


In [46]:
RFMI_Scores = []
RFChi_Scores = []
RFReliefF_Scores = []

### Random Forest Classifier with all scores of mutual information, chi2 and ReliefF

In [47]:
#Random Forest Classifier with Mutual Info selected features
for M in Mutual_Information:
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X[M[0]], y, test_size=0.2, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
    auc = roc_auc_score(y_test, y_pred_proba)
    RFMI_Scores.append([auc,M[2]])

In [48]:
#Random Forest Classifier with Chi2 selected features
for C in Chi2:
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X[C[0]], y, test_size=0.2, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
    auc = roc_auc_score(y_test, y_pred_proba)
    RFChi_Scores.append([auc,C[2]])

In [49]:
#Random Forest Classifier with ReliefF selected features
for R in ReliefF:
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X[R[0]], y, test_size=0.2, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]  # Probability estimates for AUC
    auc = roc_auc_score(y_test, y_pred_proba)
    RFReliefF_Scores.append([auc,R[2]])


In [50]:
RFMI_Scores.sort(key=lambda x: x[0], reverse=True)
RFChi_Scores.sort(key=lambda x: x[0], reverse=True)
RFReliefF_Scores.sort(key=lambda x: x[0], reverse=True)


In [51]:
for m in Mutual_Information:
    print(len(m[0]))

58
63
56
54
62
62
54
55
58
54
61


In [52]:
for i in RFMI_Scores:
    # Find the corresponding entry in Mutual_Information based on the number of generations
    corresponding_entry = next((entry for entry in Mutual_Information if entry[2] == i[1]), None)
    if corresponding_entry:
        print("Mutual Information: ", i[0], i[1], len(corresponding_entry[0]))

Mutual Information:  0.9835439705107951 350 62
Mutual Information:  0.9806477093206951 250 54
Mutual Information:  0.9805160610847815 200 54
Mutual Information:  0.9802527646129542 400 56
Mutual Information:  0.9784096893101633 150 62
Mutual Information:  0.9774881516587679 300 54
Mutual Information:  0.9766982622432859 150 62
Mutual Information:  0.9764349657714586 450 63
Mutual Information:  0.9755134281200633 100 58
Mutual Information:  0.9747235387045814 100 58
Mutual Information:  0.9735387045813586 500 58


In [53]:
for i in RFChi_Scores:
    # Find the corresponding entry in Mutual_Information based on the number of generations
    corresponding_entry = next((entry for entry in Chi2 if entry[2] == i[1]), None)
    if corresponding_entry:
        print("Chi2: ", i[0], i[1], len(corresponding_entry[0]))

Chi2:  0.9836756187467088 300 25
Chi2:  0.978146392838336 200 29
Chi2:  0.9772248551869405 350 26
Chi2:  0.9757767245918905 250 26
Chi2:  0.9752501316482359 500 26
Chi2:  0.974855186940495 150 32
Chi2:  0.9739336492890995 450 26
Chi2:  0.9706424433912586 100 27
Chi2:  0.9628751974723538 400 23


In [54]:
for i in RFReliefF_Scores:
    # Find the corresponding entry in Mutual_Information based on the number of generations
    corresponding_entry = next((entry for entry in ReliefF if entry[2] == i[1]), None)
    if corresponding_entry:
        print("ReliefF: ", i[0], i[1], len(corresponding_entry[0]))

ReliefF:  0.9844655081621906 300 27
ReliefF:  0.9734070563454449 450 28
ReliefF:  0.968272775144813 100 35
ReliefF:  0.9589257503949448 500 27
ReliefF:  0.9514218009478672 350 30
ReliefF:  0.9507635597682991 200 25
ReliefF:  0.9366771985255398 400 28
ReliefF:  0.9339125855713533 150 27
ReliefF:  0.9186413902053713 250 29


In [55]:
import pandas as pd
#print all the scores in mutual information and the number of generations
for i in RFMI_Scores:
    print("Mutual Information: ",i[0], i[1])



Mutual Information:  0.9835439705107951 350
Mutual Information:  0.9806477093206951 250
Mutual Information:  0.9805160610847815 200
Mutual Information:  0.9802527646129542 400
Mutual Information:  0.9784096893101633 150
Mutual Information:  0.9774881516587679 300
Mutual Information:  0.9766982622432859 150
Mutual Information:  0.9764349657714586 450
Mutual Information:  0.9755134281200633 100
Mutual Information:  0.9747235387045814 100
Mutual Information:  0.9735387045813586 500


### Red Neuronal