In [1]:
from Metaheuristicas.fitness_functions import *


In [2]:
X, y = load_and_preprocess_data(filename='Resources/SeisBenchV1_v1_1.json')


In [3]:
from Metaheuristicas.Simmulated_Annealing import simulated_annealing

cooling_rate = 0.95
max_iter = 680

#### Models Naive Bayes and Decision Tree

In [4]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

NB = GaussianNB()
DT = RandomForestClassifier()

#dataset split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# SA with Mutual Information


In [5]:
best_solution, best_Gfitness = simulated_annealing(X, y, initial_temperature=1, cooling_rate=cooling_rate,max_iter=max_iter ,fitness_function=mutual_information_eval)
SAMIFeatures = X.columns[best_solution.astype(bool)].tolist()

In [6]:
NB.fit(X_train[SAMIFeatures], y_train)

y_pred = NB.predict(X_test[SAMIFeatures])

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))


Accuracy: 0.9331395348837209
Precision: 0.5
Recall: 0.782608695652174
F1: 0.6101694915254238
AUC: 0.8632669646485168


In [7]:
DT.fit(X_train[SAMIFeatures], y_train)

y_pred = DT.predict(X_test[SAMIFeatures])

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))

Accuracy: 0.9651162790697675
Precision: 0.7894736842105263
Recall: 0.6521739130434783
F1: 0.7142857142857143
AUC: 0.8198564269267236


# SA with Chi2

In [8]:
best_solution, best_Gfitness = simulated_annealing(X, y, initial_temperature=1, cooling_rate=cooling_rate,max_iter=max_iter ,fitness_function=mutual_information_eval)
SAX2Features = X.columns[best_solution.astype(bool)].tolist()

In [9]:
NB.fit(X_train[SAX2Features], y_train)
y_pred = NB.predict(X_test[SAX2Features])

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))

Accuracy: 0.9418604651162791
Precision: 0.5428571428571428
Recall: 0.8260869565217391
F1: 0.6551724137931034
AUC: 0.8881213598808073


In [10]:
DT.fit(X_train[SAX2Features], y_train)
y_pred = DT.predict(X_test[SAX2Features])

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))

Accuracy: 0.9709302325581395
Precision: 0.8421052631578947
Recall: 0.6956521739130435
F1: 0.7619047619047619
AUC: 0.8431531897602601


# SA with ReliefF

In [11]:
best_solution, best_Gfitness = simulated_annealing(X, y, initial_temperature=0.1, cooling_rate=cooling_rate,max_iter=max_iter ,fitness_function=mutual_information_eval)
SARFFeatures = X.columns[best_solution.astype(bool)].tolist()

In [12]:
NB.fit(X_train[SARFFeatures], y_train)
y_pred = NB.predict(X_test[SARFFeatures])

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))

Accuracy: 0.9418604651162791
Precision: 0.5454545454545454
Recall: 0.782608695652174
F1: 0.6428571428571429
AUC: 0.8679398618447786


In [13]:
DT.fit(X_train[SARFFeatures], y_train)
y_pred = DT.predict(X_test[SARFFeatures])

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred))

Accuracy: 0.9680232558139535
Precision: 0.8
Recall: 0.6956521739130435
F1: 0.7441860465116279
AUC: 0.8415955573615062


## Red Neuronal


In [14]:
print("Features selected by MI: ", len(SAMIFeatures))
print("Features selected by X2: ", len(SAX2Features))
print("Features selected by ReliefF: ", len(SARFFeatures))

Features selected by MI:  60
Features selected by X2:  60
Features selected by ReliefF:  60


In [15]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

MUTUAL_INFORMATION 

In [16]:

RN = Sequential()

input_dim = X_train[SAMIFeatures].shape[1]
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))
RN.add(Dense(units=32, activation='relu'))
RN.add(Dense(units=1, activation='sigmoid'))
RN.compile(optimizer=Adam(learning_rate=0.001),
           loss=BinaryCrossentropy(),
           metrics=['accuracy'])

history = RN.fit(X_train[SAMIFeatures], y_train, epochs=100, batch_size=32, validation_split=0.2)

loss, accuracy = RN.evaluate(X_test[SAMIFeatures], y_test)

print(f'Test Accuracy: {accuracy}')
y_pred_prob = RN.predict(X_test[SAMIFeatures])
y_pred = (y_pred_prob > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5683 - loss: 0.6542 - val_accuracy: 0.9193 - val_loss: 0.3922
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9066 - loss: 0.3653 - val_accuracy: 0.9193 - val_loss: 0.2328
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8902 - loss: 0.3011 - val_accuracy: 0.9193 - val_loss: 0.2012
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9029 - loss: 0.2295 - val_accuracy: 0.9255 - val_loss: 0.1613
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9127 - loss: 0.1772 - val_accuracy: 0.9317 - val_loss: 0.1251
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9531 - loss: 0.1211 - val_accuracy: 0.9379 - val_loss: 0.1065
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━

CHI2

In [17]:

RN = Sequential()

input_dim = X_train[SAX2Features].shape[1]
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))
RN.add(Dense(units=32, activation='relu'))
RN.add(Dense(units=1, activation='sigmoid'))
RN.compile(optimizer=Adam(learning_rate=0.001),
           loss=BinaryCrossentropy(),
           metrics=['accuracy'])

history = RN.fit(X_train[SAX2Features], y_train, epochs=100, batch_size=32, validation_split=0.2)

loss, accuracy = RN.evaluate(X_test[SAX2Features], y_test)

print(f'Test Accuracy: {accuracy}')
y_pred_prob = RN.predict(X_test[SAX2Features])
y_pred = (y_pred_prob > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.5974 - loss: 0.6420 - val_accuracy: 0.9193 - val_loss: 0.3288
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8900 - loss: 0.3544 - val_accuracy: 0.9193 - val_loss: 0.2220
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9015 - loss: 0.2583 - val_accuracy: 0.9193 - val_loss: 0.1859
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9218 - loss: 0.1903 - val_accuracy: 0.9317 - val_loss: 0.1413
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9465 - loss: 0.1507 - val_accuracy: 0.9503 - val_loss: 0.1121
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9501 - loss: 0.1350 - val_accuracy: 0.9379 - val_loss: 0.1083
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━

RELIEFF

In [18]:
RN = Sequential()

input_dim = X_train[SARFFeatures].shape[1]
RN.add(Dense(units=64, activation='relu', input_dim=input_dim))
RN.add(Dense(units=32, activation='relu'))
RN.add(Dense(units=1, activation='sigmoid'))
RN.compile(optimizer=Adam(learning_rate=0.001),
           loss=BinaryCrossentropy(),
           metrics=['accuracy'])

history = RN.fit(X_train[SARFFeatures], y_train, epochs=100, batch_size=32, validation_split=0.2)

loss, accuracy = RN.evaluate(X_test[SARFFeatures], y_test)

print(f'Test Accuracy: {accuracy}')
y_pred_prob = RN.predict(X_test[SARFFeatures])
y_pred = (y_pred_prob > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_pred_prob))

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9025 - loss: 0.4978 - val_accuracy: 0.9193 - val_loss: 0.2467
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9018 - loss: 0.2957 - val_accuracy: 0.9193 - val_loss: 0.2159
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8662 - loss: 0.3059 - val_accuracy: 0.9193 - val_loss: 0.1820
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9091 - loss: 0.2009 - val_accuracy: 0.9255 - val_loss: 0.1504
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9358 - loss: 0.1555 - val_accuracy: 0.9379 - val_loss: 0.1233
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9516 - loss: 0.1231 - val_accuracy: 0.9441 - val_loss: 0.1043
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━

In [19]:
#Save the results from the optimizations with Pickle
import pickle

with open('SAMIFeatures.pkl', 'wb') as f:
    pickle.dump(SAMIFeatures, f)
with open('SAX2Features.pkl', 'wb') as f:
    pickle.dump(SAX2Features, f)
with open('SARFFeatures.pkl', 'wb') as f:
    pickle.dump(SARFFeatures, f)