In [46]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,GRU, Dropout, Dense
from tensorflow.keras.optimizers import Adam


In [47]:
df = pd.read_csv("../../data/Merge_Falhas_Resultados.csv")
df["S_GROUP_ID_1"]

0        1
1        0
2        4
3        2
4        1
        ..
40149    4
40150    0
40151    4
40152    2
40153    2
Name: S_GROUP_ID_1, Length: 40154, dtype: int64

In [48]:
# Converter a coluna para binário
df['S_GROUP_ID_1'] = (df['S_GROUP_ID_1'] > 0).astype(int)
df['S_GROUP_ID_1']

0        1
1        0
2        1
3        1
4        1
        ..
40149    1
40150    0
40151    1
40152    1
40153    1
Name: S_GROUP_ID_1, Length: 40154, dtype: int32

In [49]:
# Separando as features (X) e o target (y)
X = df.drop(columns=['S_GROUP_ID_1', 'KNR'])  # 'KNR' é apenas um identificador, então deve ser removido
y = df['S_GROUP_ID_1']

In [50]:
# Separando em dados de treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [51]:
# Converte X_train e X_test para arrays NumPy, caso ainda não sejam.
X_train = np.array(X_train)
X_test = np.array(X_test)

# Reestrutura X_train e X_test para ter 3 dimensões.
# A nova forma do array será (n_samples, n_features, 1)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [52]:
# Construção do modelo com LSTM
model_1 = Sequential()

model_1.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)))
model_1.add(LSTM(50, activation='relu'))
model_1.add(Dense(1))

model_1.compile(optimizer='adam', loss='mse')

  super().__init__(**kwargs)


In [53]:
# Treinamento do modelo
model_1.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 18ms/step - loss: 20607.0117 - val_loss: 24.3034
Epoch 2/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 23.1597 - val_loss: 17.7513
Epoch 3/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 17.1813 - val_loss: 13.8541
Epoch 4/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 13.5462 - val_loss: 9.8036
Epoch 5/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19ms/step - loss: 9.8860 - val_loss: 6.4078
Epoch 6/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 6.1471 - val_loss: 3.5133
Epoch 7/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 3.3255 - val_loss: 2.6422
Epoch 8/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19ms/step - loss: 2.3256 - val_loss: 1.357

<keras.src.callbacks.history.History at 0x2375ca66950>

In [54]:
#Prever os dados de teste
y_pred_1 = model_1.predict(X_test)

# Converter as probabilidades em classes binárias (0 ou 1)
y_pred_classes_1 = (y_pred_1 > 0.5).astype(int)

# Calcular as principais métricas
accuracy = accuracy_score(y_test, y_pred_classes_1)
precision = precision_score(y_test, y_pred_classes_1)
recall = recall_score(y_test, y_pred_classes_1)
f1 = f1_score(y_test, y_pred_classes_1)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# # Matriz de Confusão
# cm = confusion_matrix(y_test, y_pred_classes)

# # Exibindo a Matriz de Confusão
# disp = ConfusionMatrixDisplay(confusion_matrix=cm)
# disp.plot(cmap=plt.cm.Blues)
# plt.title('Confusion Matrix')
# plt.show()

[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Accuracy: 0.6225
Precision: 0.6225
Recall: 1.0000
F1-Score: 0.7673


In [55]:
df["S_GROUP_ID_1"].describe()

count    40154.000000
mean         0.621507
std          0.485017
min          0.000000
25%          0.000000
50%          1.000000
75%          1.000000
max          1.000000
Name: S_GROUP_ID_1, dtype: float64

In [56]:
# Construção do modelo com GRU
model_2 = Sequential()

model_2.add(GRU(50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)))
model_2.add(GRU(50, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

model_2.compile(optimizer='adam', loss='binary_crossentropy')

  super().__init__(**kwargs)


In [57]:
# Treinamento do modelo
model_2.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 24ms/step - loss: 0.9091 - val_loss: 0.6834
Epoch 2/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 24ms/step - loss: 0.6895 - val_loss: 0.6709
Epoch 3/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 26ms/step - loss: 0.6911 - val_loss: 0.6827
Epoch 4/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 30ms/step - loss: 0.6771 - val_loss: 0.7002
Epoch 5/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 26ms/step - loss: 0.6794 - val_loss: 0.6633
Epoch 6/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 27ms/step - loss: 0.6682 - val_loss: 0.6650
Epoch 7/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 26ms/step - loss: 0.6649 - val_loss: 0.6613
Epoch 8/50
[1m1004/1004[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 26ms/step - loss: 0.6649 - val_loss: 0.6625
Epoch 9/

<keras.src.callbacks.history.History at 0x2376a9fa450>

In [58]:
# Prever os dados de teste
y_pred_2 = model_2.predict(X_test)

# Converter as probabilidades em classes binárias (0 ou 1)
y_pred_classes_2 = (y_pred_2 > 0.5).astype(int)

# Calcular as principais métricas
accuracy = accuracy_score(y_test, y_pred_classes_2)
precision = precision_score(y_test, y_pred_classes_2)
recall = recall_score(y_test, y_pred_classes_2)
f1 = f1_score(y_test, y_pred_classes_2)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
Accuracy: 0.6197
Precision: 0.6406
Recall: 0.8864
F1-Score: 0.7437
