# Treinamento do Modelo

In [None]:
# #!pip uninstall tensorflow
# !pip install tensorflow

In [None]:
# Importação das bibliotecas necessárias
import gc
import random

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import regularizers

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedKFold, cross_val_score

In [None]:
# Define sementes para a possível replicação dos resultados
def reset_seeds(seed=42):
    """Seed NumPy, Python's `random` module and TensorFlow with one value.

    Args:
        seed: Value handed to each of the three seeding functions.
            Defaults to 42.
    """
    # Seeding order: NumPy, then the stdlib generator, then TensorFlow.
    for apply_seed in (np.random.seed, random.seed, tf.random.set_seed):
        apply_seed(seed)

Arquitetura do modelo:

In [None]:
def create_model(input_shape=(44, 116, 1), learning_rate=0.001):
    """Build and compile the binary-classification CNN.

    The network has two Conv2D -> BatchNormalization -> MaxPooling2D stages
    (32 then 64 filters, 5x5 kernels, 'l2' kernel regularizer), followed by
    Flatten, Dropout(0.3), Dense(64, relu), Dropout(0.3) and a single sigmoid
    output unit.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
            Defaults to (44, 116, 1).
        learning_rate: Learning rate of the Adam optimizer. Defaults to 0.001.

    Returns:
        A compiled `keras.Model` using binary cross-entropy loss and a
        `BinaryAccuracy` metric named "binary_accuracy".
    """
    # Input layer
    inputs = keras.Input(shape=input_shape)

    # Convolutional stages: convolution, batch normalization, 2x2 max pooling
    x = inputs
    for n_filters in (32, 64):
        x = layers.Conv2D(
            n_filters,
            (5, 5),
            strides=(1, 1),
            padding="same",
            activation="relu",
            kernel_regularizer='l2',
        )(x)
        x = layers.BatchNormalization(momentum=0.99, epsilon=0.001)(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Fully connected head with dropout before and after the hidden layer
    x = layers.Flatten()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(units=64, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(units=1, activation="sigmoid")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    # Compile with Adam; the metric name is what the training callbacks monitor
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        loss="binary_crossentropy",
        optimizer=opt,
        metrics=[tf.keras.metrics.BinaryAccuracy(name="binary_accuracy")],
    )

    return model

In [None]:
# Directory holding the pre-processed dataset
input_file_path = "../input/auteth-intrusion-dataset-treated"

# Load the input features. The context manager closes the .npz archive as soon
# as the array has been read into memory, instead of leaving the file open.
with np.load(f"{input_file_path}/x_indoors_packets_agg.npz") as data:
    x = data['arr_0']  # feature array stored under the key 'arr_0'

# Load the labels the same way
with np.load(f"{input_file_path}/y_indoor_label_agg.npz") as data:
    y = data['arr_0']  # label array stored under the key 'arr_0'

In [None]:
# Check the shape of the input data.
# The recorded output is (643266, 44, 116): samples first, then the 44 x 116
# grid that create_model declares as its input (with an extra channel axis of 1).
x.shape

(643266, 44, 116)

In [None]:
# Reset the seeds, build the model and show its summary
reset_seeds()
model = create_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()

None


In [None]:
# Stratified 5-fold cross-validation: a fresh model is trained on every fold
# and its validation metrics and loss curves are collected.
skf = StratifiedKFold(n_splits=5, random_state=1, shuffle=True)
models = []
metrics_list = []  # one dict of validation metrics per fold
losses = []  # one dict of per-epoch training/validation losses per fold

# Loop over the folds
for fold_number, (train_index, val_index) in enumerate(skf.split(x, y)):
    print(f"Fold atual: {fold_number + 1}")

    # Re-seed on every fold so the results do not depend on what was executed
    # earlier in the kernel (e.g. re-running this cell).
    reset_seeds()

    # Materialise the validation split once; it is used by fit() and predict()
    x_val = x[val_index]
    y_val = y[val_index]

    # Checkpoint on the *validation* accuracy; the training accuracy would
    # reward over-fitting when choosing which epoch to keep on disk.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f"model_{fold_number}.keras",
        monitor='val_binary_accuracy',
        save_best_only=True,
        mode='max'
    )
    # Restore the best weights so the evaluation below does not use the
    # weights from `patience` epochs after the best validation loss.
    # NOTE(review): the checkpoint selects by validation accuracy while early
    # stopping selects by validation loss, so the saved file and the evaluated
    # weights may come from different epochs.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
    callbacks_list = [early_stopping, checkpoint]

    # Train the model
    model = create_model()
    history = model.fit(
        x[train_index],
        y[train_index],
        batch_size=64,
        epochs=30,
        validation_data=(x_val, y_val),
        callbacks=callbacks_list
    )

    # Store the training and validation loss curves
    losses.append({
        "fold": fold_number + 1,
        "train_loss": history.history["loss"],
        "val_loss": history.history["val_loss"]
    })

    # Sigmoid scores and their binarised version (threshold 0.5)
    y_score = model.predict(x_val).ravel()
    y_pred = (y_score > 0.5).astype(int)

    # Validation metrics for the positive class (label 1). The metric functions
    # are called directly instead of reading classification_report()["1"],
    # whose key depends on how the label dtype is rendered as a string.
    metrics_dict = {
        "fold": fold_number + 1,
        "accuracy": accuracy_score(y_val, y_pred),
        "precision": precision_score(y_val, y_pred),
        "recall": recall_score(y_val, y_pred),
        "f1-score": f1_score(y_val, y_pred),
        # ROC AUC needs the continuous scores; thresholded labels would reduce
        # the curve to a single operating point.
        "roc_auc": roc_auc_score(y_val, y_score)
    }
    metrics_list.append(metrics_dict)

    # Free memory before the next fold
    del model, x_val, y_val, y_score, y_pred
    gc.collect()
    tf.keras.backend.clear_session()

# Save the per-fold metrics to a CSV file
metrics_df = pd.DataFrame(metrics_list)
metrics_df.to_csv("models_metrics.csv", index=False)

Fold atual: 1
Epoch 1/30
[1m8041/8041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 9ms/step - binary_accuracy: 0.9209 - loss: 0.2877 - val_binary_accuracy: 0.9836 - val_loss: 0.0852
Epoch 2/30
[1m8041/8041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8ms/step - binary_accuracy: 0.9795 - loss: 0.0994 - val_binary_accuracy: 0.9886 - val_loss: 0.0692
Epoch 3/30
[1m8041/8041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8ms/step - binary_accuracy: 0.9835 - loss: 0.0813 - val_binary_accuracy: 0.9914 - val_loss: 0.0571
Epoch 4/30
[1m8041/8041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8ms/step - binary_accuracy: 0.9852 - loss: 0.0747 - val_binary_accuracy: 0.9882 - val_loss: 0.0778
Epoch 5/30
[1m8041/8041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8ms/step - binary_accuracy: 0.9868 - loss: 0.0700 - val_binary_accuracy: 0.9925 - val_loss: 0.0506
Epoch 6/30
[1m8041/8041[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8ms/step - 