# Bibliotecas usadas

In [None]:
import pandas as pd
import numpy as np
import sweetviz as sv
import pickle as pkl
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier
import xgboost as xgb

from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

# Funções de utilidade

In [None]:
def run_model(model, X_train, X_test, y_train, y_test, verbose = False, scores = False):
  """Fit ``model`` on the training data and evaluate it on the test data.

  Parameters:
    model: estimator exposing ``fit``, ``predict`` and ``score``.
    X_train, y_train: data the model is fitted on.
    X_test, y_test: data the fitted model is evaluated on.
    verbose: when true, print the train, test and balanced test accuracy.
    scores: when true, return the collected metrics (see Returns).

  Returns:
    ``None`` when ``scores`` is false. Otherwise a tuple ``(metrics, matrix)``:
    ``metrics`` is a one-row DataFrame holding the three accuracies plus the
    precision, recall and F1 of the class reported under the key ``"1"``;
    ``matrix`` is ``confusion_matrix(y_test, y_predicted)``.

  Raises:
    KeyError: if ``scores`` is true and the classification report has no
      entry named ``"1"``.
  """
  model.fit(X_train, y_train)
  y_predicted = model.predict(X_test)

  train_accuracy = model.score(X_train, y_train)
  test_accuracy = accuracy_score(y_test, y_predicted)
  test_balanced_accuracy = balanced_accuracy_score(y_test, y_predicted)

  if verbose:
    print(f"Acurácia no treino {train_accuracy} \nAcurácia no teste: {test_accuracy} \nAcurácia balanceada no teste: {test_balanced_accuracy}")

  if not scores:
    return None

  # Build the report once and reuse it for the three per-class metrics.
  positive_class = classification_report(y_test, y_predicted, output_dict = True)["1"]

  # A list holding one dict yields a one-row frame; a dict of bare scalars
  # would be rejected by pandas because no index can be inferred from it.
  metrics = pd.DataFrame([{
    "Acurácia no Treino": train_accuracy,
    "Acurácia no Teste": test_accuracy,
    "Acurácia Balanceada no Teste": test_balanced_accuracy,
    "Precisão": positive_class["precision"],
    "Recall": positive_class["recall"],
    "F1": positive_class["f1-score"],
  }])

  return metrics, confusion_matrix(y_test, y_predicted)

# Carregam-se os dados pré-processados

In [None]:
# Load the pre-processed dataset. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it open.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
with open("../data/files/Normalized_Data.pkl", "rb") as data_file:
  df = pkl.load(data_file)

In [None]:
# y keeps the two target columns; X is every remaining column of df.
y = df[["attack_cat", "Label"]]
X = df.drop(["attack_cat", "Label"], axis = "columns")

In [None]:
# Hold out 20% of the rows with a fixed seed. The split is stratified on y,
# which holds both target columns.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y[["Label", "attack_cat"]],
  test_size = 0.2,
  stratify = y,
  random_state = 42,
)

# X and y have already been derived from df, so the name is released here.
del df

# Separação da base em K-Folds

In [None]:
# 10-fold splitter; rows are shuffled with a fixed seed before splitting.
k_folds = KFold(
  n_splits = 10,
  shuffle = True,
  random_state = 42,
)

# Seleção das características mais importantes

In [None]:
# Narrow both partitions to the same five feature columns.
X_train, X_test = (
  partition[["dstip", "ct_state_ttl", "sttl", "srcip", "sbytes"]]
  for partition in (X_train, X_test)
)

# Execução dos modelos | Classificação binária

In [None]:
# Candidate models for the binary task. Every estimator that accepts a seed
# is seeded with 42.
ada_boost = AdaBoostClassifier(random_state = 42, algorithm = "SAMME", n_estimators = 200, learning_rate = 1)
random_forest = RandomForestClassifier(random_state = 42, n_estimators = 200, max_depth = 10, min_samples_split = 10)
bagging = BaggingClassifier(random_state = 42)
mlp = MLPClassifier(random_state = 42)

# `min_samples_split` is a scikit-learn tree option, not an XGBoost parameter,
# so it is not passed to XGBClassifier.
xgboost = xgb.XGBClassifier(random_state = 42, n_estimators = 200, max_depth = 10, learning_rate = 1)

# StackingClassifier requires a list of (name, estimator) base learners and
# has no `random_state` parameter. The stack fits its own clones of the base
# learners, so sharing the instances above does not affect their standalone runs.
# NOTE(review): the choice of base learners is a placeholder — confirm.
stacking = StackingClassifier(estimators = [
  ("ada_boost", ada_boost),
  ("random_forest", random_forest),
  ("bagging", bagging),
])

In [None]:
# Evaluate every binary classifier on the "Label" target with k-fold
# cross-validation, keeping one score row and one confusion matrix per fold.
binary_models = [
  ("AdaBoost", ada_boost),
  ("Random Forest", random_forest),
  ("Bagging", bagging),
  ("Stacking", stacking),
  ("MLP", mlp),
  ("XGBoost", xgboost),
]

# Per-model accumulators; lists are used so results can be appended per fold.
fold_scores = {name: [] for name, _ in binary_models}
fold_confusion_matrices = {name: [] for name, _ in binary_models}

# NOTE(review): the folds are drawn from the full X, so the column subset
# applied to X_train/X_test earlier is not used here, and X_train, X_test,
# y_train and y_test are overwritten on every fold — confirm this is intended.
for i, j in k_folds.split(X):
  X_train, X_test = X.iloc[i], X.iloc[j]
  y_train, y_test = y.iloc[i], y.iloc[j]

  for position, (name, model) in enumerate(binary_models):
    # The first header of each fold is printed without a leading blank line.
    print(name if position == 0 else f"\n{name}")
    model_scores, model_confusion_matrix = run_model(model, X_train, X_test, y_train["Label"], y_test["Label"], verbose = True, scores = True)
    fold_scores[name].append(model_scores)
    fold_confusion_matrices[name].append(model_confusion_matrix)

# Expose the results under the per-model names: one DataFrame with a row per
# fold, and one list with a confusion matrix per fold.
ada_boost_scores = pd.concat(fold_scores["AdaBoost"])
random_forest_scores = pd.concat(fold_scores["Random Forest"])
bagging_scores = pd.concat(fold_scores["Bagging"])
stacking_scores = pd.concat(fold_scores["Stacking"])
mlp_scores = pd.concat(fold_scores["MLP"])
xgboost_scores = pd.concat(fold_scores["XGBoost"])

ada_boost_confusion_matrix = fold_confusion_matrices["AdaBoost"]
random_forest_confusion_matrix = fold_confusion_matrices["Random Forest"]
bagging_confusion_matrix = fold_confusion_matrices["Bagging"]
stacking_confusion_matrix = fold_confusion_matrices["Stacking"]
mlp_confusion_matrix = fold_confusion_matrices["MLP"]
xgboost_confusion_matrix = fold_confusion_matrices["XGBoost"]
  

# Execução dos modelos | Classificação multiclasse