In [46]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import fbeta_score, precision_score, precision_recall_curve, auc, roc_curve
from xgboost import XGBClassifier


In [13]:
# Load the pre-processed train and test datasets (Parquet format).
# The run directory is kept in one constant so that switching to another
# pre-processing run means editing a single line instead of two paths.
DATA_DIR = './CSE-CIC-IDS2018/pre-processed/2024-07-28_18-39-43'

train_df = pd.read_parquet(f'{DATA_DIR}/train_dataset_treated.parquet')
test_df = pd.read_parquet(f'{DATA_DIR}/test_dataset_treated.parquet')

In [None]:
# Optional: load the complete train and test datasets instead of the ones
# loaded earlier in the notebook.
# Fill in BOTH paths to enable this cell. While either one is empty the cell
# does nothing, so a "Restart & Run All" neither fails on an empty path nor
# rebinds train_df / test_df.
FULL_TRAIN_PATH = ''
FULL_TEST_PATH = ''

if FULL_TRAIN_PATH and FULL_TEST_PATH:
    train_df = pd.read_parquet(FULL_TRAIN_PATH)
    test_df = pd.read_parquet(FULL_TEST_PATH)

In [14]:
# Training split: the 'Label' column is the target, every other column is a feature.
y_train = train_df['Label']
X_train = train_df.drop(columns=['Label'])

In [15]:
# Test split: the 'Label' column is the target, every other column is a feature.
y_test = test_df['Label']
X_test = test_df.drop(columns=['Label'])

In [33]:
# Random Forest: configure a 300-tree ensemble with unrestricted depth,
# seeded for reproducibility, then fit it on the training split.
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    min_samples_split=2,
    random_state=42,
)

rf.fit(X_train, y_train)

In [17]:
# AdaBoost (adaptive boosting): configure 200 boosting rounds with a 0.1
# learning rate, seeded for reproducibility, then fit on the training split.
adb = AdaBoostClassifier(
    n_estimators=200,
    learning_rate=0.1,
    random_state=42,
)

adb.fit(X_train, y_train)



In [25]:
# XGBoost: configure 300 shallow (depth-3) boosted trees with row and column
# subsampling, evaluated with log-loss, then fit on the training split.
# NOTE(review): `use_label_encoder` is believed to be deprecated in recent
# XGBoost releases -- confirm the installed version still accepts it.
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=3,
    learning_rate=0.2,
    subsample=0.9,
    colsample_bytree=0.8,
    eval_metric='logloss',
    use_label_encoder=False,
    random_state=42,
)

xgb.fit(X_train, y_train)

In [48]:
# Gradient Boosting Machine: configure 300 depth-6 trees, each stage fitted on
# half of the rows (subsample=0.5) and considering 70% of the features per
# split (max_features=0.7), then fit on the training split.
gbm = GradientBoostingClassifier(
    n_estimators=300,
    max_depth=6,
    min_samples_split=2,
    min_samples_leaf=1,
    learning_rate=0.1,
    subsample=0.5,
    max_features=0.7,
    random_state=42,
)

gbm.fit(X_train, y_train)

In [26]:
# Test-set predictions from the Random Forest model.
# y_pred / y_pred_proba are the same names written by every prediction cell in
# this notebook, so the metrics cell evaluates whichever one was run last.
# Column 1 of predict_proba is kept (presumably the positive class -- confirm
# against the label encoding).
y_pred_proba = rf.predict_proba(X_test)[:, 1]
y_pred = rf.predict(X_test)

In [34]:
# Test-set predictions from the AdaBoost model.
# y_pred / y_pred_proba are the same names written by every prediction cell in
# this notebook, so the metrics cell evaluates whichever one was run last.
# Column 1 of predict_proba is kept (presumably the positive class -- confirm
# against the label encoding).
y_pred_proba = adb.predict_proba(X_test)[:, 1]
y_pred = adb.predict(X_test)

In [39]:
# Test-set predictions from the XGBoost model.
# y_pred / y_pred_proba are the same names written by every prediction cell in
# this notebook, so the metrics cell evaluates whichever one was run last.
# Column 1 of predict_proba is kept (presumably the positive class -- confirm
# against the label encoding).
y_pred_proba = xgb.predict_proba(X_test)[:, 1]
y_pred = xgb.predict(X_test)

In [49]:
# Test-set predictions from the Gradient Boosting model.
# y_pred / y_pred_proba are the same names written by every prediction cell in
# this notebook, so the metrics cell evaluates whichever one was run last.
# Column 1 of predict_proba is kept (presumably the positive class -- confirm
# against the label encoding).
y_pred_proba = gbm.predict_proba(X_test)[:, 1]
y_pred = gbm.predict(X_test)

In [50]:
# Compute the evaluation metrics for whichever model's predictions currently
# sit in y_pred (hard labels) and y_pred_proba (scores).

# Threshold-dependent metrics, computed from the hard predictions.
f2_test_score = fbeta_score(y_test, y_pred, beta=2)
precision = precision_score(y_test, y_pred)

# Area under the precision-recall curve, computed from the scores.
precision_vals, recall_vals, _ = precision_recall_curve(y_test, y_pred_proba)
prauc = auc(recall_vals, precision_vals)

# ROC curve points, kept available for later inspection or plotting.
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)

# False positive rate of the hard predictions: FP / (FP + TN).
# This was previously read as fpr[1], which is only the second point of the ROC
# curve (the FPR at the strictest score threshold) and is unrelated to the
# decision threshold behind y_pred, so it under-reported the real FPR.
# Class 1 is treated as positive, matching the default pos_label used by
# precision_score / fbeta_score above.
actual_negative = pd.Series(y_test).to_numpy() != 1
predicted_positive = pd.Series(y_pred).to_numpy() == 1
n_negatives = actual_negative.sum()
if n_negatives:
    false_positive_rate = (actual_negative & predicted_positive).sum() / n_negatives
else:
    # No negative samples in the test set: the FPR is undefined.
    false_positive_rate = float('nan')

In [51]:
# Print the four test-set metrics, one per line.
print(
    f'F2 score no conjunto de teste: {f2_test_score}',
    f'Precisão no conjunto de teste: {precision}',
    f'PRAUC no conjunto de teste: {prauc}',
    f'Taxa de falso positivo (FPR) no conjunto de teste: {false_positive_rate}',
    sep='\n',
)

F2 score no conjunto de teste: 1.0
Precisão no conjunto de teste: 1.0
PRAUC no conjunto de teste: 1.0
Taxa de falso positivo (FPR) no conjunto de teste: 0.0


In [21]:
# One-row metrics table for the Random Forest model.
# The values are read from the shared metric variables, so they reflect the
# model whose predictions were evaluated most recently.
metrics_rf = pd.DataFrame([
    {
        'F2 Score': f2_test_score,
        'Precision': precision,
        'PRAUC': prauc,
        'FPR': false_positive_rate,
    }
])

In [37]:
# One-row metrics table for the AdaBoost model.
# The values are read from the shared metric variables, so they reflect the
# model whose predictions were evaluated most recently.
metrics_adb = pd.DataFrame([
    {
        'F2 Score': f2_test_score,
        'Precision': precision,
        'PRAUC': prauc,
        'FPR': false_positive_rate,
    }
])

In [44]:
# One-row metrics table for the XGBoost model.
# The values are read from the shared metric variables, so they reflect the
# model whose predictions were evaluated most recently.
# NOTE(review): no equivalent table is built for the GBM model in the cells
# shown here.
metrics_xgb = pd.DataFrame([
    {
        'F2 Score': f2_test_score,
        'Precision': precision,
        'PRAUC': prauc,
        'FPR': false_positive_rate,
    }
])

In [10]:
# Display the Random Forest metrics table.
# NOTE(review): this cell's execution count (10) is lower than that of the cell
# that builds metrics_rf (21), so the rendered output may predate the current
# values -- re-run top to bottom to confirm.
metrics_rf

Unnamed: 0,F2 Score,Precision,PRAUC,FPR
0,1.0,1.0,1.0,0.0


In [38]:
# Display the AdaBoost metrics table.
metrics_adb

Unnamed: 0,F2 Score,Precision,PRAUC,FPR
0,1.0,1.0,1.0,0.0


In [45]:
# Display the XGBoost metrics table.
metrics_xgb

Unnamed: 0,F2 Score,Precision,PRAUC,FPR
0,1.0,1.0,1.0,0.0
