In [1]:
import os
import random
import warnings
import numpy as np
import pandas as pd
import xgboost as xgb
from tqdm import tqdm
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import sys
sys.path.append('../')
from utils.fit_dataset import fit_dataset
from utils.constant import FEATURES, LABELS, ATTACKS 

# XGBoost regression

## Creating datasets

In [2]:
df_train, df_test = fit_dataset(40, ATTACKS)

X_train, y_train = df_train[FEATURES], df_train[LABELS]

100%|██████████| 40/40 [00:50<00:00,  1.26s/it]
100%|██████████| 12/12 [00:09<00:00,  1.22it/s]


In [3]:
# Calculate the scale_pos_weight
positive_class = np.sum(y_train == 1)
negative_class = np.sum(y_train == 0)
scale_pos_weight = negative_class / positive_class

## Training

In [4]:
# Create an instance of XGBClassifier with scale_pos_weight
xgb_model = xgb.XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1, scale_pos_weight=scale_pos_weight)

# Fit the model on the training data
xgb_model.fit(X_train, y_train)

In [6]:
# Predict
X_test, y_test = df_test[FEATURES], df_test[LABELS]
preds = xgb_model.predict(X_test)
# svae model
xgb_model.save_model('xgb.model.bin.json')

In [7]:
accuracy_score(y_test, preds)

0.9915320499021495

In [8]:
# Classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.74      1.00      0.85     71760
           1       1.00      0.99      1.00   2984942

    accuracy                           0.99   3056702
   macro avg       0.87      1.00      0.92   3056702
weighted avg       0.99      0.99      0.99   3056702



## Param Grid XGBoost regressor

In [None]:
param_grid = {
    "max_depth": [3, 4, 5, 7],
    "learning_rate": [0.1, 0.01, 0.05],
    "gamma": [0, 0.25, 1],
    "reg_lambda": [0, 1, 10],
    "scale_pos_weight": [1, 3, 5],
    "subsample": [0.8],
    "colsample_bytree": [0.5],
}

# Init classifier
xgb_cl = xgb.XGBClassifier(objective="binary:logistic")

# Init Grid Search
grid_cv = GridSearchCV(xgb_cl, param_grid, n_jobs=-1, cv=3, scoring="roc_auc")

# Fit
_ = grid_cv.fit(x_train, y_train)

grid_cv.best_score_

NameError: name 'x_train' is not defined

# Binary classification

In [None]:
import sys
sys.path.append('../')
from utils.constant import DATASET_DIRECTORY, FEATURES, LABELS, SYN_FLOOD_ATTACKS

