# XGBoost classification

## Imports

In [5]:
# Custom
import sys
sys.path.append('../')
from utils.dataset_manager import fit_dataset
from utils.constant import FEATURES, LABELS, ALL_ATTACKS 

# General
import warnings
from pathlib import Path

import numpy as np
from joblib import dump

# Model and Metrics
import xgboost as xgb
from sklearn.metrics import classification_report

# Warning
warnings.filterwarnings('ignore')

## Dataset

In [6]:
# Number of files handed to fit_dataset (also reused later in the saved model's filename)
n_files = 20

# fit_dataset returns a (train, test) pair
# NOTE(review): presumably n_files is the number of source files to load - confirm in utils.dataset_manager
df_train, df_test = fit_dataset(n_files, ALL_ATTACKS)

# Feature and label selections for the training split
X_train = df_train[FEATURES]
y_train = df_train[LABELS]

# Prints
n_train, n_test = len(df_train), len(df_test)
print('Training Population: {}'.format(n_train))
print('Testing Population: {}'.format(n_test))

  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:39<00:00,  1.99s/it]
100%|██████████| 6/6 [00:07<00:00,  1.18s/it]


Training Population: 4723822
Testing Population: 1648176


## Model

In [7]:
# Model
# Hyperparameters gathered in one mapping so they can be scanned and tuned in a single place
xgb_params = {
    'n_estimators': 100,
    'max_depth': 3,
    'learning_rate': 0.1,
}
xgb_model = xgb.XGBClassifier(**xgb_params)

# Train
xgb_model.fit(X_train, y_train)

In [8]:
# Save Model
# The output filename embeds n_files so runs with different n_files values
# do not overwrite each other.
name = f"../outputs/xgboost_{n_files}.joblib"

# joblib.dump does not create missing directories; make sure the target folder
# exists so a fresh checkout does not lose the fitted model to a
# FileNotFoundError after the expensive training step.
Path(name).parent.mkdir(parents=True, exist_ok=True)

# Kept as the last expression: dump returns the list of files it wrote,
# which the notebook displays as the cell output.
dump(xgb_model, name)

['../outputs/xgboost_20.joblib']

## Evaluation

In [9]:
# Test split, selected with the same FEATURES / LABELS column sets used for training
X_test = df_test[FEATURES]
y_test = df_test[LABELS]

# Predict
y_pred = xgb_model.predict(X_test)

# Evaluate
report = classification_report(y_test, y_pred)
print("Classification Report:\n{}".format(report))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    142361
           1       1.00      1.00      1.00    144128
           2       1.00      1.00      1.00    143521
           3       1.00      1.00      1.00    191686
           4       1.00      1.00      1.00    159101
           5       1.00      1.00      1.00    254077
           6       1.00      1.00      1.00    126849
           7       1.00      1.00      1.00     10061
           8       1.00      1.00      1.00     10244
           9       1.00      1.00      1.00     16043
          10       0.92      0.98      0.95       844
          11       0.99      0.94      0.96      1050
          12       1.00      1.00      1.00    116827
          13       1.00      1.00      1.00     71688
          14       1.00      1.00      1.00     94194
          15       0.98      0.99      0.98      2474
          16       1.00      1.00      1.00     35144
    