In [3]:
# Custom
import sys
sys.path.append('../')
from utils.dataset_manager import fit_dataset, get_classes_weights
from utils.constant import FEATURES, LABELS, ALL_ATTACKS 

# General
import warnings
import numpy as np
from joblib import dump

# Model and Metrics
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report

# Warnings
warnings.filterwarnings('ignore')

# Dataset

In [4]:
# Number of input files handed to the dataset loader
n_files = 20

# Build the train / test frames for every attack type in ALL_ATTACKS
# (the loading logic itself lives in utils.dataset_manager.fit_dataset)
df_train, df_test = fit_dataset(n_files, ALL_ATTACKS)

# Training inputs and targets, selected by the project-wide column lists
X_train = df_train[FEATURES]
y_train = df_train[LABELS]

# Report the size of each split
print('Training Population: {}'.format(len(df_train)))
print('Testing Population: {}'.format(len(df_test)))

100%|██████████| 20/20 [00:39<00:00,  1.97s/it]
100%|██████████| 6/6 [00:07<00:00,  1.32s/it]


Training Population: 4723822
Testing Population: 1648176


# Model

In [5]:
# Seed shared by all three estimators so that a Restart & Run All produces
# the same fitted ensemble instead of a different one on every run.
SEED = 42

# Value passed as `class_weight` below. It is computed once and reused rather
# than re-evaluated per estimator over the same training frame.
# NOTE(review): presumably a {label: weight} mapping and a pure function of
# df_train — confirm against utils.dataset_manager.get_classes_weights.
class_weights = get_classes_weights(df_train)

# Classifiers
# NOTE(review): the XGBoost model is the only member that does not receive the
# class weights — confirm this asymmetry is intentional.
xgb_model = xgb.XGBClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED,
)
rf_model = RandomForestClassifier(class_weight=class_weights, random_state=SEED)
log_reg_model = LogisticRegression(class_weight=class_weights, random_state=SEED)

# Create an ensemble using VotingClassifier.
# voting='soft' combines the members' predicted class probabilities rather
# than their hard label votes.
ensemble_model = VotingClassifier(
    estimators=[
        ('xgb', xgb_model),
        ('rf', rf_model),
        ('logistic', log_reg_model),
    ],
    voting='soft',
)

# Fit the ensemble model on the training data
ensemble_model.fit(X_train, y_train)

In [6]:
# Save the model: serialise the fitted ensemble with joblib.
# The output file name embeds n_files so runs with different values do not
# overwrite each other.
name = f"../outputs/voting_classifier_{n_files}.joblib"
dump(ensemble_model, filename=name)

['../outputs/voting_classifier_20.joblib']

# Evaluation

In [7]:
# Test inputs and targets, selected with the same column lists used for training
X_test = df_test[FEATURES]
y_test = df_test[LABELS]

# Predict class labels for the test split with the fitted ensemble
y_pred = ensemble_model.predict(X_test)

# Evaluate: build the per-class text report, then print it under a heading
report = classification_report(y_test, y_pred)
print("Classification Report:\n{}".format(report))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    142361
           1       1.00      1.00      1.00    144128
           2       1.00      1.00      1.00    143521
           3       1.00      1.00      1.00    191686
           4       1.00      1.00      1.00    159101
           5       1.00      1.00      1.00    254077
           6       1.00      1.00      1.00    126849
           7       1.00      1.00      1.00     10061
           8       1.00      1.00      1.00     10244
           9       1.00      1.00      1.00     16043
          10       0.94      0.98      0.96       844
          11       0.97      0.95      0.96      1050
          12       1.00      1.00      1.00    116827
          13       1.00      1.00      1.00     71688
          14       1.00      1.00      1.00     94194
          15       0.99      1.00      0.99      2474
          16       1.00      1.00      1.00     35144
    