In [1]:
# Custom
import sys
sys.path.append('../')
from utils.fit_dataset import fit_dataset
from utils.constant import FEATURES, LABELS, ATTACKS 

# General
import warnings
from joblib import dump

# Model and Metrics
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report

# Warnings
warnings.filterwarnings('ignore')

# Dataset

In [2]:
n_files = 10

df_train, df_test = fit_dataset(n_files, ATTACKS)

X_train, y_train = df_train[FEATURES], df_train[LABELS]

# Prints
print('Training Population: {}'.format(len(df_train)))
print('Testing Population: {}'.format(len(df_test)))

100%|██████████| 10/10 [00:08<00:00,  1.15it/s]
100%|██████████| 3/3 [00:02<00:00,  1.11it/s]


Training Population: 2366956
Testing Population: 711796


# Model

In [3]:
# Classifiers
xgb_model = xgb.XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
logistic_model = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=42)

# Create an ensemble using VotingClassifier
ensemble_model = VotingClassifier(estimators=[
    ('xgb', xgb_model),
    ('rf', rf_model),
    ('logistic', logistic_model)],
    voting='soft')

# Fit the ensemble model on the training data
ensemble_model.fit(X_train, y_train)

In [4]:
# Save de Model
name = f"../outputs/voting_classifier_{n_files}.joblib"
dump(ensemble_model, name)

['../outputs/voting_classifier_10.joblib']

# Evaluation

In [7]:
X_test, y_test = df_test[FEATURES], df_test[LABELS]

# Predict
y_pred = xgb_model.predict(X_test)

# Evaluate
print('Accuracy: {:4f}'.format(accuracy_score(y_test, y_pred)))
print('Recall: {:4f}'.format(recall_score(y_test, y_pred)))
print('Precision: {:4f}'.format(precision_score(y_test, y_pred)))
print('F1: {:4f}'.format(f1_score(y_test, y_pred)))
print("Classification Report:\n{}".format(classification_report(y_test, y_pred)))

NotFittedError: need to call fit or load_model beforehand