# Imports and model functions

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
%matplotlib inline

from scipy.stats import randint, uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve, RandomizedSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score, make_scorer

import time
import psutil
import threading
from memory_profiler import memory_usage

In [36]:
def apply_rf(X_train, y_train, best_params=None, random_state=42, n_jobs=-1, cv=5): 
    measurement_rf = {}
    best_params = best_params or {}

    rf_model = RandomForestClassifier(**best_params, random_state=random_state, n_jobs=n_jobs, verbose=1)
    
    cpu_usage = []
    stop_flag = threading.Event()

    def monitor_cpu():
        while not stop_flag.is_set():
            cpu_usage.append(psutil.cpu_percent(interval=0.1))

    def train_model():
        rf_model.fit(X_train, y_train)

    try:
        cpu_thread = threading.Thread(target=monitor_cpu)
        cpu_thread.start()

        start_time = time.time()
        train_model()
        training_time = time.time() - start_time

        stop_flag.set()
        cpu_thread.join()

        measurement_rf['Training Time (s)'] = training_time
        measurement_rf['Peak CPU Usage (%)'] = max(cpu_usage)
        measurement_rf['Average CPU Usage (%)'] = sum(cpu_usage) / len(cpu_usage) if cpu_usage else 0

        # Modified to use F1 score
        f1_scorer = make_scorer(f1_score, average='weighted')
        cv_scores_rf = cross_val_score(rf_model, X_train, y_train, cv=cv, n_jobs=n_jobs, scoring=f1_scorer)

        return cv_scores_rf, measurement_rf, rf_model

    except Exception as e:
        import traceback
        print("⛔ Full error traceback:")
        traceback.print_exc()
        print(f"Error during Random Forest training: {e}")
        return None, None, None

In [37]:
def eval_dataset_w_RF(X_train, X_test, y_train, y_test, params_rf={'n_estimators': 150, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}):
    # Fitting the model
    cv_scores_rf, measurement_rf, rf_model = apply_rf(X_train, y_train, best_params=params_rf)
    
    start_time = time.time()

    # Making predictions
    y_pred_rf = rf_model.predict(X_test)
    
    training_time = time.time() - start_time
    print("Predict Time (s) - ", training_time)
    
    # Evaluating the model performance
    cv_scores_mean_rf = np.mean(cv_scores_rf)
    print(f'Cross validation average score: {cv_scores_mean_rf:.4f} +/- standard deviation: {np.std(cv_scores_rf):.4f}')

    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f'Accuracy on the test set: {accuracy_rf:.4f}')
    
    # Checking computational cost
    print("Resource measurements:", measurement_rf)
    print(classification_report(y_test, y_pred_rf, digits=4))

In [None]:
import optuna
def show_results(X_train, X_test, y_train, y_test, n_trials=100):
    def objective(trial, X_train, y_train, cv=5):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_categorical('max_depth', [None] + list(range(5, 31))),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
            'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
        }
        
        cv_scores, _, model = apply_rf(X_train, y_train, best_params=params, cv=cv)
        if cv_scores is None:
            return 0
        return np.mean(cv_scores)
        
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)
    best_params = study.best_params
    
    cv_scores_rf, measurement_rf, rf_model = apply_rf(X_train, y_train, n_jobs=15, best_params=best_params)
    
    if cv_scores_rf is None:
        print("Model training failed")
        return
    
    y_pred_rf = rf_model.predict(X_test)
    
    # Print unique values to debug
    print("\nUnique values in test set:", np.unique(y_test))
    print("Unique values in predictions:", np.unique(y_pred_rf))
    
    cv_scores_mean_rf = np.mean(cv_scores_rf)
    
    try:
        f1 = f1_score(y_test, y_pred_rf, average='weighted')
        accuracy = accuracy_score(y_test, y_pred_rf)
        
        print("\nModel Evaluation Results:")
        print("-" * 50)
        print(f'Cross validation average score (F1): {cv_scores_mean_rf:.4f} +/- standard deviation: {np.std(cv_scores_rf):.4f}')
        print(f'F1 Score on test set: {f1:.4f}')
        print(f'Accuracy on test set: {accuracy:.4f}')
        print("\nResource Usage:")
        print("-" * 50)
        print("Resource measurements:", measurement_rf)
        print("\nDetailed Classification Report:")
        print("-" * 50)
        print(classification_report(y_test, y_pred_rf))
    
    except Exception as e:
        print(f"Error during metric calculation: {str(e)}")
        print("Types in test set:", y_pred_rf.dtype)
        print("Types in predictions:", y_pred_rf.dtype)
        raise
    
    return rf_model, best_params

# Prep for model training cicids2018

In [39]:
# Reading data
df = pd.read_csv("..\cicids2018_training.csv")

  df = pd.read_csv("..\cicids2018_training.csv")


In [40]:
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, BorderlineSMOTE

In [41]:
# Preparing training and test splits
X = df.drop('Attack Type', axis=1)
y = df['Attack Type']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [42]:
# Initialize scaling algos
MMS = MinMaxScaler()
X_train_MMS_scaled = MMS.fit_transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [43]:
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 1000000}, random_state=42).fit_resample(X_train_MMS_scaled, y_train)

In [44]:
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 110000, 'DDoS':780000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

# Sync classes

In [45]:
# Function to combine classes
def combine_classes(y, class_mapping):
    return y.map(class_mapping)
# Define the mapping
group_mapping_2018 = {
    'Normal Traffic': 'Normal Traffic',
    'DoS': 'DoS',
    'DDoS': 'DDoS',
    'Brute Force': 'Brute Force',
    'Bot': 'Bots',
    'Infilteration': 'Other'}

In [46]:
df["Attack Type"].unique()

array(['Normal Traffic', 'Bot', 'DoS', 'Brute Force', 'DDoS',
       'Infilteration'], dtype=object)

In [47]:
# Apply to all your sets
y_train = combine_classes(y_train, group_mapping_2018)
y_test = combine_classes(y_test, group_mapping_2018)

y_train_scaled_rus_MMS = combine_classes(y_train_scaled_rus_MMS, group_mapping_2018)
y_train_resampled_scaled_MMS_SMOTE = combine_classes(y_train_resampled_scaled_MMS_SMOTE, group_mapping_2018)

# Search best params for MMS SMOTE

In [None]:
rf_model, best_params = show_results(X_train_resampled_scaled_MMS_SMOTE, 
                                    X_test_MMS_scaled,
                                    y_train_resampled_scaled_MMS_SMOTE, 
                                    y_test, 
                                    n_trials=30)

[I 2025-05-09 15:13:57,465] A new study created in memory with name: no-name-0296d409-34ac-4373-96f2-bfeb5ee6d161
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.6min
[W 2025-05-09 15:15:45,762] Trial 0 failed with parameters: {'n_estimators': 122, 'max_depth': 19, 'min_samples_split': 7, 'min_samples_leaf': 9, 'max_features': None} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\ML\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\ogoreltsev.pav\AppData\Local\Temp\ipykernel_14632\2710070721.py", line 18, in <lambda>
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ogoreltsev.pav\AppData\Local\Temp\ipykernel_14632\2710070

KeyboardInterrupt: 

In [None]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_lgbm={'n_estimators': 228, 'learning_rate': 0.07241523535942174, 'max_depth': 14, 'num_leaves': 79, 'subsample': 0.5650088660864082, 'colsample_bytree': 0.8850730957587873, 'reg_alpha': 0.31650105405212536, 'reg_lambda': 3.8724602641849213})

Predict Time (s) -  9.027400255203247
CV F1: 0.9939 ± 0.0106
Test Accuracy: 0.9990
                precision    recall  f1-score   support

          Bots     0.6898    0.9521    0.8000       584
   Brute Force     0.9993    0.9989    0.9991      2745
          DDoS     0.9998    0.9998    0.9998     38404
           DoS     0.9977    0.9998    0.9987     58124
Normal Traffic     0.9999    0.9989    0.9994    628518
         Other     0.9889    0.9994    0.9941     27851

      accuracy                         0.9990    756226
     macro avg     0.9459    0.9915    0.9652    756226
  weighted avg     0.9991    0.9990    0.9990    756226

Resource Usage: {'Training Time (s)': 38.28263568878174, 'Peak CPU (%)': 99.1, 'Avg CPU (%)': 74.17698412698412}


# Binary with cross-val between datasets

## Prepare data

In [None]:
# Reading data
df = pd.read_csv("..\cicids2018_training.csv")

X_train = df.drop('Attack Type', axis=1)
y_train = df['Attack Type']

del df

In [None]:
# Reading data
df = pd.read_csv("..\cicids2017_test_of_2018.csv")

X_test = df.drop('Attack Type', axis=1)
y_test = df['Attack Type']

del df

In [None]:
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, BorderlineSMOTE

In [None]:
# Initialize scaling algos
MMS = MinMaxScaler()
X_train_MMS_scaled = MMS.fit_transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [None]:
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 1000000}, random_state=42).fit_resample(X_train, y_train)

In [None]:
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 110000, 'DDoS':780000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [None]:
group_mapping_2017 = {
    'Normal Traffic': 'BENIGN',
    'DoS': 'Attack',
    'DDoS': 'Attack',
    'Brute Force': 'Attack',
    'Bots': 'Attack',
    'Other': 'Attack'}

In [None]:
group_mapping_2018 = {
    'Normal Traffic': 'BENIGN',
    'DoS': 'Attack',
    'DDoS': 'Attack',
    'Brute Force': 'Attack',
    'Bot': 'Attack',
    'Infilteration': 'Attack'}

In [None]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2017)
y_test = y_test.map(group_mapping_2018)

## Search Parameters

In [None]:
rf_model, best_params = show_results(X_train_resampled_scaled_MMS_SMOTE, 
                                    X_test_MMS_scaled,
                                    y_train_resampled_scaled_MMS_SMOTE, 
                                    y_test, 
                                    n_trials=30)

[I 2025-05-08 15:52:45,745] A new study created in memory with name: no-name-f6c15b4c-ba73-4338-94d7-8d68efd3f5c4
[I 2025-05-08 15:53:20,941] Trial 0 finished with value: 0.9463691980440976 and parameters: {'n_estimators': 236, 'learning_rate': 0.0031743583016229497, 'max_depth': 5, 'num_leaves': 10, 'subsample': 0.6599208139910135, 'colsample_bytree': 0.9866917940844662, 'reg_alpha': 0.008200707779018225, 'reg_lambda': 3.924635944296941}. Best is trial 0 with value: 0.9463691980440976.
[I 2025-05-08 15:54:15,987] Trial 1 finished with value: 0.9947781486089827 and parameters: {'n_estimators': 251, 'learning_rate': 0.05653593901676786, 'max_depth': 7, 'num_leaves': 61, 'subsample': 0.8930974500057347, 'colsample_bytree': 0.9318301386899517, 'reg_alpha': 0.0018429557707955522, 'reg_lambda': 0.13314839100032558}. Best is trial 1 with value: 0.9947781486089827.
[I 2025-05-08 15:56:22,370] Trial 2 finished with value: 0.9962936879063552 and parameters: {'n_estimators': 717, 'learning_rate'


LightGBM Evaluation Results:
--------------------------------------------------
CV F1: 0.9975 ± 0.0030
Test F1: 0.8548
Test Accuracy: 0.8818

Resource Usage: {'Training Time (s)': 17.43651819229126, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 94.97647058823527}

Classification Report:
              precision    recall  f1-score   support

      Attack       0.99      0.31      0.47   2746847
      BENIGN       0.88      1.00      0.93  13390249

    accuracy                           0.88  16137096
   macro avg       0.93      0.65      0.70  16137096
weighted avg       0.90      0.88      0.85  16137096



In [None]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_lgbm={'n_estimators': 285, 'learning_rate': 0.15274828247019778, 'max_depth': 5, 'num_leaves': 44, 'subsample': 0.6210331060028171, 'colsample_bytree': 0.9909317475119969, 'reg_alpha': 0.4761330684134722, 'reg_lambda': 1.8813380938652553})

Predict Time (s) -  18.106565952301025
CV F1: 0.9974 ± 0.0031
Test Accuracy: 0.8844
              precision    recall  f1-score   support

      Attack     0.9908    0.3237    0.4880   2746847
      BENIGN     0.8781    0.9994    0.9348  13390249

    accuracy                         0.8844  16137096
   macro avg     0.9345    0.6616    0.7114  16137096
weighted avg     0.8973    0.8844    0.8588  16137096

Resource Usage: {'Training Time (s)': 7.788529872894287, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 96.71780821917808}


# Binary with cross-val on single dataset

## Prepare data

In [52]:
# Reading data
df = pd.read_csv("..\cicids2018_training.csv")

# Preparing training and test splits
X = df.drop('Attack Type', axis=1)
y = df['Attack Type']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

  df = pd.read_csv("..\cicids2018_training.csv")


In [53]:
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, BorderlineSMOTE

In [54]:
# Initialize scaling algos
MMS = MinMaxScaler()
X_train_MMS_scaled = MMS.fit_transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [55]:
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 1000000}, random_state=42).fit_resample(X_train_MMS_scaled, y_train)

In [None]:
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 110000, 'DDoS':780000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [None]:
group_mapping_2018 = {
    'Normal Traffic': 'BENIGN',
    'DoS': 'Attack',
    'DDoS': 'Attack',
    'Brute Force': 'Attack',
    'Bot': 'Attack',
    'Infilteration': 'Attack'}

In [49]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2018)
y_test = y_test.map(group_mapping_2018)

## Search Parameters

In [None]:
rf_model, best_params = show_results(X_train_resampled_scaled_MMS_SMOTE, 
                                    X_test_MMS_scaled,
                                    y_train_resampled_scaled_MMS_SMOTE, 
                                    y_test, 
                                    n_trials=30)

[I 2025-05-08 17:19:27,084] A new study created in memory with name: no-name-ca30a774-9e69-4742-8e88-1da0b89a4147
[I 2025-05-08 17:20:06,623] Trial 0 finished with value: 0.9984349205058625 and parameters: {'n_estimators': 383, 'learning_rate': 0.12111930457526512, 'max_depth': 3, 'num_leaves': 91, 'subsample': 0.5835162686194524, 'colsample_bytree': 0.6888231079926798, 'reg_alpha': 0.05721460438284763, 'reg_lambda': 0.001587734342698104}. Best is trial 0 with value: 0.9984349205058625.
[I 2025-05-08 17:21:30,157] Trial 1 finished with value: 0.9980886755221992 and parameters: {'n_estimators': 332, 'learning_rate': 0.009589780908912389, 'max_depth': 15, 'num_leaves': 96, 'subsample': 0.5405799783730312, 'colsample_bytree': 0.7626151265013338, 'reg_alpha': 1.9556808899672304, 'reg_lambda': 1.403980100280639}. Best is trial 0 with value: 0.9984349205058625.
[I 2025-05-08 17:22:24,576] Trial 2 finished with value: 0.998981839357992 and parameters: {'n_estimators': 437, 'learning_rate': 0.


LightGBM Evaluation Results:
--------------------------------------------------
CV F1: 0.9991 ± 0.0003
Test F1: 0.9990
Test Accuracy: 0.9990

Resource Usage: {'Training Time (s)': 11.896596431732178, 'Peak CPU (%)': 98.3, 'Avg CPU (%)': 92.89913793103446}

Classification Report:
              precision    recall  f1-score   support

      Attack       0.99      1.00      1.00    127708
      BENIGN       1.00      1.00      1.00    628518

    accuracy                           1.00    756226
   macro avg       1.00      1.00      1.00    756226
weighted avg       1.00      1.00      1.00    756226



In [50]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 115, 'max_depth': 30, 'min_samples_split': 11, 'min_samples_leaf': 1, 'max_features': None})

⛔ Full error traceback:
Error during Random Forest training: Input contains NaN


Traceback (most recent call last):
  File "C:\Users\ogoreltsev.pav\AppData\Local\Temp\ipykernel_14632\2897538565.py", line 22, in apply_rf
    train_model()
  File "C:\Users\ogoreltsev.pav\AppData\Local\Temp\ipykernel_14632\2897538565.py", line 15, in train_model
    rf_model.fit(X_train, y_train)
  File "c:\ML\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ML\Lib\site-packages\sklearn\ensemble\_forest.py", line 360, in fit
    X, y = validate_data(
           ^^^^^^^^^^^^^^
  File "c:\ML\Lib\site-packages\sklearn\utils\validation.py", line 2961, in validate_data
    X, y = check_X_y(X, y, **check_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\ML\Lib\site-packages\sklearn\utils\validation.py", line 1387, in check_X_y
    y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

AttributeError: 'NoneType' object has no attribute 'predict'

# MultiClass with cross-val between datasets

## Prepare data

In [None]:
# Reading data
df = pd.read_csv("..\cicids2018_training.csv")

X_train = df.drop('Attack Type', axis=1)
y_train = df['Attack Type']

del df

In [None]:
# Reading data
df = pd.read_csv("..\cicids2017_test_of_2018.csv")

X_test = df.drop('Attack Type', axis=1)
y_test = df['Attack Type']

del df

In [None]:
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, BorderlineSMOTE

In [None]:
# Initialize scaling algos
MMS = MinMaxScaler()
X_train_MMS_scaled = MMS.fit_transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [None]:
X_train_resampled_rus, y_train_resampled_rus = RandomUnderSampler(sampling_strategy={'Normal Traffic': 1000000}, random_state=42).fit_resample(X_train, y_train)

In [None]:
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 110000, 'DDoS':780000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [None]:
group_mapping_2018 = {
    'Normal Traffic': 'Normal Traffic',
    'DoS': 'DoS',
    'DDoS': 'DDoS',
    'Brute Force': 'Brute Force',
    'Bot': 'Bots',
    'Infilteration': 'Other'}

In [None]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2018)

## Search Parameters

In [None]:
rf_model, best_params = show_results(X_train_resampled_scaled_MMS_SMOTE, 
                                    X_test_MMS_scaled,
                                    y_train_resampled_scaled_MMS_SMOTE, 
                                    y_test, 
                                    n_trials=30)

[I 2025-05-08 18:22:47,278] A new study created in memory with name: no-name-41646ae6-e686-4cd8-8672-c43f676f32f9
[I 2025-05-08 18:25:19,753] Trial 0 finished with value: 0.9773941901396974 and parameters: {'n_estimators': 292, 'learning_rate': 0.009817646069583031, 'max_depth': 10, 'num_leaves': 17, 'subsample': 0.9975221533290692, 'colsample_bytree': 0.506685846212074, 'reg_alpha': 0.4110947731660012, 'reg_lambda': 0.0011018990142059504}. Best is trial 0 with value: 0.9773941901396974.
[I 2025-05-08 18:29:46,996] Trial 1 finished with value: 0.9763116617159586 and parameters: {'n_estimators': 389, 'learning_rate': 0.005558281677372325, 'max_depth': 11, 'num_leaves': 85, 'subsample': 0.9706927476707258, 'colsample_bytree': 0.9667394297939542, 'reg_alpha': 0.05879130150671738, 'reg_lambda': 0.3880107868025716}. Best is trial 0 with value: 0.9773941901396974.
[I 2025-05-08 18:35:03,793] Trial 2 finished with value: 0.9860617839709296 and parameters: {'n_estimators': 699, 'learning_rate'

KeyboardInterrupt: 

In [None]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_lgbm={'n_estimators': 782, 'learning_rate': 0.17020900593347912, 'max_depth': 9, 'num_leaves': 31, 'subsample': 0.6804410895439533, 'colsample_bytree': 0.9448604525697095, 'reg_alpha': 0.0011270587279211745, 'reg_lambda': 7.867365972878037})

Predict Time (s) -  449.11964774131775
CV F1: 0.9865 ± 0.0250
Test Accuracy: 0.8664
                precision    recall  f1-score   support

          Bots     0.9995    0.0269    0.0525    286191
   Brute Force     0.9807    0.4131    0.5813    381784
          DDoS     1.0000    0.0010    0.0019   1263933
           DoS     0.9933    0.7062    0.8255    654300
Normal Traffic     0.8834    0.9952    0.9360  13390249
         Other     0.0611    0.1587    0.0883    160639

      accuracy                         0.8664  16137096
     macro avg     0.8197    0.3835    0.4142  16137096
  weighted avg     0.8932    0.8664    0.8258  16137096

Resource Usage: {'Training Time (s)': 107.71467423439026, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 97.70577651515151}
