# Imports and benchmark functions

In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve, StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier

import time
import psutil
import threading
from memory_profiler import memory_usage

In [3]:
def apply_rf(X_train, y_train, best_params=None, random_state=42, n_jobs=-1, cv=5):
    """Fit a random forest while sampling CPU load, then cross-validate it.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed unchanged to ``fit`` and to
        ``cross_val_score``.
    best_params : dict, optional
        Extra keyword arguments for ``RandomForestClassifier``. ``None`` (or an
        empty dict) means no extra arguments.
    random_state : int
        Seed handed to the classifier.
    n_jobs : int
        Parallelism setting used for both the classifier and the cross-validation.
    cv
        Forwarded as the ``cv`` argument of ``cross_val_score``.

    Returns
    -------
    tuple
        ``(cv_scores, measurements, fitted_model)``. ``measurements`` holds
        ``'Training Time (s)'``, ``'Peak CPU Usage (%)'`` and
        ``'Average CPU Usage (%)'`` for the ``fit`` call only.
        ``(None, None, None)`` is returned when fitting or cross-validation
        raises an ``Exception``; the traceback is printed in that case.

    Notes
    -----
    The cross-validation scores are computed on the data passed in. If that data
    was resampled beforehand, the scores describe the resampled distribution.
    """
    measurement_rf = {}

    # Default to empty dictionary if best_params is not provided
    best_params = best_params or {}

    rf_model = RandomForestClassifier(**best_params, random_state=random_state, n_jobs=n_jobs, verbose=1)

    # CPU samples collected by the background thread while the model is fitting
    cpu_usage = []
    stop_flag = threading.Event()

    def monitor_cpu():
        # Each cpu_percent call is made with interval=0.1, which also paces the loop
        while not stop_flag.is_set():
            cpu_usage.append(psutil.cpu_percent(interval=0.1))

    # daemon=True so a sampler that outlives this call can never keep the process alive
    cpu_thread = threading.Thread(target=monitor_cpu, daemon=True)

    try:
        cpu_thread.start()
        try:
            # Measure training time (memory usage is not measured here)
            start_time = time.perf_counter()
            rf_model.fit(X_train, y_train)
            training_time = time.perf_counter() - start_time
        finally:
            # Stop the sampler on every exit path, including a failing fit() or an
            # interrupted cell; otherwise the thread would keep polling forever.
            stop_flag.set()
            cpu_thread.join()

        # Add measurements
        measurement_rf['Training Time (s)'] = training_time
        # default=0 covers a fit that finished before the first sample was recorded
        measurement_rf['Peak CPU Usage (%)'] = max(cpu_usage, default=0)
        measurement_rf['Average CPU Usage (%)'] = sum(cpu_usage) / len(cpu_usage) if cpu_usage else 0

        # Perform cross-validation
        cv_scores_rf = cross_val_score(rf_model, X_train, y_train, cv=cv, n_jobs=n_jobs)

        return cv_scores_rf, measurement_rf, rf_model

    except Exception as e:
        import traceback
        print("⛔ Full error traceback:")
        traceback.print_exc()  # Print detailed error traceback
        print(f"Error during Random Forest training: {e}")
        return None, None, None

In [4]:
def eval_dataset_w_RF(X_train, X_test, y_train, y_test):
    """Train the benchmark random forest on one dataset variant and print its scores.

    The forest is built by ``apply_rf`` with a fixed hyper-parameter set. The
    function prints the mean and standard deviation of the cross-validation
    scores, the accuracy on ``X_test``/``y_test``, the resource measurements and
    the per-class classification report. Nothing is returned.

    Parameters
    ----------
    X_train, y_train
        Data used for fitting and cross-validation.
    X_test, y_test
        Hold-out data used for the accuracy and the classification report.
    """
    params_rf = {'n_estimators': 150, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 8}

    # Fitting the model
    cv_scores_rf, measurement_rf, rf_model = apply_rf(X_train, y_train, best_params=params_rf)

    # apply_rf signals a failed run with (None, None, None); there is nothing to evaluate then
    if rf_model is None:
        print("Random Forest training failed; skipping evaluation.")
        return

    # Making predictions
    y_pred_rf = rf_model.predict(X_test)

    # Evaluating the model performance on the cross validation set vs accuracy on the test set
    cv_scores_mean_rf = np.mean(cv_scores_rf)
    print(f'Cross validation average score: {cv_scores_mean_rf:.4f} +/- standard deviation: {np.std(cv_scores_rf):.4f}')

    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f'Accuracy on the test set: {accuracy_rf:.4f}')

    # Checking computational cost
    print("Resource measurements:", measurement_rf)
    # zero_division=0 reports 0.00 for classes that were never predicted without
    # emitting an undefined-metric warning for each averaged score
    print(classification_report(y_test, y_pred_rf, zero_division=0))

In [5]:
def eval_dataset_w_KNN(X_train, X_test, y_train, y_test):
    """Train the benchmark k-NN classifier on one dataset variant and print its scores.

    The classifier is built by ``apply_knn`` with a fixed hyper-parameter set. The
    function prints the mean and standard deviation of the cross-validation
    scores, the accuracy on ``X_test``/``y_test``, the resource measurements and
    the per-class classification report. Nothing is returned.

    Parameters
    ----------
    X_train, y_train
        Data used for fitting and cross-validation.
    X_test, y_test
        Hold-out data used for the accuracy and the classification report.
    """
    params_knn = {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'kd_tree', 'leaf_size': 100, 'p': 1}

    # Fitting the model
    cv_scores_knn, measurement_knn, knn_model = apply_knn(X_train, y_train, best_params=params_knn)

    # apply_knn signals a failed run with (None, None, None); there is nothing to evaluate then
    if knn_model is None:
        print("KNN training failed; skipping evaluation.")
        return

    # Making predictions
    y_pred_knn = knn_model.predict(X_test)

    # Evaluating the model performance on the cross validation set vs accuracy on the test set
    cv_scores_mean_knn = np.mean(cv_scores_knn)
    print(f'Cross validation average score: {cv_scores_mean_knn:.4f} +/- standard deviation: {np.std(cv_scores_knn):.4f}')

    accuracy_knn = accuracy_score(y_test, y_pred_knn)
    print(f'Accuracy on the test set: {accuracy_knn:.4f}')

    # Checking computational cost
    print("Resource measurements:", measurement_knn)
    # zero_division=0 reports 0.00 for classes that were never predicted without
    # emitting an undefined-metric warning for each averaged score
    print(classification_report(y_test, y_pred_knn, zero_division=0))

In [6]:
def apply_knn(X_train, y_train, best_params=None, random_state=42, n_jobs=-1, cv=5):
    """Fit and cross-validate a k-NN classifier while sampling CPU load.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed unchanged to ``fit`` and to
        ``cross_val_score``.
    best_params : dict, optional
        Extra keyword arguments for ``KNeighborsClassifier``. ``None`` (or an
        empty dict) means no extra arguments.
    random_state : int
        Not used by this function; kept so the signature matches ``apply_rf``.
    n_jobs : int
        Parallelism setting used for both the classifier and the cross-validation.
    cv
        Forwarded as the ``cv`` argument of ``cross_val_score``.

    Returns
    -------
    tuple
        ``(cv_scores, measurements, fitted_model)``. ``measurements`` holds
        ``'Total Time (s)'`` (fit plus cross-validation), ``'Peak CPU Usage (%)'``
        and ``'Average CPU Usage (%)'``. ``(None, None, None)`` is returned when
        fitting or cross-validation raises an ``Exception``; the traceback is
        printed in that case.
    """
    measurement_knn = {}
    best_params = best_params or {}

    knn_model = KNeighborsClassifier(**best_params, n_jobs=n_jobs)

    # CPU samples collected by the background thread during fit and cross-validation
    cpu_usage = []
    stop_flag = threading.Event()

    def monitor_cpu():
        # Each cpu_percent call is made with interval=0.1, which also paces the loop
        while not stop_flag.is_set():
            cpu_usage.append(psutil.cpu_percent(interval=0.1))

    # daemon=True so a sampler that outlives this call can never keep the process alive
    cpu_thread = threading.Thread(target=monitor_cpu, daemon=True)

    try:
        cpu_thread.start()
        try:
            # Track both training and CV time
            total_start_time = time.perf_counter()

            # Train the model
            knn_model.fit(X_train, y_train)

            # Perform cross-validation
            cv_scores_knn = cross_val_score(knn_model, X_train, y_train, cv=cv, n_jobs=n_jobs)

            total_time = time.perf_counter() - total_start_time
        finally:
            # Stop the sampler on every exit path, including a failing fit() or an
            # interrupted cell; otherwise the thread would keep polling forever.
            stop_flag.set()
            cpu_thread.join()

        measurement_knn['Total Time (s)'] = total_time
        # default=0 covers a run that finished before the first sample was recorded
        measurement_knn['Peak CPU Usage (%)'] = max(cpu_usage, default=0)
        measurement_knn['Average CPU Usage (%)'] = sum(cpu_usage) / len(cpu_usage) if cpu_usage else 0

        return cv_scores_knn, measurement_knn, knn_model

    except Exception as e:
        import traceback
        print("⛔ Full error traceback:")
        traceback.print_exc()
        return None, None, None

# Data balancing


In [7]:
# Reading data
# Forward slashes keep this relative path usable on Windows and POSIX alike and
# avoid backslash escape sequences inside the string literal.
df = pd.read_csv("../../data prep/cicids2018_prep/cicids2018_final.csv")

In [8]:
df['Attack Type'].unique()

array(['Normal Traffic', 'Bot', 'DoS', 'Brute Force', 'DDoS',
       'Infilteration'], dtype=object)

In [9]:
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler

In [10]:
# Separate the 'Attack Type' label column from the feature columns
y = df['Attack Type']
X = df.drop(columns='Attack Type')

# Hold out 30% of the rows for testing; stratify=y keeps the class proportions
# of y in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [11]:
# One scaler of each kind
RS, SS, MMS = RobustScaler(), StandardScaler(), MinMaxScaler()

# Every scaler is fitted on the training split only and then applied, as fitted,
# to the test split.
X_train_RS_scaled, X_test_RS_scaled = RS.fit_transform(X_train), RS.transform(X_test)
X_train_SS_scaled, X_test_SS_scaled = SS.fit_transform(X_train), SS.transform(X_test)
X_train_MMS_scaled, X_test_MMS_scaled = MMS.fit_transform(X_train), MMS.transform(X_test)

In [12]:
print(df['Attack Type'].value_counts())

Attack Type
Normal Traffic    8634196
DDoS               775470
DoS                196299
Bot                143977
Infilteration      107531
Brute Force         94876
Name: count, dtype: int64


## Evals

In [12]:
eval_dataset_w_RF(X_train, X_test, y_train, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  6.8min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.5s finished


Cross validation average score: 0.9817 +/- standard deviation: 0.0011
Accuracy on the test set: 0.9816
Resource measurements: {'Training Time (s)': 415.8986060619354, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.0798055678303}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

           Bot       1.00      0.99      0.99     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.99      0.92      0.95    232641
           DoS       0.99      0.98      0.98     58890
 Infilteration       0.00      0.00      0.00     32259
Normal Traffic       0.98      1.00      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.83      0.81      0.82   2985705
  weighted avg       0.97      0.98      0.98   2985705



  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
eval_dataset_w_RF(X_train_RS_scaled, X_test_RS_scaled, y_train, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  6.2min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.6s finished


Cross validation average score: 0.9817 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9833
Resource measurements: {'Training Time (s)': 381.7312135696411, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.26440922190194}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

           Bot       1.00      0.99      0.99     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.98      0.96      0.97    232641
           DoS       0.99      0.98      0.99     58890
 Infilteration       0.00      0.00      0.00     32259
Normal Traffic       0.98      1.00      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.83      0.82      0.82   2985705
  weighted avg       0.97      0.98      0.98   2985705



  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
eval_dataset_w_RF(X_train_SS_scaled, X_test_SS_scaled, y_train, y_test)

KeyboardInterrupt: 

In [None]:
eval_dataset_w_RF(X_train_MMS_scaled, X_test_MMS_scaled, y_train, y_test)

In [None]:
eval_dataset_w_KNN(X_train, X_test, y_train, y_test)

In [None]:
eval_dataset_w_KNN(X_train_RS_scaled, X_test_RS_scaled, y_train, y_test)

In [None]:
eval_dataset_w_KNN(X_train_SS_scaled, X_test_SS_scaled, y_train, y_test)

In [None]:
eval_dataset_w_KNN(X_train_MMS_scaled, X_test_MMS_scaled, y_train, y_test)

## Under Sampling

In [14]:
from imblearn.under_sampling import RandomUnderSampler, NearMiss

In [13]:
# Settings shared by every random-undersampling run below:
# 'Normal Traffic' is cut to 1,000,000 rows, with a fixed seed.
rus_kwargs = dict(sampling_strategy={'Normal Traffic': 1000000}, random_state=42)

# Unscaled training data
X_train_resampled_rus, y_train_resampled_rus = RandomUnderSampler(**rus_kwargs).fit_resample(X_train, y_train)

# Scaled training data (RobustScaler, StandardScaler, MinMaxScaler)
X_train_scaled_rus_RS, y_train_scaled_rus_RS = RandomUnderSampler(**rus_kwargs).fit_resample(X_train_RS_scaled, y_train)
X_train_scaled_rus_SS, y_train_scaled_rus_SS = RandomUnderSampler(**rus_kwargs).fit_resample(X_train_SS_scaled, y_train)
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(**rus_kwargs).fit_resample(X_train_MMS_scaled, y_train)

In [14]:
# Settings shared by every NearMiss run below:
# 'Normal Traffic' is cut to 1,000,000 rows using NearMiss version 3.
nearmiss_kwargs = dict(sampling_strategy={'Normal Traffic': 1000000}, version=3)

# Unscaled training data
X_train_resampled_NM, y_train_resampled_NM = NearMiss(**nearmiss_kwargs).fit_resample(X_train, y_train)

# Scaled training data (RobustScaler, StandardScaler, MinMaxScaler)
X_train_scaled_NM_RS, y_train_scaled_NM_RS = NearMiss(**nearmiss_kwargs).fit_resample(X_train_RS_scaled, y_train)
X_train_scaled_NM_SS, y_train_scaled_NM_SS = NearMiss(**nearmiss_kwargs).fit_resample(X_train_SS_scaled, y_train)
X_train_scaled_NM_MMS, y_train_scaled_NM_MMS = NearMiss(**nearmiss_kwargs).fit_resample(X_train_MMS_scaled, y_train)




## Evals RF

In [15]:
eval_dataset_w_RF(X_train_resampled_rus, X_test, y_train_resampled_rus, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.5min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.8s finished


Cross validation average score: 0.9547 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9804
Resource measurements: {'Training Time (s)': 94.44755840301514, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.07591463414637}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.50      0.05      0.09     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.90      0.84      0.83   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [16]:
eval_dataset_w_RF(X_train_scaled_rus_RS, X_test_RS_scaled, y_train_scaled_rus_RS, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   17.8s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.5min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.5s finished


Cross validation average score: 0.9547 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9804
Resource measurements: {'Training Time (s)': 91.2171380519867, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.30794392523362}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.50      0.05      0.09     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.90      0.84      0.83   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [17]:
eval_dataset_w_RF(X_train_scaled_rus_SS, X_test_SS_scaled, y_train_scaled_rus_SS, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   13.8s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.1min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.5s finished


Cross validation average score: 0.9547 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9804
Resource measurements: {'Training Time (s)': 70.78761100769043, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.6647524752475}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.48      0.05      0.09     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.89      0.84      0.83   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [18]:
eval_dataset_w_RF(X_train_scaled_rus_MMS, X_test_MMS_scaled, y_train_scaled_rus_MMS, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   51.2s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.4s finished


Cross validation average score: 0.9544 +/- standard deviation: 0.0001
Accuracy on the test set: 0.9802
Resource measurements: {'Training Time (s)': 53.128422021865845, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.67556109725685}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.49      0.04      0.08     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.89      0.83      0.83   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [19]:
eval_dataset_w_RF(X_train_resampled_NM, X_test, y_train_resampled_NM, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   33.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.1s finished


Cross validation average score: 0.9954 +/- standard deviation: 0.0004
Accuracy on the test set: 0.1410
Resource measurements: {'Training Time (s)': 33.91410732269287, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.62546125461256}
                precision    recall  f1-score   support

           Bot       0.99      0.99      0.99     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.62      1.00      0.77    232641
           DoS       0.31      1.00      0.47     58890
 Infilteration       0.01      0.97      0.03     32259
Normal Traffic       1.00      0.01      0.02   2590259

      accuracy                           0.14   2985705
     macro avg       0.65      0.83      0.55   2985705
  weighted avg       0.94      0.14      0.11   2985705



In [20]:
eval_dataset_w_RF(X_train_scaled_NM_RS, X_test_RS_scaled, y_train_scaled_NM_RS, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   33.3s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.1s finished


Cross validation average score: 0.9956 +/- standard deviation: 0.0004
Accuracy on the test set: 0.1411
Resource measurements: {'Training Time (s)': 34.245667934417725, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.13248175182481}
                precision    recall  f1-score   support

           Bot       0.99      0.99      0.99     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.62      1.00      0.77    232641
           DoS       0.30      1.00      0.47     58890
 Infilteration       0.01      0.97      0.03     32259
Normal Traffic       1.00      0.01      0.02   2590259

      accuracy                           0.14   2985705
     macro avg       0.66      0.83      0.55   2985705
  weighted avg       0.94      0.14      0.11   2985705



In [21]:
eval_dataset_w_RF(X_train_scaled_NM_SS, X_test_SS_scaled, y_train_scaled_NM_SS, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   25.9s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.1s finished


Cross validation average score: 0.9958 +/- standard deviation: 0.0002
Accuracy on the test set: 0.1341
Resource measurements: {'Training Time (s)': 26.811686038970947, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.9864864864865}
                precision    recall  f1-score   support

           Bot       0.99      0.99      0.99     43193
   Brute Force       0.99      0.99      0.99     28463
          DDoS       0.62      1.00      0.77    232641
           DoS       0.30      1.00      0.47     58890
 Infilteration       0.01      0.97      0.03     32259
Normal Traffic       0.99      0.00      0.00   2590259

      accuracy                           0.13   2985705
     macro avg       0.65      0.83      0.54   2985705
  weighted avg       0.94      0.13      0.10   2985705



In [22]:
eval_dataset_w_RF(X_train_scaled_NM_MMS, X_test_MMS_scaled, y_train_scaled_NM_MMS, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   18.5s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.1s finished


Cross validation average score: 0.9960 +/- standard deviation: 0.0001
Accuracy on the test set: 0.1321
Resource measurements: {'Training Time (s)': 19.3809974193573, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.41867469879519}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       0.95      1.00      0.97     28463
          DDoS       0.62      1.00      0.77    232641
           DoS       0.30      1.00      0.47     58890
 Infilteration       0.01      0.97      0.03     32259
Normal Traffic       0.99      0.00      0.00   2590259

      accuracy                           0.13   2985705
     macro avg       0.65      0.83      0.54   2985705
  weighted avg       0.94      0.13      0.09   2985705



## Evals KNN

In [23]:
eval_dataset_w_KNN(X_train_resampled_rus, X_test, y_train_resampled_rus, y_test)

Cross validation average score: 0.9456 +/- standard deviation: 0.0005
Accuracy on the test set: 0.9605
Resource measurements: {'Training Time (s)': 15.822944641113281, 'Peak CPU Usage (%)': 14.0, 'Average CPU Usage (%)': 8.185714285714285}
                precision    recall  f1-score   support

           Bot       0.98      1.00      0.99     43193
   Brute Force       0.97      0.99      0.98     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.91      0.96      0.93     58890
 Infilteration       0.11      0.25      0.15     32259
Normal Traffic       0.99      0.97      0.98   2590259

      accuracy                           0.96   2985705
     macro avg       0.81      0.86      0.83   2985705
  weighted avg       0.97      0.96      0.97   2985705



In [24]:
eval_dataset_w_KNN(X_train_scaled_rus_RS, X_test_RS_scaled, y_train_scaled_rus_RS, y_test)

Cross validation average score: 0.9379 +/- standard deviation: 0.0004
Accuracy on the test set: 0.9500
Resource measurements: {'Training Time (s)': 14.49321985244751, 'Peak CPU Usage (%)': 9.4, 'Average CPU Usage (%)': 7.339999999999999}
                precision    recall  f1-score   support

           Bot       0.98      1.00      0.99     43193
   Brute Force       0.98      1.00      0.99     28463
          DDoS       0.84      0.97      0.90    232641
           DoS       0.88      0.97      0.92     58890
 Infilteration       0.12      0.30      0.18     32259
Normal Traffic       0.99      0.95      0.97   2590259

      accuracy                           0.95   2985705
     macro avg       0.80      0.86      0.82   2985705
  weighted avg       0.97      0.95      0.96   2985705



In [25]:
eval_dataset_w_KNN(X_train_scaled_rus_SS, X_test_SS_scaled, y_train_scaled_rus_SS, y_test)

Cross validation average score: 0.9544 +/- standard deviation: 0.0004
Accuracy on the test set: 0.9645
Resource measurements: {'Training Time (s)': 14.538728952407837, 'Peak CPU Usage (%)': 8.9, 'Average CPU Usage (%)': 6.860000000000001}
                precision    recall  f1-score   support

           Bot       0.99      1.00      0.99     43193
   Brute Force       0.98      1.00      0.99     28463
          DDoS       0.94      0.99      0.97    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.13      0.31      0.18     32259
Normal Traffic       0.99      0.97      0.98   2590259

      accuracy                           0.96   2985705
     macro avg       0.84      0.88      0.85   2985705
  weighted avg       0.98      0.96      0.97   2985705



In [26]:
eval_dataset_w_KNN(X_train_scaled_rus_MMS, X_test_MMS_scaled, y_train_scaled_rus_MMS, y_test)

Cross validation average score: 0.9544 +/- standard deviation: 0.0003
Accuracy on the test set: 0.9644
Resource measurements: {'Training Time (s)': 14.576143503189087, 'Peak CPU Usage (%)': 10.3, 'Average CPU Usage (%)': 7.340000000000001}
                precision    recall  f1-score   support

           Bot       0.99      1.00      0.99     43193
   Brute Force       0.98      1.00      0.99     28463
          DDoS       0.94      0.99      0.97    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.13      0.31      0.18     32259
Normal Traffic       0.99      0.97      0.98   2590259

      accuracy                           0.96   2985705
     macro avg       0.84      0.88      0.85   2985705
  weighted avg       0.98      0.96      0.97   2985705



In [27]:
eval_dataset_w_KNN(X_train_resampled_NM, X_test, y_train_resampled_NM, y_test)

Cross validation average score: 0.9849 +/- standard deviation: 0.0003
Accuracy on the test set: 0.1419
Resource measurements: {'Training Time (s)': 6.213958263397217, 'Peak CPU Usage (%)': 10.0, 'Average CPU Usage (%)': 7.219999999999999}
                precision    recall  f1-score   support

           Bot       0.86      1.00      0.92     43193
   Brute Force       0.90      0.99      0.95     28463
          DDoS       0.50      0.99      0.66    232641
           DoS       0.27      0.97      0.43     58890
 Infilteration       0.01      0.97      0.03     32259
Normal Traffic       1.00      0.01      0.03   2590259

      accuracy                           0.14   2985705
     macro avg       0.59      0.82      0.50   2985705
  weighted avg       0.93      0.14      0.10   2985705



In [28]:
eval_dataset_w_KNN(X_train_scaled_NM_RS, X_test_RS_scaled, y_train_scaled_NM_RS, y_test)

Cross validation average score: 0.9832 +/- standard deviation: 0.0001
Accuracy on the test set: 0.1406
Resource measurements: {'Training Time (s)': 5.580179691314697, 'Peak CPU Usage (%)': 10.5, 'Average CPU Usage (%)': 7.175}
                precision    recall  f1-score   support

           Bot       0.90      1.00      0.95     43193
   Brute Force       0.97      0.99      0.98     28463
          DDoS       0.43      0.99      0.60    232641
           DoS       0.27      0.98      0.43     58890
 Infilteration       0.01      0.93      0.03     32259
Normal Traffic       1.00      0.01      0.02   2590259

      accuracy                           0.14   2985705
     macro avg       0.60      0.82      0.50   2985705
  weighted avg       0.93      0.14      0.10   2985705



In [29]:
eval_dataset_w_KNN(X_train_scaled_NM_SS, X_test_SS_scaled, y_train_scaled_NM_SS, y_test)

Cross validation average score: 0.9955 +/- standard deviation: 0.0001
Accuracy on the test set: 0.1419
Resource measurements: {'Training Time (s)': 7.933487415313721, 'Peak CPU Usage (%)': 24.2, 'Average CPU Usage (%)': 17.8}
                precision    recall  f1-score   support

           Bot       0.94      1.00      0.97     43193
   Brute Force       0.98      0.99      0.98     28463
          DDoS       0.66      1.00      0.79    232641
           DoS       0.31      1.00      0.47     58890
 Infilteration       0.01      0.98      0.03     32259
Normal Traffic       1.00      0.01      0.02   2590259

      accuracy                           0.14   2985705
     macro avg       0.65      0.83      0.54   2985705
  weighted avg       0.94      0.14      0.11   2985705



In [30]:
eval_dataset_w_KNN(X_train_scaled_NM_MMS, X_test_MMS_scaled, y_train_scaled_NM_MMS, y_test)

Cross validation average score: 0.9959 +/- standard deviation: 0.0001
Accuracy on the test set: 0.1413
Resource measurements: {'Training Time (s)': 5.639127254486084, 'Peak CPU Usage (%)': 9.8, 'Average CPU Usage (%)': 8.825}
                precision    recall  f1-score   support

           Bot       0.66      1.00      0.80     43193
   Brute Force       0.93      1.00      0.96     28463
          DDoS       0.66      1.00      0.79    232641
           DoS       0.30      1.00      0.46     58890
 Infilteration       0.01      0.98      0.03     32259
Normal Traffic       1.00      0.01      0.02   2590259

      accuracy                           0.14   2985705
     macro avg       0.59      0.83      0.51   2985705
  weighted avg       0.94      0.14      0.11   2985705



## Over Sampling

In [13]:
from imblearn.over_sampling import SMOTE, ADASYN, BorderlineSMOTE

In [33]:
# Oversample each undersampled training set (the `*_rus` variables: unscaled, RS-scaled,
# MMS-scaled) with three techniques: ADASYN, SMOTE and Borderline-SMOTE.
# Every sampler configuration is defined exactly once below so that the nine resampling
# calls cannot drift apart when a parameter or a class target is tuned.
# NOTE(review): RS / MMS presumably denote RobustScaler / MinMaxScaler — confirm against
# the cells that create the scaled frames.

OVERSAMPLING_SEED = 42

# Absolute number of samples requested per class from SMOTE / BorderlineSMOTE.
# Classes not listed here (i.e. 'Normal Traffic') are left untouched by those samplers.
MINORITY_TARGET_COUNTS = {'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 110000, 'DDoS': 780000, 'DoS': 200000}


def _make_adasyn():
    """Return a fresh ADASYN sampler using the 'auto' strategy.

    In the recorded run this brings every class to roughly the majority-class size.
    """
    return ADASYN(sampling_strategy='auto', random_state=OVERSAMPLING_SEED, n_neighbors=5)


def _make_smote():
    """Return a fresh SMOTE sampler targeting MINORITY_TARGET_COUNTS."""
    # dict(...) hands each sampler its own copy of the shared targets.
    return SMOTE(sampling_strategy=dict(MINORITY_TARGET_COUNTS), random_state=OVERSAMPLING_SEED)


def _make_borderline_smote():
    """Return a fresh BorderlineSMOTE ('borderline-1') sampler targeting MINORITY_TARGET_COUNTS."""
    return BorderlineSMOTE(sampling_strategy=dict(MINORITY_TARGET_COUNTS), random_state=OVERSAMPLING_SEED, k_neighbors=5, m_neighbors=10, kind='borderline-1')


# --- ADASYN ---
X_train_resampled_ADASYN, y_train_resampled_ADASYN = _make_adasyn().fit_resample(X_train_resampled_rus, y_train_resampled_rus)

X_train_resampled_scaled_RS_ADASYN, y_train_resampled_scaled_RS_ADASYN = _make_adasyn().fit_resample(X_train_scaled_rus_RS, y_train_scaled_rus_RS)

X_train_resampled_scaled_MMS_ADASYN, y_train_resampled_scaled_MMS_ADASYN = _make_adasyn().fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


# --- SMOTE ---
X_train_resampled_SMOTE, y_train_resampled_SMOTE = _make_smote().fit_resample(X_train_resampled_rus, y_train_resampled_rus)

X_train_resampled_scaled_RS_SMOTE, y_train_resampled_scaled_RS_SMOTE = _make_smote().fit_resample(X_train_scaled_rus_RS, y_train_scaled_rus_RS)

X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = _make_smote().fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


# --- Borderline-SMOTE ---
X_train_resampled_BSMOTE, y_train_resampled_BSMOTE = _make_borderline_smote().fit_resample(X_train_resampled_rus, y_train_resampled_rus)

X_train_resampled_scaled_RS_BSMOTE, y_train_resampled_scaled_RS_BSMOTE = _make_borderline_smote().fit_resample(X_train_scaled_rus_RS, y_train_scaled_rus_RS)

X_train_resampled_scaled_MMS_BSMOTE, y_train_resampled_scaled_MMS_BSMOTE = _make_borderline_smote().fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [34]:
# Class distribution of the undersampled labels that are fed to the ADASYN / SMOTE / BorderlineSMOTE calls in this section.
y_train_resampled_rus.value_counts()

Attack Type
Normal Traffic    1000000
DDoS               542829
DoS                137409
Bot                100784
Infilteration       75272
Brute Force         66413
Name: count, dtype: int64

In [35]:
# Class distribution after ADASYN (sampling_strategy='auto') was applied to X_train_resampled_rus / y_train_resampled_rus.
y_train_resampled_ADASYN.value_counts()

Attack Type
Infilteration     1010750
DDoS              1005961
DoS               1002119
Bot               1000061
Normal Traffic    1000000
Brute Force        999943
Name: count, dtype: int64

In [36]:
# Class distribution after SMOTE with explicit per-class target counts; the counts should match the sampling_strategy dict.
y_train_resampled_SMOTE.value_counts()

Attack Type
Normal Traffic    1000000
DDoS               780000
DoS                200000
Bot                150000
Infilteration      110000
Brute Force        100000
Name: count, dtype: int64

## Evaluate Random Forest (RF) on the oversampled training sets

In [38]:
# RF benchmark (helper defined elsewhere in the notebook): ADASYN-oversampled set built from X_train_resampled_rus vs. X_test.
eval_dataset_w_RF(X_train_resampled_ADASYN, X_test, y_train_resampled_ADASYN, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  5.0min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.5s finished


Cross validation average score: 0.8661 +/- standard deviation: 0.0129
Accuracy on the test set: 0.6599
Resource measurements: {'Training Time (s)': 307.0943458080292, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.93222013170264}
                precision    recall  f1-score   support

           Bot       0.82      1.00      0.90     43193
   Brute Force       0.76      1.00      0.86     28463
          DDoS       0.73      0.99      0.84    232641
           DoS       0.94      1.00      0.97     58890
 Infilteration       0.03      0.78      0.05     32259
Normal Traffic       1.00      0.61      0.76   2590259

      accuracy                           0.66   2985705
     macro avg       0.71      0.90      0.73   2985705
  weighted avg       0.96      0.66      0.76   2985705



In [39]:
# RF benchmark: ADASYN-oversampled RS-scaled training set vs. the RS-scaled test set.
# NOTE(review): RS presumably = RobustScaler — confirm.
eval_dataset_w_RF(X_train_resampled_scaled_RS_ADASYN, X_test_RS_scaled, y_train_resampled_scaled_RS_ADASYN, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   56.7s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  4.6min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.6s finished


Cross validation average score: 0.8669 +/- standard deviation: 0.0240
Accuracy on the test set: 0.7108
Resource measurements: {'Training Time (s)': 286.75903844833374, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 92.19229357798174}
                precision    recall  f1-score   support

           Bot       0.86      1.00      0.92     43193
   Brute Force       0.84      1.00      0.91     28463
          DDoS       0.91      0.98      0.94    232641
           DoS       0.96      0.99      0.97     58890
 Infilteration       0.03      0.78      0.06     32259
Normal Traffic       1.00      0.67      0.80   2590259

      accuracy                           0.71   2985705
     macro avg       0.76      0.90      0.77   2985705
  weighted avg       0.97      0.71      0.81   2985705



In [40]:
# RF benchmark: ADASYN-oversampled MMS-scaled training set vs. the MMS-scaled test set.
# NOTE(review): MMS presumably = MinMaxScaler — confirm.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_ADASYN, X_test_MMS_scaled, y_train_resampled_scaled_MMS_ADASYN, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   28.2s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.4min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.0s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    7.6s finished


Cross validation average score: 0.7324 +/- standard deviation: 0.0586
Accuracy on the test set: 0.6245
Resource measurements: {'Training Time (s)': 152.39505314826965, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.67130620985013}
                precision    recall  f1-score   support

           Bot       0.40      1.00      0.58     43193
   Brute Force       0.48      1.00      0.65     28463
          DDoS       0.78      0.91      0.84    232641
           DoS       0.92      1.00      0.96     58890
 Infilteration       0.03      0.80      0.05     32259
Normal Traffic       1.00      0.58      0.73   2590259

      accuracy                           0.62   2985705
     macro avg       0.60      0.88      0.63   2985705
  weighted avg       0.95      0.62      0.73   2985705



In [41]:
# RF benchmark: SMOTE-oversampled set built from X_train_resampled_rus vs. X_test.
eval_dataset_w_RF(X_train_resampled_SMOTE, X_test, y_train_resampled_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   26.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  2.1min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.4s finished


Cross validation average score: 0.9488 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9798
Resource measurements: {'Training Time (s)': 127.03153467178345, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.86833333333338}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.40      0.09      0.14     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.88      0.84      0.84   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [42]:
# RF benchmark: SMOTE-oversampled RS-scaled training set vs. the RS-scaled test set.
# NOTE(review): RS presumably = RobustScaler — confirm.
eval_dataset_w_RF(X_train_resampled_scaled_RS_SMOTE, X_test_RS_scaled, y_train_resampled_scaled_RS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   20.8s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.7min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.2s finished


Cross validation average score: 0.9490 +/- standard deviation: 0.0003
Accuracy on the test set: 0.9800
Resource measurements: {'Training Time (s)': 106.99099493026733, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 92.2897260273973}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.41      0.09      0.14     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.88      0.84      0.84   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [43]:
# RF benchmark: SMOTE-oversampled MMS-scaled training set vs. the MMS-scaled test set.
# At this point in notebook order the SMOTE set is the one from the main oversampling cell ('Infilteration': 110000);
# later cells reassign the same variable name, so out-of-order execution changes what this call evaluates.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   59.1s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.0s finished


Cross validation average score: 0.9486 +/- standard deviation: 0.0006
Accuracy on the test set: 0.9799
Resource measurements: {'Training Time (s)': 62.386807441711426, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 91.2165577342048}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.48      0.06      0.10     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.89      0.84      0.84   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [44]:
# RF benchmark: BorderlineSMOTE-oversampled set built from X_train_resampled_rus vs. X_test.
eval_dataset_w_RF(X_train_resampled_BSMOTE, X_test, y_train_resampled_BSMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   20.9s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.8min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.2s finished


Cross validation average score: 0.9343 +/- standard deviation: 0.0172
Accuracy on the test set: 0.9760
Resource measurements: {'Training Time (s)': 109.25994896888733, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 91.75775978407553}
                precision    recall  f1-score   support

           Bot       0.97      1.00      0.99     43193
   Brute Force       0.92      1.00      0.96     28463
          DDoS       0.87      1.00      0.93    232641
           DoS       0.98      1.00      0.99     58890
 Infilteration       0.43      0.08      0.13     32259
Normal Traffic       0.99      0.98      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.86      0.84      0.83   2985705
  weighted avg       0.97      0.98      0.97   2985705



In [45]:
# RF benchmark: BorderlineSMOTE-oversampled RS-scaled training set vs. the RS-scaled test set.
# NOTE(review): RS presumably = RobustScaler — confirm.
eval_dataset_w_RF(X_train_resampled_scaled_RS_BSMOTE, X_test_RS_scaled, y_train_resampled_scaled_RS_BSMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.7min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.3s finished


Cross validation average score: 0.9414 +/- standard deviation: 0.0083
Accuracy on the test set: 0.9789
Resource measurements: {'Training Time (s)': 106.98057055473328, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 92.47864077669898}
                precision    recall  f1-score   support

           Bot       0.98      1.00      0.99     43193
   Brute Force       0.91      1.00      0.95     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.43      0.08      0.13     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.87      0.84      0.83   2985705
  weighted avg       0.97      0.98      0.98   2985705



In [46]:
# RF benchmark: BorderlineSMOTE-oversampled MMS-scaled training set vs. the MMS-scaled test set.
# NOTE(review): MMS presumably = MinMaxScaler — confirm.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_BSMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_BSMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   12.0s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   58.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.2s finished


Cross validation average score: 0.9020 +/- standard deviation: 0.0488
Accuracy on the test set: 0.9586
Resource measurements: {'Training Time (s)': 60.74031662940979, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.24832962138082}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.72      1.00      0.84    232641
           DoS       0.96      1.00      0.98     58890
 Infilteration       0.48      0.04      0.08     32259
Normal Traffic       0.99      0.96      0.98   2590259

      accuracy                           0.96   2985705
     macro avg       0.86      0.83      0.81   2985705
  weighted avg       0.96      0.96      0.96   2985705



## Evaluate k-Nearest Neighbours (KNN) on the oversampled training sets

In [47]:
# KNN benchmark (helper defined elsewhere in the notebook): ADASYN-oversampled set built from X_train_resampled_rus vs. X_test.
eval_dataset_w_KNN(X_train_resampled_ADASYN, X_test, y_train_resampled_ADASYN, y_test)

Cross validation average score: 0.8732 +/- standard deviation: 0.0157
Accuracy on the test set: 0.8317
Resource measurements: {'Training Time (s)': 50.47208070755005, 'Peak CPU Usage (%)': 22.3, 'Average CPU Usage (%)': 9.303125}
                precision    recall  f1-score   support

           Bot       0.89      1.00      0.94     43193
   Brute Force       0.88      1.00      0.94     28463
          DDoS       0.86      0.97      0.91    232641
           DoS       0.80      0.98      0.88     58890
 Infilteration       0.04      0.54      0.07     32259
Normal Traffic       0.99      0.82      0.90   2590259

      accuracy                           0.83   2985705
     macro avg       0.75      0.88      0.77   2985705
  weighted avg       0.97      0.83      0.89   2985705



In [48]:
# KNN benchmark: ADASYN-oversampled RS-scaled training set vs. the RS-scaled test set.
# NOTE(review): RS presumably = RobustScaler — confirm.
eval_dataset_w_KNN(X_train_resampled_scaled_RS_ADASYN, X_test_RS_scaled, y_train_resampled_scaled_RS_ADASYN, y_test)

Cross validation average score: 0.8553 +/- standard deviation: 0.0304
Accuracy on the test set: 0.8203
Resource measurements: {'Training Time (s)': 43.186851978302, 'Peak CPU Usage (%)': 12.1, 'Average CPU Usage (%)': 5.824137931034483}
                precision    recall  f1-score   support

           Bot       0.88      1.00      0.94     43193
   Brute Force       0.91      1.00      0.95     28463
          DDoS       0.75      0.98      0.85    232641
           DoS       0.57      0.99      0.73     58890
 Infilteration       0.04      0.57      0.08     32259
Normal Traffic       0.99      0.80      0.89   2590259

      accuracy                           0.82   2985705
     macro avg       0.69      0.89      0.74   2985705
  weighted avg       0.95      0.82      0.87   2985705



In [49]:
# KNN benchmark: ADASYN-oversampled MMS-scaled training set vs. the MMS-scaled test set.
# NOTE(review): MMS presumably = MinMaxScaler — confirm.
eval_dataset_w_KNN(X_train_resampled_scaled_MMS_ADASYN, X_test_MMS_scaled, y_train_resampled_scaled_MMS_ADASYN, y_test)

Cross validation average score: 0.8124 +/- standard deviation: 0.0508
Accuracy on the test set: 0.8573
Resource measurements: {'Training Time (s)': 41.4168586730957, 'Peak CPU Usage (%)': 11.9, 'Average CPU Usage (%)': 7.3500000000000005}
                precision    recall  f1-score   support

           Bot       0.93      1.00      0.96     43193
   Brute Force       0.91      1.00      0.95     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.98      0.99      0.99     58890
 Infilteration       0.05      0.60      0.09     32259
Normal Traffic       0.99      0.84      0.91   2590259

      accuracy                           0.86   2985705
     macro avg       0.80      0.90      0.81   2985705
  weighted avg       0.98      0.86      0.91   2985705



In [50]:
# KNN benchmark: SMOTE-oversampled set built from X_train_resampled_rus vs. X_test.
eval_dataset_w_KNN(X_train_resampled_SMOTE, X_test, y_train_resampled_SMOTE, y_test)

Cross validation average score: 0.9447 +/- standard deviation: 0.0045
Accuracy on the test set: 0.9454
Resource measurements: {'Training Time (s)': 18.83424711227417, 'Peak CPU Usage (%)': 13.2, 'Average CPU Usage (%)': 8.075000000000001}
                precision    recall  f1-score   support

           Bot       0.98      1.00      0.99     43193
   Brute Force       0.97      0.99      0.98     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.91      0.96      0.93     58890
 Infilteration       0.08      0.30      0.13     32259
Normal Traffic       0.99      0.95      0.97   2590259

      accuracy                           0.95   2985705
     macro avg       0.81      0.86      0.82   2985705
  weighted avg       0.97      0.95      0.96   2985705



In [51]:
# KNN benchmark: SMOTE-oversampled RS-scaled training set vs. the RS-scaled test set.
# NOTE(review): RS presumably = RobustScaler — confirm.
eval_dataset_w_KNN(X_train_resampled_scaled_RS_SMOTE, X_test_RS_scaled, y_train_resampled_scaled_RS_SMOTE, y_test)

Cross validation average score: 0.9388 +/- standard deviation: 0.0043
Accuracy on the test set: 0.9364
Resource measurements: {'Training Time (s)': 16.681118726730347, 'Peak CPU Usage (%)': 8.1, 'Average CPU Usage (%)': 6.14}
                precision    recall  f1-score   support

           Bot       0.98      1.00      0.99     43193
   Brute Force       0.98      0.99      0.99     28463
          DDoS       0.82      0.98      0.89    232641
           DoS       0.86      0.97      0.91     58890
 Infilteration       0.10      0.34      0.15     32259
Normal Traffic       0.99      0.94      0.96   2590259

      accuracy                           0.94   2985705
     macro avg       0.79      0.87      0.82   2985705
  weighted avg       0.96      0.94      0.95   2985705



In [52]:
# KNN benchmark: SMOTE-oversampled MMS-scaled training set vs. the MMS-scaled test set.
# At this point in notebook order the SMOTE set is the one from the main oversampling cell ('Infilteration': 110000);
# later cells reassign the same variable name, so out-of-order execution changes what this call evaluates.
eval_dataset_w_KNN(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

Cross validation average score: 0.9535 +/- standard deviation: 0.0030
Accuracy on the test set: 0.9540
Resource measurements: {'Training Time (s)': 16.674004316329956, 'Peak CPU Usage (%)': 7.6, 'Average CPU Usage (%)': 5.5600000000000005}
                precision    recall  f1-score   support

           Bot       0.99      1.00      0.99     43193
   Brute Force       0.98      1.00      0.99     28463
          DDoS       0.94      0.99      0.97    232641
           DoS       0.99      1.00      0.99     58890
 Infilteration       0.10      0.35      0.16     32259
Normal Traffic       0.99      0.96      0.97   2590259

      accuracy                           0.95   2985705
     macro avg       0.83      0.88      0.85   2985705
  weighted avg       0.98      0.95      0.96   2985705



In [53]:
# KNN benchmark: BorderlineSMOTE-oversampled set built from X_train_resampled_rus vs. X_test.
eval_dataset_w_KNN(X_train_resampled_BSMOTE, X_test, y_train_resampled_BSMOTE, y_test)

Cross validation average score: 0.9281 +/- standard deviation: 0.0157
Accuracy on the test set: 0.9404
Resource measurements: {'Training Time (s)': 19.088042974472046, 'Peak CPU Usage (%)': 12.7, 'Average CPU Usage (%)': 8.612499999999999}
                precision    recall  f1-score   support

           Bot       0.96      1.00      0.98     43193
   Brute Force       0.93      1.00      0.96     28463
          DDoS       0.87      0.99      0.92    232641
           DoS       0.89      0.95      0.92     58890
 Infilteration       0.08      0.31      0.13     32259
Normal Traffic       0.99      0.94      0.97   2590259

      accuracy                           0.94   2985705
     macro avg       0.79      0.86      0.81   2985705
  weighted avg       0.97      0.94      0.95   2985705



In [54]:
# KNN benchmark: BorderlineSMOTE-oversampled RS-scaled training set vs. the RS-scaled test set.
# NOTE(review): RS presumably = RobustScaler — confirm.
eval_dataset_w_KNN(X_train_resampled_scaled_RS_BSMOTE, X_test_RS_scaled, y_train_resampled_scaled_RS_BSMOTE, y_test)

Cross validation average score: 0.9233 +/- standard deviation: 0.0146
Accuracy on the test set: 0.9231
Resource measurements: {'Training Time (s)': 17.716742038726807, 'Peak CPU Usage (%)': 13.6, 'Average CPU Usage (%)': 9.866666666666665}
                precision    recall  f1-score   support

           Bot       0.95      1.00      0.97     43193
   Brute Force       0.94      1.00      0.97     28463
          DDoS       0.73      0.99      0.84    232641
           DoS       0.82      0.98      0.89     58890
 Infilteration       0.10      0.35      0.15     32259
Normal Traffic       0.99      0.92      0.95   2590259

      accuracy                           0.92   2985705
     macro avg       0.75      0.87      0.80   2985705
  weighted avg       0.96      0.92      0.94   2985705



In [55]:
# KNN benchmark: BorderlineSMOTE-oversampled MMS-scaled training set vs. the MMS-scaled test set.
# NOTE(review): MMS presumably = MinMaxScaler — confirm.
eval_dataset_w_KNN(X_train_resampled_scaled_MMS_BSMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_BSMOTE, y_test)

Cross validation average score: 0.9264 +/- standard deviation: 0.0290
Accuracy on the test set: 0.9484
Resource measurements: {'Training Time (s)': 19.293482303619385, 'Peak CPU Usage (%)': 17.7, 'Average CPU Usage (%)': 12.25}
                precision    recall  f1-score   support

           Bot       0.97      1.00      0.98     43193
   Brute Force       0.95      1.00      0.97     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.98      1.00      0.99     58890
 Infilteration       0.10      0.36      0.16     32259
Normal Traffic       0.99      0.95      0.97   2590259

      accuracy                           0.95   2985705
     macro avg       0.82      0.88      0.84   2985705
  weighted avg       0.97      0.95      0.96   2985705



## Tweaking Oversampling (raising the SMOTE target count for `Infilteration`)

In [58]:
# Rebuild the undersampled MMS-scaled training set, then oversample it with SMOTE,
# requesting 150000 'Infilteration' samples this time.
# NOTE: this reassigns X_train_scaled_rus_MMS / X_train_resampled_scaled_MMS_SMOTE (and the
# matching y variables), replacing whatever earlier cells stored under those names.
normal_cap = {'Normal Traffic': 1000000}
smote_targets = {'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 150000, 'DDoS':780000, 'DoS': 200000}

rus_sampler = RandomUnderSampler(sampling_strategy=normal_cap, random_state=42)
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = rus_sampler.fit_resample(X_train_MMS_scaled, y_train)

smote_sampler = SMOTE(sampling_strategy=smote_targets, random_state=42)
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = smote_sampler.fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [59]:
# RF benchmark on the most recently assigned MMS SMOTE set; in notebook order that is the
# variant with 'Infilteration': 150000 built in the cell directly above.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   13.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.1min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.5s finished


Cross validation average score: 0.9353 +/- standard deviation: 0.0005
Accuracy on the test set: 0.9792
Resource measurements: {'Training Time (s)': 67.4256956577301, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.48983739837395}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.37      0.10      0.15     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.87      0.84      0.84   2985705
  weighted avg       0.97      0.98      0.98   2985705



In [60]:
# Rebuild the undersampled MMS-scaled training set, then oversample it with SMOTE,
# requesting 200000 'Infilteration' samples this time.
# NOTE: this reassigns X_train_scaled_rus_MMS / X_train_resampled_scaled_MMS_SMOTE (and the
# matching y variables), replacing whatever earlier cells stored under those names.
normal_cap = {'Normal Traffic': 1000000}
smote_targets = {'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 200000, 'DDoS':780000, 'DoS': 200000}

rus_sampler = RandomUnderSampler(sampling_strategy=normal_cap, random_state=42)
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = rus_sampler.fit_resample(X_train_MMS_scaled, y_train)

smote_sampler = SMOTE(sampling_strategy=smote_targets, random_state=42)
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = smote_sampler.fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


In [61]:
# RF benchmark on the most recently assigned MMS SMOTE set; in notebook order that is the
# variant with 'Infilteration': 200000 built in the cell directly above.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   13.7s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.1min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.6s finished


Cross validation average score: 0.9193 +/- standard deviation: 0.0003
Accuracy on the test set: 0.9763
Resource measurements: {'Training Time (s)': 69.95448899269104, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.39117647058825}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.22      0.14      0.17     32259
Normal Traffic       0.99      0.98      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.85      0.85      0.85   2985705
  weighted avg       0.97      0.98      0.97   2985705



In [62]:
# Rebuild the undersampled MMS-scaled training set, then oversample it with SMOTE,
# requesting 250000 'Infilteration' samples this time.
# NOTE: this reassigns X_train_scaled_rus_MMS / X_train_resampled_scaled_MMS_SMOTE (and the
# matching y variables), replacing whatever earlier cells stored under those names.
normal_cap = {'Normal Traffic': 1000000}
smote_targets = {'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 250000, 'DDoS':780000, 'DoS': 200000}

rus_sampler = RandomUnderSampler(sampling_strategy=normal_cap, random_state=42)
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = rus_sampler.fit_resample(X_train_MMS_scaled, y_train)

smote_sampler = SMOTE(sampling_strategy=smote_targets, random_state=42)
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = smote_sampler.fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [63]:
# RF benchmark on the most recently assigned MMS SMOTE set; in notebook order that is the
# variant with 'Infilteration': 250000 built in the cell directly above.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   13.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.2min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.0s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    7.2s finished


Cross validation average score: 0.9080 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9655
Resource measurements: {'Training Time (s)': 73.9387412071228, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.15779092702168}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.13      0.22      0.16     32259
Normal Traffic       0.99      0.97      0.98   2590259

      accuracy                           0.97   2985705
     macro avg       0.83      0.86      0.84   2985705
  weighted avg       0.97      0.97      0.97   2985705



In [64]:
# Rebuild the undersampled MMS-scaled training set, then oversample it with SMOTE,
# requesting 300000 'Infilteration' samples this time.
# NOTE: this reassigns X_train_scaled_rus_MMS / X_train_resampled_scaled_MMS_SMOTE (and the
# matching y variables), replacing whatever earlier cells stored under those names.
normal_cap = {'Normal Traffic': 1000000}
smote_targets = {'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 300000, 'DDoS':780000, 'DoS': 200000}

rus_sampler = RandomUnderSampler(sampling_strategy=normal_cap, random_state=42)
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = rus_sampler.fit_resample(X_train_MMS_scaled, y_train)

smote_sampler = SMOTE(sampling_strategy=smote_targets, random_state=42)
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = smote_sampler.fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [65]:
# RF benchmark on the most recently assigned MMS SMOTE set; in notebook order that is the
# variant with 'Infilteration': 300000 built in the cell directly above.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   15.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.3min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.8s finished


Cross validation average score: 0.8958 +/- standard deviation: 0.0006
Accuracy on the test set: 0.9615
Resource measurements: {'Training Time (s)': 78.69945096969604, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.82504604051569}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.94    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.12      0.25      0.16     32259
Normal Traffic       0.99      0.97      0.98   2590259

      accuracy                           0.96   2985705
     macro avg       0.83      0.87      0.84   2985705
  weighted avg       0.97      0.96      0.97   2985705



In [66]:
# Rebuild the undersampled MMS-scaled training set, then oversample it with SMOTE,
# requesting 400000 'Infilteration' samples this time.
# NOTE: this reassigns X_train_scaled_rus_MMS / X_train_resampled_scaled_MMS_SMOTE (and the
# matching y variables), replacing whatever earlier cells stored under those names.
normal_cap = {'Normal Traffic': 1000000}
smote_targets = {'Bot': 150000, 'Brute Force': 100000, 'Infilteration': 400000, 'DDoS':780000, 'DoS': 200000}

rus_sampler = RandomUnderSampler(sampling_strategy=normal_cap, random_state=42)
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = rus_sampler.fit_resample(X_train_MMS_scaled, y_train)

smote_sampler = SMOTE(sampling_strategy=smote_targets, random_state=42)
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = smote_sampler.fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [67]:
# RF benchmark on the most recently assigned MMS SMOTE set; in notebook order that is the
# variant with 'Infilteration': 400000 built in the cell directly above.
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.3min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.6s finished


Cross validation average score: 0.8758 +/- standard deviation: 0.0006
Accuracy on the test set: 0.9311
Resource measurements: {'Training Time (s)': 78.87329125404358, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.45982142857143}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.94    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.07      0.37      0.12     32259
Normal Traffic       0.99      0.93      0.96   2590259

      accuracy                           0.93   2985705
     macro avg       0.82      0.88      0.83   2985705
  weighted avg       0.97      0.93      0.95   2985705



In [68]:
# Step 1 - under-sample: keep 1,000,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 1_000_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count. 'Infilteration' target in this run: 500,000.
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 500_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [69]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.3min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.5s finished


Cross validation average score: 0.8586 +/- standard deviation: 0.0008
Accuracy on the test set: 0.9152
Resource measurements: {'Training Time (s)': 78.65905475616455, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.65591397849457}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.06      0.41      0.11     32259
Normal Traffic       0.99      0.91      0.95   2590259

      accuracy                           0.92   2985705
     macro avg       0.82      0.88      0.83   2985705
  weighted avg       0.97      0.92      0.94   2985705



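## Tweaking Under-Sampling

The SMOTE targets are held fixed here (with 'Infilteration' at 200,000) while the number of 'Normal Traffic' rows kept by `RandomUnderSampler` is varied from 800,000 to 2,000,000.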

In [15]:
# Step 1 - under-sample: keep 800,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 800_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count ('Infilteration' at 200,000).
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 200_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


In [16]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   12.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.0min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.0s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    7.0s finished


Cross validation average score: 0.9172 +/- standard deviation: 0.0003
Accuracy on the test set: 0.9655
Resource measurements: {'Training Time (s)': 64.43061780929565, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.85849056603774}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.13      0.22      0.16     32259
Normal Traffic       0.99      0.97      0.98   2590259

      accuracy                           0.97   2985705
     macro avg       0.83      0.86      0.84   2985705
  weighted avg       0.97      0.97      0.97   2985705



In [25]:
# Step 1 - under-sample: keep 1,000,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 1_000_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count ('Infilteration' at 200,000).
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 200_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


In [26]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.2min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.7s finished


Cross validation average score: 0.9193 +/- standard deviation: 0.0003
Accuracy on the test set: 0.9763
Resource measurements: {'Training Time (s)': 74.87904787063599, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.95523255813953}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.22      0.14      0.17     32259
Normal Traffic       0.99      0.98      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.85      0.85      0.85   2985705
  weighted avg       0.97      0.98      0.97   2985705



In [17]:
# Step 1 - under-sample: keep 1,250,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 1_250_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count ('Infilteration' at 200,000).
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 200_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


In [18]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   16.0s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.3min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.6s finished


Cross validation average score: 0.9243 +/- standard deviation: 0.0004
Accuracy on the test set: 0.9789
Resource measurements: {'Training Time (s)': 79.71161079406738, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.67264492753623}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.34      0.10      0.16     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.87      0.84      0.84   2985705
  weighted avg       0.97      0.98      0.98   2985705



In [19]:
# Step 1 - under-sample: keep 1,500,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 1_500_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count ('Infilteration' at 200,000).
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 200_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)


In [20]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   16.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.3min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.4s finished


Cross validation average score: 0.9292 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9794
Resource measurements: {'Training Time (s)': 83.83788704872131, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.29611486486485}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.90      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.40      0.09      0.15     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.88      0.84      0.84   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [21]:
# Step 1 - under-sample: keep 1,750,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 1_750_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count ('Infilteration' at 200,000).
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 200_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [22]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   17.9s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.5min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    6.4s finished


Cross validation average score: 0.9330 +/- standard deviation: 0.0002
Accuracy on the test set: 0.9800
Resource measurements: {'Training Time (s)': 92.54423308372498, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 93.46625954198475}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.43      0.07      0.12     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.88      0.84      0.84   2985705
  weighted avg       0.98      0.98      0.98   2985705



In [23]:
# Step 1 - under-sample: keep 2,000,000 'Normal Traffic' rows of the
# MinMax-scaled training split.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 2_000_000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

# Step 2 - over-sample: SMOTE brings each listed attack class up to the
# given target row count ('Infilteration' at 200,000).
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bot': 150_000,
        'Brute Force': 100_000,
        'Infilteration': 200_000,
        'DDoS': 780_000,
        'DoS': 200_000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [24]:
# Evaluate on the RUS + SMOTE training data against the scaled test split
# (eval_dataset_w_RF is defined elsewhere in this notebook).
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:  1.9min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.0s
[Parallel(n_jobs=16)]: Done 150 out of 150 | elapsed:    7.4s finished


Cross validation average score: 0.9367 +/- standard deviation: 0.0001
Accuracy on the test set: 0.9802
Resource measurements: {'Training Time (s)': 115.90941166877747, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.70013908205841}
                precision    recall  f1-score   support

           Bot       1.00      0.99      1.00     43193
   Brute Force       1.00      0.99      1.00     28463
          DDoS       0.91      0.99      0.95    232641
           DoS       0.97      1.00      0.98     58890
 Infilteration       0.46      0.05      0.09     32259
Normal Traffic       0.99      0.99      0.99   2590259

      accuracy                           0.98   2985705
     macro avg       0.89      0.84      0.83   2985705
  weighted avg       0.98      0.98      0.98   2985705

