# Imports and functions

In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score, make_scorer

from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

import time
import psutil
import threading
from memory_profiler import memory_usage

In [None]:
def apply_lgbm(X_train, y_train, best_params=None, n_jobs=-1, cv=5):
    """Fit a LightGBM classifier while sampling CPU load, then cross-validate it.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed unchanged to
        ``LGBMClassifier.fit`` and ``cross_val_score``.
    best_params : dict, optional
        Keyword arguments for ``LGBMClassifier``. ``None`` (or an empty dict)
        means no extra arguments.
    n_jobs : int, default -1
        Forwarded to both the classifier and ``cross_val_score``.
    cv : int, default 5
        Forwarded to ``cross_val_score``.

    Returns
    -------
    tuple
        ``(cv_scores, measurement, lgbm_model)`` where ``measurement`` has the
        keys ``'Training Time (s)'``, ``'Peak CPU (%)'`` and ``'Avg CPU (%)'``.
        If fitting or cross-validation raises, the error is printed and
        ``(None, None, None)`` is returned.
    """
    measurement = {}
    best_params = best_params or {}

    lgbm_model = LGBMClassifier(**best_params, verbose=-1, n_jobs=n_jobs)
    cpu_usage = []
    stop_flag = threading.Event()

    def monitor_cpu():
        # Each call blocks for 0.1 s, so this appends roughly ten samples per second.
        while not stop_flag.is_set():
            cpu_usage.append(psutil.cpu_percent(interval=0.1))

    try:
        # daemon=True so a stuck sampler can never keep the interpreter alive.
        cpu_thread = threading.Thread(target=monitor_cpu, daemon=True)
        cpu_thread.start()
        try:
            start_time = time.time()
            lgbm_model.fit(X_train, y_train)
            training_time = time.time() - start_time
        finally:
            # Always stop the sampler, even when fit() raises; otherwise the
            # thread would keep polling for the rest of the kernel session.
            stop_flag.set()
            cpu_thread.join()

        # Record metrics
        measurement['Training Time (s)'] = training_time
        measurement['Peak CPU (%)'] = max(cpu_usage) if cpu_usage else 0
        measurement['Avg CPU (%)'] = np.mean(cpu_usage) if cpu_usage else 0

        # Cross-validation (weighted F1) on the same training data
        f1_scorer = make_scorer(f1_score, average='weighted')
        cv_scores = cross_val_score(
            lgbm_model, X_train, y_train, cv=cv, scoring=f1_scorer, n_jobs=n_jobs
        )

        return cv_scores, measurement, lgbm_model

    except Exception as e:
        print(f"LightGBM training failed: {str(e)}")
        return None, None, None

def eval_dataset_w_LGBM(X_train, X_test, y_train, y_test,
                       params_lgbm={'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': -1}):
    """Train LightGBM via ``apply_lgbm`` and print its test-set evaluation.

    Prints the prediction time, the mean and standard deviation of the
    cross-validation F1 scores, the test accuracy, the classification report
    and the resource measurements. Returns ``None`` in every case; when
    ``apply_lgbm`` reports a failure only an error line is printed.
    """
    scores, resources, model = apply_lgbm(X_train, y_train, best_params=params_lgbm)

    # apply_lgbm hands back (None, None, None) when training failed
    if model is None:
        print("⛔ Failed to train LightGBM model")
        return

    # Time only the prediction step
    t0 = time.time()
    predictions = model.predict(X_test)
    predict_seconds = time.time() - t0
    print("Predict Time (s) - ", predict_seconds)

    mean_f1 = np.mean(scores)
    std_f1 = np.std(scores)
    print(f'CV F1: {mean_f1:.4f} ± {std_f1:.4f}')

    test_accuracy = accuracy_score(y_test, predictions)
    print(f'Test Accuracy: {test_accuracy:.4f}')
    print(classification_report(y_test, predictions, digits=4))
    print("Resource Usage:", resources)

In [17]:
def apply_rf(X_train, y_train, best_params=None, random_state=42, n_jobs=-1, cv=5):
    """Fit a random forest while sampling CPU load, then cross-validate it.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed unchanged to
        ``RandomForestClassifier.fit`` and ``cross_val_score``.
    best_params : dict, optional
        Keyword arguments for ``RandomForestClassifier``. ``None`` (or an
        empty dict) means no extra arguments.
    random_state : int, default 42
        Seed forwarded to the classifier.
    n_jobs : int, default -1
        Forwarded to both the classifier and ``cross_val_score``.
    cv : int, default 5
        Forwarded to ``cross_val_score``.

    Returns
    -------
    tuple
        ``(cv_scores_rf, measurement_rf, rf_model)`` where ``measurement_rf``
        has the keys ``'Training Time (s)'``, ``'Peak CPU Usage (%)'`` and
        ``'Average CPU Usage (%)'``. If fitting or cross-validation raises,
        the traceback is printed and ``(None, None, None)`` is returned.
    """
    measurement_rf = {}
    best_params = best_params or {}

    rf_model = RandomForestClassifier(**best_params, random_state=random_state, n_jobs=n_jobs, verbose=1)

    cpu_usage = []
    stop_flag = threading.Event()

    def monitor_cpu():
        # Each call blocks for 0.1 s, so this appends roughly ten samples per second.
        while not stop_flag.is_set():
            cpu_usage.append(psutil.cpu_percent(interval=0.1))

    try:
        # daemon=True so a stuck sampler can never keep the interpreter alive.
        cpu_thread = threading.Thread(target=monitor_cpu, daemon=True)
        cpu_thread.start()
        try:
            start_time = time.time()
            rf_model.fit(X_train, y_train)
            training_time = time.time() - start_time
        finally:
            # Always stop the sampler, even when fit() raises; otherwise the
            # thread would keep polling for the rest of the kernel session.
            stop_flag.set()
            cpu_thread.join()

        measurement_rf['Training Time (s)'] = training_time
        # Guard the empty case the same way the average below does.
        measurement_rf['Peak CPU Usage (%)'] = max(cpu_usage) if cpu_usage else 0
        measurement_rf['Average CPU Usage (%)'] = sum(cpu_usage) / len(cpu_usage) if cpu_usage else 0

        # Cross-validation (weighted F1) on the same training data
        f1_scorer = make_scorer(f1_score, average='weighted')
        cv_scores_rf = cross_val_score(rf_model, X_train, y_train, cv=cv, n_jobs=n_jobs, scoring=f1_scorer)

        return cv_scores_rf, measurement_rf, rf_model

    except Exception as e:
        import traceback
        print("⛔ Full error traceback:")
        traceback.print_exc()
        print(f"Error during Random Forest training: {e}")
        return None, None, None

def eval_dataset_w_RF(X_train, X_test, y_train, y_test, params_rf={'n_estimators': 150, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}):
    """Train a random forest via ``apply_rf`` and print its test-set evaluation.

    Prints the prediction time, the mean and standard deviation of the
    cross-validation scores, the test accuracy, the resource measurements and
    the classification report. Returns ``None`` in every case; when
    ``apply_rf`` reports a failure only an error line is printed.
    """
    # Fitting the model
    cv_scores_rf, measurement_rf, rf_model = apply_rf(X_train, y_train, best_params=params_rf)

    # apply_rf returns (None, None, None) on failure; stop here instead of
    # crashing on None.predict (same guard as eval_dataset_w_LGBM).
    if rf_model is None:
        print("⛔ Failed to train Random Forest model")
        return

    # Making predictions (timed separately from training)
    start_time = time.time()
    y_pred_rf = rf_model.predict(X_test)
    predict_time = time.time() - start_time
    print("Predict Time (s) - ", predict_time)

    # Evaluating the model performance
    cv_scores_mean_rf = np.mean(cv_scores_rf)
    print(f'Cross validation average score: {cv_scores_mean_rf:.4f} +/- standard deviation: {np.std(cv_scores_rf):.4f}')

    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f'Accuracy on the test set: {accuracy_rf:.4f}')

    # Checking computational cost
    print("Resource measurements:", measurement_rf)
    print(classification_report(y_test, y_pred_rf, digits=4))

# Import Data and sync features

## Check columns

In [5]:
# Reading data
# Raw strings: these Windows-style paths contain backslashes, and sequences
# such as "\d" and "\c" are invalid escapes in a normal string literal.
df17 = pd.read_csv(r"..\data prep\cicids2017_prep\cicids2017_42feat_97percent.csv")
df18 = pd.read_csv(r"..\data prep\cicids2018_prep\cicids2018_21feat_84per.csv")

In [6]:
df17.columns

Index(['Destination Port', 'Flow Duration', 'Total Fwd Packets',
       'Total Length of Fwd Packets', 'Fwd Packet Length Max',
       'Fwd Packet Length Min', 'Fwd Packet Length Mean',
       'Fwd Packet Length Std', 'Bwd Packet Length Max',
       'Bwd Packet Length Min', 'Bwd Packet Length Mean', 'Flow Bytes/s',
       'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max',
       'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std', 'Fwd IAT Min',
       'Bwd IAT Max', 'Bwd IAT Min', 'Fwd Header Length', 'Bwd Header Length',
       'Fwd Packets/s', 'Bwd Packets/s', 'Min Packet Length',
       'Max Packet Length', 'Packet Length Mean', 'Packet Length Variance',
       'PSH Flag Count', 'ACK Flag Count', 'Avg Fwd Segment Size',
       'Avg Bwd Segment Size', 'Subflow Bwd Bytes', 'Init_Win_bytes_forward',
       'Init_Win_bytes_backward', 'act_data_pkt_fwd', 'min_seg_size_forward',
       'Active Max', 'Idle Mean', 'Attack Type'],
      dtype='object')

In [7]:
df18.columns

Index(['Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts', 'TotLen Fwd Pkts',
       'Fwd Pkt Len Max', 'Fwd Pkt Len Mean', 'Bwd Pkt Len Max',
       'Bwd Pkt Len Mean', 'Flow Byts/s', 'Flow Pkts/s', 'Flow IAT Mean',
       'Flow IAT Std', 'Flow IAT Max', 'Bwd IAT Min', 'Bwd Pkts/s',
       'Pkt Len Var', 'RST Flag Cnt', 'ACK Flag Cnt', 'Init Bwd Win Byts',
       'Fwd Seg Size Min', 'Attack Type'],
      dtype='object')

In [8]:
del df17
del df18

## Sync CIC IDS 2018 to 2017

In [9]:
# Raw string so the backslashes in the Windows-style path are never read as escapes.
df18_full = pd.read_csv(r"..\data prep\cicids2018_prep\combined_data_fixed_f32.csv")

ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [None]:
df18_full.columns

In [None]:
# Sync columns to 2017 dataset
needed_columns_2018 = ["Dst Port", "Flow Duration", "Tot Fwd Pkts", "TotLen Fwd Pkts", "Fwd Pkt Len Max", "Fwd Pkt Len Min", "Fwd Pkt Len Mean", "Fwd Pkt Len Std", "Bwd Pkt Len Max", "Bwd Pkt Len Min", "Bwd Pkt Len Mean", "Flow Byts/s", "Flow Pkts/s", "Flow IAT Mean", "Flow IAT Std", "Flow IAT Max", "Fwd IAT Tot", "Fwd IAT Mean", "Fwd IAT Std", "Fwd IAT Min", "Bwd IAT Max", "Bwd IAT Min", "Fwd Header Len", "Bwd Header Len", "Fwd Pkts/s", "Bwd Pkts/s", "Pkt Len Min", "Pkt Len Max", "Pkt Len Mean", "Pkt Len Var", "PSH Flag Cnt", "ACK Flag Cnt", "Fwd Seg Size Avg", "Bwd Seg Size Avg", "Subflow Bwd Byts", "Init Fwd Win Byts", "Init Bwd Win Byts", "Fwd Act Data Pkts", "Fwd Seg Size Min", "Active Max", "Idle Mean", "Label"]

# Drop every other column in ONE call; dropping them one at a time rebuilds
# the (very large) frame once per removed column.
unneeded_columns_2018 = [column for column in df18_full.columns if column not in needed_columns_2018]
df18_full.drop(columns=unneeded_columns_2018, inplace=True)

In [None]:
# Rename columns to the names of 2017 dataset
# Keys are the CIC-IDS 2018 column names kept by the previous cell; values are
# the names the 2017 frame uses, with "Label" becoming "Attack Type".
# NOTE(review): the name `group_mapping` is reassigned to a label mapping in
# the next cell, so this dict is only valid until then.
group_mapping = {"Dst Port": "Destination Port","Flow Duration": "Flow Duration","Tot Fwd Pkts": "Total Fwd Packets","TotLen Fwd Pkts": "Total Length of Fwd Packets","Fwd Pkt Len Max": "Fwd Packet Length Max","Fwd Pkt Len Min": "Fwd Packet Length Min","Fwd Pkt Len Mean": "Fwd Packet Length Mean","Fwd Pkt Len Std": "Fwd Packet Length Std","Bwd Pkt Len Max": "Bwd Packet Length Max","Bwd Pkt Len Min": "Bwd Packet Length Min","Bwd Pkt Len Mean": "Bwd Packet Length Mean","Flow Byts/s": "Flow Bytes/s","Flow Pkts/s": "Flow Packets/s","Flow IAT Mean": "Flow IAT Mean","Flow IAT Std": "Flow IAT Std","Flow IAT Max": "Flow IAT Max","Fwd IAT Tot": "Fwd IAT Total","Fwd IAT Mean": "Fwd IAT Mean","Fwd IAT Std": "Fwd IAT Std","Fwd IAT Min": "Fwd IAT Min","Bwd IAT Max": "Bwd IAT Max","Bwd IAT Min": "Bwd IAT Min","Fwd Header Len": "Fwd Header Length","Bwd Header Len": "Bwd Header Length","Fwd Pkts/s": "Fwd Packets/s","Bwd Pkts/s": "Bwd Packets/s","Pkt Len Min": "Min Packet Length","Pkt Len Max": "Max Packet Length","Pkt Len Mean": "Packet Length Mean","Pkt Len Var": "Packet Length Variance","PSH Flag Cnt": "PSH Flag Count","ACK Flag Cnt": "ACK Flag Count","Fwd Seg Size Avg": "Avg Fwd Segment Size","Bwd Seg Size Avg": "Avg Bwd Segment Size","Subflow Bwd Byts": "Subflow Bwd Bytes","Init Fwd Win Byts": "Init_Win_bytes_forward","Init Bwd Win Byts": "Init_Win_bytes_backward","Fwd Act Data Pkts": "act_data_pkt_fwd","Fwd Seg Size Min": "min_seg_size_forward","Active Max": "Active Max","Idle Mean": "Idle Mean","Label": "Attack Type"}

df18_full = df18_full.rename(columns=group_mapping)

In [None]:
# Mapping the attacks to the new group
# 'Bot' and 'Infilteration' are written out as 'Bots' and 'Other': the
# evaluation reports and the binary test-label mapping later in this notebook
# use exactly those class names for the 2018 test file saved from this frame.
# NOTE: any raw label that is not a key below becomes NaN after .map().
group_mapping = {
    'Benign': 'Normal Traffic',
    'Brute Force -Web': 'Brute Force',
    'Brute Force -XSS': 'Brute Force',
    'FTP-BruteForce': 'Brute Force',
    'SSH-Bruteforce': 'Brute Force',
    'DDOS attack-HOIC': 'DDoS',
    'DDOS attack-LOIC-UDP': 'DDoS',
    'DDoS attacks-LOIC-HTTP': 'DDoS',
    'DoS attacks-GoldenEye': 'DoS',
    'DoS attacks-Hulk': 'DoS',
    'DoS attacks-SlowHTTPTest': 'DoS',
    'DoS attacks-Slowloris': 'DoS',
    'Bot': 'Bots',
    'SQL Injection': 'SQL Injection',
    'Infilteration': 'Other',
}

# Map to new group column
df18_full['Attack Type'] = df18_full['Attack Type'].map(group_mapping)
# SQL Injection rows are removed from the synced test set
df18_full.drop(df18_full[(df18_full['Attack Type'] == 'SQL Injection')].index, inplace=True)

In [None]:
df18_full.columns

In [None]:
df18_full['Attack Type'].unique()

In [None]:
# Raw string so the backslashes in the Windows-style path are never read as escapes.
df17 = pd.read_csv(r"..\data prep\cicids2017_prep\cicids2017_42feat_97percent.csv")


In [None]:
list(df18_full.columns) == list(df17.columns)

In [None]:
# save synced 2018 dataset
df18_full.to_csv("cicids2018_test_of_2017.csv", index=False)

In [None]:
del df18_full
del df17

## Sync CIC IDS 2017 to 2018

In [None]:
# Raw string so the backslashes in the Windows-style path are never read as escapes.
df17_full = pd.read_csv(r"..\data prep\cicids2017_prep\cicids2017_before_corr_analysis.csv")

In [None]:
df17_full.columns

In [None]:
# Columns of the 2017 frame that have a counterpart in the 21-feature 2018 set
needed_columns_2017 = ["Flow Duration", "Total Fwd Packets", "Total Backward Packets", "Total Length of Fwd Packets", "Fwd Packet Length Max", "Fwd Packet Length Mean", "Bwd Packet Length Max", "Bwd Packet Length Mean", "Flow Bytes/s", "Flow Packets/s", "Flow IAT Mean", "Flow IAT Std", "Flow IAT Max", "Bwd IAT Min", "Bwd Packets/s", "Packet Length Variance", "RST Flag Count", "ACK Flag Count", "Init_Win_bytes_backward", "min_seg_size_forward", "Attack Type"]

# Drop every other column in ONE call instead of one in-place drop per column.
unneeded_columns_2017 = [column for column in df17_full.columns if column not in needed_columns_2017]
df17_full.drop(columns=unneeded_columns_2017, inplace=True)

In [None]:
# Rename the kept 2017 columns to the 2018 column names
group_mapping = {"Flow Duration": "Flow Duration", "Total Fwd Packets": "Tot Fwd Pkts", "Total Backward Packets": "Tot Bwd Pkts","Total Length of Fwd Packets": "TotLen Fwd Pkts","Fwd Packet Length Max": "Fwd Pkt Len Max","Fwd Packet Length Mean": "Fwd Pkt Len Mean","Bwd Packet Length Max": "Bwd Pkt Len Max","Bwd Packet Length Mean": "Bwd Pkt Len Mean","Flow Bytes/s": "Flow Byts/s","Flow Packets/s": "Flow Pkts/s","Flow IAT Mean": "Flow IAT Mean","Flow IAT Std": "Flow IAT Std","Flow IAT Max": "Flow IAT Max","Bwd IAT Min": "Bwd IAT Min","Bwd Packets/s": "Bwd Pkts/s","Packet Length Variance": "Pkt Len Var","RST Flag Count": "RST Flag Cnt","ACK Flag Count": "ACK Flag Cnt","Init_Win_bytes_backward": "Init Bwd Win Byts","min_seg_size_forward": "Fwd Seg Size Min","Attack Type": "Attack Type"}

df17_full = df17_full.rename(columns=group_mapping)

# The models trained on 2018 data predict an 'Other' class, and the evaluation
# reports in this notebook show the 2017 test file with 'Port Scanning' and
# 'Web Attacks' already merged into it. replace() leaves every other label
# untouched, so re-running this cell is harmless.
df17_full["Attack Type"] = df17_full["Attack Type"].replace({"Web Attacks": "Other", "Port Scanning": "Other"})

In [None]:
df17_full["Attack Type"].unique()

In [None]:
df17_full.columns

In [None]:
df17_full['Attack Type'].unique()

In [None]:
# Raw string so the backslashes in the Windows-style path are never read as escapes.
df18 = pd.read_csv(r"..\data prep\cicids2018_prep\cicids2018_21feat_84per.csv")

In [None]:
list(df17_full.columns) == list(df18.columns)

In [None]:
df17_full.to_csv("cicids2017_test_of_2018.csv", index=False)

In [None]:
del df17_full
del df18

# MultiClass

## Training on 2017 and testing on 2018

### Preparing Sampling of training data

In [54]:
# Load cicids2017_training.csv and split it into features and the label column
df = pd.read_csv("cicids2017_training.csv")

y_train = df['Attack Type']
X_train = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [55]:
# Load cicids2018_test_of_2017.csv and split it into features and the label column
df = pd.read_csv("cicids2018_test_of_2017.csv")

y_test = df['Attack Type']
X_test = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [None]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both splits
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [57]:
y_train.value_counts()

Attack Type
Normal Traffic    2095057
DoS                193745
DDoS               128014
Port Scanning       90694
Brute Force          9150
Web Attacks          2143
Bots                 1948
Name: count, dtype: int64

In [None]:
# Randomly undersample 'Normal Traffic' to 650,000 rows (seeded with 42);
# no other class is named in sampling_strategy.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 650000}, random_state=42).fit_resample(X_train_MMS_scaled, y_train)

In [None]:
# SMOTE-oversample the listed attack classes to the given per-class row counts
# (seeded with 42), starting from the undersampled training data.
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bots': 10000, 'Web Attacks': 10000, 'Brute Force': 10000, 'Port Scanning': 91000, 'DDoS': 130000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [60]:
y_train_resampled_scaled_MMS_SMOTE.value_counts()

Attack Type
Normal Traffic    650000
DoS               200000
DDoS              130000
Port Scanning      91000
Bots               10000
Brute Force        10000
Web Attacks        10000
Name: count, dtype: int64

In [None]:
# Five classes keep their own name; 'Web Attacks' and 'Port Scanning' are
# merged into a single 'Other' class.
group_mapping_2017 = {
    label: label
    for label in ('Normal Traffic', 'DoS', 'DDoS', 'Brute Force', 'Bots')
}
group_mapping_2017.update({'Web Attacks': 'Other', 'Port Scanning': 'Other'})

In [None]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2017)

### Training and testing models

In [102]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 64, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': None})

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  2.5min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   21.9s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:   32.4s finished


Predict Time (s) -  35.55641722679138
Cross validation average score: 0.9788 +/- standard deviation: 0.0395
Accuracy on the test set: 0.8552
Resource measurements: {'Training Time (s)': 151.48141598701477, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.83155893536114}
                precision    recall  f1-score   support

          Bots     0.1235    0.0001    0.0002    286191
   Brute Force     0.0033    0.0001    0.0001    381784
          DDoS     0.9994    0.2289    0.3725   1263933
           DoS     0.6114    0.7020    0.6536    654300
Normal Traffic     0.8681    0.9746    0.9183  13390249
         Other     0.0145    0.0050    0.0074    160639

      accuracy                         0.8552  16137096
     macro avg     0.4367    0.3184    0.3253  16137096
  weighted avg     0.8258    0.8552    0.8177  16137096



In [63]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 64, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': "sqrt"})

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   25.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:   23.9s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:   35.5s finished


Predict Time (s) -  37.764766693115234
Cross validation average score: 0.9801 +/- standard deviation: 0.0355
Accuracy on the test set: 0.8247
Resource measurements: {'Training Time (s)': 25.853893518447876, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 90.00552486187843}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

          Bots     0.0000    0.0000    0.0000    286191
   Brute Force     0.0026    0.0000    0.0000    381784
          DDoS     0.0000    0.0000    0.0000   1263933
           DoS     0.2396    0.2105    0.2241    654300
Normal Traffic     0.8464    0.9836    0.9098  13390249
         Other     0.1300    0.0002    0.0003    160639

      accuracy                         0.8247  16137096
     macro avg     0.2031    0.1990    0.1890  16137096
  weighted avg     0.7134    0.8247    0.7641  16137096



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
eval_dataset_w_LGBM(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_lgbm={'n_estimators': 228, 'learning_rate': 0.07241523535942174, 'max_depth': 14, 'num_leaves': 79, 'subsample': 0.5650088660864082, 'colsample_bytree': 0.8850730957587873, 'reg_alpha': 0.31650105405212536, 'reg_lambda': 3.8724602641849213})

Predict Time (s) -  118.67052054405212
CV F1: 0.9859 ± 0.0263
Test Accuracy: 0.8602


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

          Bots     0.9997    0.0316    0.0613    286191
   Brute Force     0.9674    0.2447    0.3906    381784
          DDoS     0.0000    0.0000    0.0000   1263933
           DoS     0.9815    0.6031    0.7471    654300
Normal Traffic     0.8751    0.9994    0.9331  13390249
         Other     0.0019    0.0039    0.0025    160639

      accuracy                         0.8602  16137096
     macro avg     0.6376    0.3138    0.3558  16137096
  weighted avg     0.8065    0.8602    0.8149  16137096

Resource Usage: {'Training Time (s)': 43.07909059524536, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 98.74724220623503}


  _warn_prf(average, modifier, msg_start, len(result))


## Training on 2018 and testing on 2017

### Preparing Sampling of training data

In [64]:
# Load cicids2018_training.csv and split it into features and the label column
df = pd.read_csv("cicids2018_training.csv")

y_train = df['Attack Type']
X_train = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [65]:
# Load cicids2017_test_of_2018.csv and split it into features and the label column
df = pd.read_csv("cicids2017_test_of_2018.csv")

y_test = df['Attack Type']
X_test = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [66]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both splits
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [67]:
y_train.value_counts()

Attack Type
Normal Traffic    8634196
DDoS               775470
DoS                196299
Bot                143977
Infilteration      107531
Brute Force         94876
Name: count, dtype: int64

In [68]:
# Randomly undersample 'Normal Traffic' to 1,300,000 rows (seeded with 42);
# no other class is named in sampling_strategy.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 1300000}, random_state=42).fit_resample(X_train_MMS_scaled, y_train)

In [69]:
# SMOTE-oversample the listed attack classes to the given per-class row counts
# (seeded with 42), starting from the undersampled training data.
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bot': 195000, 'Brute Force': 130000, 'Infilteration': 150000, 'DDoS':800000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [70]:
y_train_resampled_scaled_MMS_SMOTE.value_counts()

Attack Type
Normal Traffic    1300000
DDoS               800000
DoS                200000
Bot                195000
Infilteration      150000
Brute Force        130000
Name: count, dtype: int64

In [71]:
# Four classes keep their own name; 'Bot' is renamed to 'Bots' and
# 'Infilteration' becomes 'Other'.
group_mapping_2018 = {
    label: label
    for label in ('Normal Traffic', 'DoS', 'DDoS', 'Brute Force')
}
group_mapping_2018.update({'Bot': 'Bots', 'Infilteration': 'Other'})

In [72]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2018)

### Training and testing models

In [134]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 64, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': None})

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  2.8min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.9s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    2.6s finished


Predict Time (s) -  3.109252452850342
Cross validation average score: 0.9355 +/- standard deviation: 0.0056
Accuracy on the test set: 0.7261
Resource measurements: {'Training Time (s)': 167.48852038383484, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.55463535228677}
                precision    recall  f1-score   support

          Bots     0.0000    0.0000    0.0000      1948
   Brute Force     0.0313    0.2737    0.0562      9150
          DDoS     0.0000    0.0000    0.0000    128014
           DoS     0.1741    0.0890    0.1178    193745
Normal Traffic     0.8568    0.8447    0.8507   2095057
         Other     0.1543    0.4391    0.2283     92837

      accuracy                         0.7261   2520751
     macro avg     0.2028    0.2744    0.2089   2520751
  weighted avg     0.7313    0.7261    0.7247   2520751



In [73]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 64, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': "sqrt"})

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   18.4s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   38.7s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    2.4s finished


Predict Time (s) -  2.761488914489746
Cross validation average score: 0.9351 +/- standard deviation: 0.0024
Accuracy on the test set: 0.7729
Resource measurements: {'Training Time (s)': 40.63669466972351, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.61086142322097}
                precision    recall  f1-score   support

          Bots     0.0000    0.0000    0.0000      1948
   Brute Force     0.0001    0.0008    0.0002      9150
          DDoS     0.0000    0.0000    0.0000    128014
           DoS     0.2341    0.0908    0.1308    193745
Normal Traffic     0.8538    0.9015    0.8770   2095057
         Other     0.2638    0.4517    0.3331     92837

      accuracy                         0.7729   2520751
     macro avg     0.2253    0.2408    0.2235   2520751
  weighted avg     0.7373    0.7729    0.7512   2520751



In [135]:
eval_dataset_w_LGBM(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_lgbm={'n_estimators': 228, 'learning_rate': 0.07241523535942174, 'max_depth': 14, 'num_leaves': 79, 'subsample': 0.5650088660864082, 'colsample_bytree': 0.8850730957587873, 'reg_alpha': 0.31650105405212536, 'reg_lambda': 3.8724602641849213})

Predict Time (s) -  23.320427417755127
CV F1: 0.9390 ± 0.0041
Test Accuracy: 0.7793
                precision    recall  f1-score   support

          Bots     0.1170    0.0611    0.0803      1948
   Brute Force     0.0000    0.0000    0.0000      9150
          DDoS     0.0000    0.0000    0.0000    128014
           DoS     0.1777    0.0967    0.1253    193745
Normal Traffic     0.8556    0.8940    0.8744   2095057
         Other     0.3412    0.7817    0.4751     92837

      accuracy                         0.7793   2520751
     macro avg     0.2486    0.3056    0.2592   2520751
  weighted avg     0.7374    0.7793    0.7539   2520751

Resource Usage: {'Training Time (s)': 56.15343260765076, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 99.11872659176029}


# Binary

## Training on 2017 and testing on 2018

### Preparing Sampling of training data

In [22]:
# Load cicids2017_training.csv and split it into features and the label column
df = pd.read_csv("cicids2017_training.csv")

y_train = df['Attack Type']
X_train = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [23]:
# Load cicids2018_test_of_2017.csv and split it into features and the label column
df = pd.read_csv("cicids2018_test_of_2017.csv")

y_test = df['Attack Type']
X_test = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [24]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both splits
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [25]:
y_train.value_counts()

Attack Type
Normal Traffic    2095057
DoS                193745
DDoS               128014
Port Scanning       90694
Brute Force          9150
Web Attacks          2143
Bots                 1948
Name: count, dtype: int64

In [26]:
# Randomly undersample 'Normal Traffic' to 650,000 rows (seeded with 42);
# no other class is named in sampling_strategy.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 650000}, random_state=42).fit_resample(X_train_MMS_scaled, y_train)

In [27]:
# SMOTE-oversample the listed attack classes to the given per-class row counts
# (seeded with 42), starting from the undersampled training data.
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bots': 10000, 'Web Attacks': 10000, 'Brute Force': 10000, 'Port Scanning': 91000, 'DDoS': 130000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [28]:
y_train_resampled_scaled_MMS_SMOTE.value_counts()

Attack Type
Normal Traffic    650000
DoS               200000
DDoS              130000
Port Scanning      91000
Bots               10000
Brute Force        10000
Web Attacks        10000
Name: count, dtype: int64

In [29]:
# Binary target: 'Normal Traffic' becomes 'BENIGN', every other 2017 class
# becomes 'Attack'.
group_mapping_2017 = {
    'Normal Traffic': 'BENIGN',
    **dict.fromkeys(
        ('DoS', 'DDoS', 'Brute Force', 'Bots', 'Web Attacks', 'Port Scanning'),
        'Attack',
    ),
}

In [30]:
# Binary target for the 2018 test labels (applied with y_test.map in the next
# cell). Both the harmonised names ('Bots', 'Other') and the raw 2018 names
# ('Bot', 'Infilteration') are accepted, so no attack row can silently map to
# NaN whichever naming the saved test file uses.
group_mapping_2018 = {
    'Normal Traffic': 'BENIGN',
    'DoS': 'Attack',
    'DDoS': 'Attack',
    'Brute Force': 'Attack',
    'Bots': 'Attack',
    'Bot': 'Attack',
    'Other': 'Attack',
    'Infilteration': 'Attack'}

In [31]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2017)
y_test = y_test.map(group_mapping_2018)

### Training and testing models

In [32]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 64, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': None})

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  2.6min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    4.4s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:   10.5s finished


Predict Time (s) -  12.672688245773315
Cross validation average score: 0.9961 +/- standard deviation: 0.0035
Accuracy on the test set: 0.8738
Resource measurements: {'Training Time (s)': 159.56337594985962, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.73828892005608}
              precision    recall  f1-score   support

      Attack     0.7485    0.3891    0.5120   2746847
      BENIGN     0.8859    0.9732    0.9275  13390249

    accuracy                         0.8738  16137096
   macro avg     0.8172    0.6811    0.7198  16137096
weighted avg     0.8625    0.8738    0.8568  16137096



In [33]:
eval_dataset_w_LGBM(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_lgbm={'n_estimators': 228, 'learning_rate': 0.07241523535942174, 'max_depth': 14, 'num_leaves': 79, 'subsample': 0.5650088660864082, 'colsample_bytree': 0.8850730957587873, 'reg_alpha': 0.31650105405212536, 'reg_lambda': 3.8724602641849213})

Predict Time (s) -  19.30156946182251
CV F1: 0.9942 ± 0.0092
Test Accuracy: 0.8811
              precision    recall  f1-score   support

      Attack     0.9889    0.3049    0.4661   2746847
      BENIGN     0.8751    0.9993    0.9331  13390249

    accuracy                         0.8811  16137096
   macro avg     0.9320    0.6521    0.6996  16137096
weighted avg     0.8945    0.8811    0.8536  16137096

Resource Usage: {'Training Time (s)': 9.292603731155396, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 96.56966292134832}


## Training on 2018 and testing on 2017

### Preparing Sampling of training data

In [35]:
# Load cicids2018_training.csv and split it into features and the label column
df = pd.read_csv("cicids2018_training.csv")

y_train = df['Attack Type']
X_train = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [36]:
# Load cicids2017_test_of_2018.csv and split it into features and the label column
df = pd.read_csv("cicids2017_test_of_2018.csv")

y_test = df['Attack Type']
X_test = df.drop(columns='Attack Type')

# The combined frame is no longer needed once it has been split
del df

In [37]:
# Fit the min-max scaler on the training features only, then apply that same
# fitted scaler to both splits
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [38]:
y_train.value_counts()

Attack Type
Normal Traffic    8634196
DDoS               775470
DoS                196299
Bot                143977
Infilteration      107531
Brute Force         94876
Name: count, dtype: int64

In [39]:
# Randomly undersample 'Normal Traffic' to 1,300,000 rows (seeded with 42);
# no other class is named in sampling_strategy.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(sampling_strategy={'Normal Traffic': 1300000}, random_state=42).fit_resample(X_train_MMS_scaled, y_train)

In [45]:
# SMOTE-oversample the listed attack classes to the given per-class row counts
# (seeded with 42), starting from the undersampled training data.
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(sampling_strategy={'Bot': 195000, 'Brute Force': 130000, 'Infilteration': 150000, 'DDoS':800000, 'DoS': 200000}, random_state=42).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

In [46]:
y_train_resampled_scaled_MMS_SMOTE.value_counts()

Attack Type
Normal Traffic    1300000
DDoS               800000
DoS                200000
Bot                195000
Infilteration      150000
Brute Force        130000
Name: count, dtype: int64

In [47]:
# Binary target for the 2017 test labels (applied with y_test.map below).
# Both the harmonised 'Other' class and the raw 2017 names it was built from
# ('Web Attacks', 'Port Scanning') are accepted, so no attack row can silently
# map to NaN whichever naming the saved test file uses.
group_mapping_2017 = {
    'Normal Traffic': 'BENIGN',
    'DoS': 'Attack',
    'DDoS': 'Attack',
    'Brute Force': 'Attack',
    'Bots': 'Attack',
    'Other': 'Attack',
    'Web Attacks': 'Attack',
    'Port Scanning': 'Attack'}

In [48]:
# Binary target for the 2018 TRAINING labels (applied with .map below).
# The resampled training labels are the raw 2018 names, which include 'Bot'
# and 'Infilteration'; without those keys .map() turns every such row into
# NaN. The harmonised names 'Bots' and 'Other' stay accepted as well.
group_mapping_2018 = {
    'Normal Traffic': 'BENIGN',
    'DoS': 'Attack',
    'DDoS': 'Attack',
    'Brute Force': 'Attack',
    'Bot': 'Attack',
    'Infilteration': 'Attack',
    'Bots': 'Attack',
    'Other': 'Attack'}

In [49]:
y_train_resampled_scaled_MMS_SMOTE = y_train_resampled_scaled_MMS_SMOTE.map(group_mapping_2018)
y_test = y_test.map(group_mapping_2017)

### Training and testing models

In [52]:
eval_dataset_w_RF(X_train_resampled_scaled_MMS_SMOTE, X_test_MMS_scaled, y_train_resampled_scaled_MMS_SMOTE, y_test, params_rf={'n_estimators': 64, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': None})

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  3.2min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.6s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    1.5s finished


Predict Time (s) -  1.7936029434204102
Cross validation average score: 0.9273 +/- standard deviation: 0.1111
Accuracy on the test set: 0.7727
Resource measurements: {'Training Time (s)': 195.88097214698792, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 92.4441860465116}
              precision    recall  f1-score   support

      Attack     0.2708    0.2043    0.2329    425694
      BENIGN     0.8460    0.8883    0.8666   2095057

    accuracy                         0.7727   2520751
   macro avg     0.5584    0.5463    0.5497   2520751
weighted avg     0.7489    0.7727    0.7596   2520751



In [None]:
# Same random forest run as above, but with max_features="sqrt".
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
    params_rf=dict(
        n_estimators=64,
        max_depth=21,
        min_samples_split=10,
        min_samples_leaf=2,
        max_features="sqrt",
    ),
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   19.9s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   41.3s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.6s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    1.7s finished


Predict Time (s) -  1.9202516078948975
Cross validation average score: 0.9296 +/- standard deviation: 0.1122
Accuracy on the test set: 0.7838
Resource measurements: {'Training Time (s)': 43.15839505195618, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 94.96810631229239}
              precision    recall  f1-score   support

      Attack     0.3233    0.2565    0.2861    425694
      BENIGN     0.8550    0.8909    0.8726   2095057

    accuracy                         0.7838   2520751
   macro avg     0.5892    0.5737    0.5793   2520751
weighted avg     0.7652    0.7838    0.7736   2520751



In [51]:
# LightGBM run on the SMOTE-balanced binary training set.
eval_dataset_w_LGBM(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
    params_lgbm=dict(
        n_estimators=228,
        learning_rate=0.07241523535942174,
        max_depth=14,
        num_leaves=79,
        subsample=0.5650088660864082,
        colsample_bytree=0.8850730957587873,
        reg_alpha=0.31650105405212536,
        reg_lambda=3.8724602641849213,
    ),
)

Predict Time (s) -  3.3581292629241943
CV F1: 0.9261 ± 0.1156
Test Accuracy: 0.7237
              precision    recall  f1-score   support

      Attack     0.2600    0.3447    0.2964    425694
      BENIGN     0.8574    0.8007    0.8281   2095057

    accuracy                         0.7237   2520751
   macro avg     0.5587    0.5727    0.5622   2520751
weighted avg     0.7565    0.7237    0.7383   2520751

Resource Usage: {'Training Time (s)': 14.846580266952515, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 97.37573529411763}


# Test with higher feature counts for training on 2018

## MultiClass

### Preparing Sampling of training data

In [116]:
# Load cicids2018_test_of_2017.csv; in this section it provides the training data.
df = pd.read_csv("cicids2018_test_of_2017.csv")

# Split the 'Attack Type' target column from the feature columns.
y_train = df['Attack Type']
X_train = df.drop(columns='Attack Type')

# Release the name of the combined frame now that it has been split.
del df

In [117]:
# Load cicids2017_training.csv; in this section it provides the test data.
df = pd.read_csv("cicids2017_training.csv")

# Split the 'Attack Type' target column from the feature columns.
y_test = df['Attack Type']
X_test = df.drop(columns='Attack Type')

# Release the name of the combined frame now that it has been split.
del df

In [118]:
# Fit the min-max scaler on the training features only, then apply that
# same fitted scaling to both the training and the test features.
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [119]:
# Display the number of training rows per 'Attack Type' label before undersampling.
y_train.value_counts()

Attack Type
Normal Traffic    13390249
DDoS               1263933
DoS                 654300
Brute Force         381784
Bots                286191
Other               160639
Name: count, dtype: int64

In [120]:
# Randomly undersample the 'Normal Traffic' class to 650,000 rows
# (fixed seed); only the class named in sampling_strategy is targeted.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 650000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

In [121]:
# Display the per-class row counts after undersampling 'Normal Traffic'.
y_train_scaled_rus_MMS.value_counts()

Attack Type
DDoS              1263933
DoS                654300
Normal Traffic     650000
Brute Force        381784
Bots               286191
Other              160639
Name: count, dtype: int64

In [122]:
# Multiclass relabelling for the test labels: five classes keep their
# name, while 'Web Attacks' and 'Port Scanning' are folded into 'Other'.
group_mapping_2017 = {
    **{label: label for label in ('Normal Traffic', 'DoS', 'DDoS', 'Brute Force', 'Bots')},
    **dict.fromkeys(('Web Attacks', 'Port Scanning'), 'Other'),
}

In [123]:
# Relabel the test targets with group_mapping_2017. Series.map() yields NaN for
# any label that is not a key of the mapping; since 'Other' is not a key,
# re-running this cell on the already-mapped labels would turn 'Other' rows into NaN.
y_test = y_test.map(group_mapping_2017)

### Training and testing models

In [84]:
# Random forest run on the undersampled multiclass training set, max_features=None.
eval_dataset_w_RF(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_rf=dict(
        n_estimators=64,
        max_depth=21,
        min_samples_split=10,
        min_samples_leaf=2,
        max_features=None,
    ),
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  6.0min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.0s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    2.9s finished


Predict Time (s) -  3.8243203163146973
Cross validation average score: 0.9127 +/- standard deviation: 0.0381
Accuracy on the test set: 0.6113
Resource measurements: {'Training Time (s)': 365.9485318660736, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 88.21424581005587}
                precision    recall  f1-score   support

          Bots     0.4972    0.6294    0.5555      1948
   Brute Force     0.0465    0.0303    0.0367      9150
          DDoS     0.0056    0.0002    0.0004    128014
           DoS     0.0681    0.1834    0.0993    193745
Normal Traffic     0.8108    0.6964    0.7492   2095057
         Other     0.2424    0.4853    0.3233     92837

      accuracy                         0.6113   2520751
     macro avg     0.2784    0.3375    0.2941   2520751
  weighted avg     0.6889    0.6113    0.6428   2520751



In [124]:
# Random forest run on the undersampled multiclass training set,
# with max_depth=10 and max_features="sqrt".
eval_dataset_w_RF(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_rf=dict(
        n_estimators=64,
        max_depth=10,
        min_samples_split=10,
        min_samples_leaf=2,
        max_features="sqrt",
    ),
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   19.1s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   40.7s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    2.4s finished


Predict Time (s) -  3.4890003204345703
Cross validation average score: 0.8999 +/- standard deviation: 0.0417
Accuracy on the test set: 0.8391
Resource measurements: {'Training Time (s)': 43.69391870498657, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 91.35597269624576}
                precision    recall  f1-score   support

          Bots     0.0000    0.0000    0.0000      1948
   Brute Force     0.0545    0.0239    0.0333      9150
          DDoS     0.0031    0.0002    0.0003    128014
           DoS     0.4956    0.0893    0.1514    193745
Normal Traffic     0.8508    0.9800    0.9108   2095057
         Other     0.7312    0.4798    0.5794     92837

      accuracy                         0.8391   2520751
     macro avg     0.3559    0.2622    0.2792   2520751
  weighted avg     0.7725    0.8391    0.7901   2520751



In [85]:
# Random forest run on the undersampled multiclass training set,
# with max_depth=21 and max_features="sqrt".
eval_dataset_w_RF(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_rf=dict(
        n_estimators=64,
        max_depth=21,
        min_samples_split=10,
        min_samples_leaf=2,
        max_features="sqrt",
    ),
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   24.0s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:   54.3s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    1.1s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    3.1s finished


Predict Time (s) -  3.6930415630340576
Cross validation average score: 0.9106 +/- standard deviation: 0.0381
Accuracy on the test set: 0.8171
Resource measurements: {'Training Time (s)': 57.5700740814209, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 89.45647668393782}
                precision    recall  f1-score   support

          Bots     0.3772    0.2515    0.3018      1948
   Brute Force     0.3053    0.2362    0.2663      9150
          DDoS     0.0003    0.0000    0.0000    128014
           DoS     0.4162    0.0641    0.1111    193745
Normal Traffic     0.8457    0.9545    0.8968   2095057
         Other     0.3903    0.4829    0.4317     92837

      accuracy                         0.8171   2520751
     macro avg     0.3892    0.3315    0.3346   2520751
  weighted avg     0.7506    0.8171    0.7710   2520751



In [125]:
# LightGBM run on the undersampled multiclass training set, max_depth=7.
eval_dataset_w_LGBM(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_lgbm=dict(
        n_estimators=228,
        learning_rate=0.07241523535942174,
        max_depth=7,
        num_leaves=79,
        subsample=0.5650088660864082,
        colsample_bytree=0.8850730957587873,
        reg_alpha=0.31650105405212536,
        reg_lambda=3.8724602641849213,
    ),
)

Predict Time (s) -  17.320778846740723
CV F1: 0.9091 ± 0.0406
Test Accuracy: 0.8301
                precision    recall  f1-score   support

          Bots     0.5912    0.6176    0.6041      1948
   Brute Force     0.0082    0.0077    0.0079      9150
          DDoS     0.9308    0.1613    0.2749    128014
           DoS     0.8712    0.3388    0.4879    193745
Normal Traffic     0.8723    0.9346    0.9024   2095057
         Other     0.2792    0.5047    0.3595     92837

      accuracy                         0.8301   2520751
     macro avg     0.5921    0.4274    0.4394   2520751
  weighted avg     0.8500    0.8301    0.8152   2520751

Resource Usage: {'Training Time (s)': 57.91235303878784, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 98.60220994475138}


In [86]:
# LightGBM run on the undersampled multiclass training set, max_depth=14.
eval_dataset_w_LGBM(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_lgbm=dict(
        n_estimators=228,
        learning_rate=0.07241523535942174,
        max_depth=14,
        num_leaves=79,
        subsample=0.5650088660864082,
        colsample_bytree=0.8850730957587873,
        reg_alpha=0.31650105405212536,
        reg_lambda=3.8724602641849213,
    ),
)

Predict Time (s) -  18.09928822517395
CV F1: 0.9103 ± 0.0399
Test Accuracy: 0.7995
                precision    recall  f1-score   support

          Bots     0.5822    0.5965    0.5892      1948
   Brute Force     0.0084    0.0061    0.0071      9150
          DDoS     0.2496    0.0024    0.0048    128014
           DoS     0.5940    0.0840    0.1472    193745
Normal Traffic     0.8444    0.9320    0.8860   2095057
         Other     0.2625    0.4831    0.3402     92837

      accuracy                         0.7995   2520751
     macro avg     0.4235    0.3507    0.3291   2520751
  weighted avg     0.7702    0.7995    0.7610   2520751

Resource Usage: {'Training Time (s)': 62.33349633216858, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 98.81915254237289}


## Binary

### Preparing Sampling of training data

In [104]:
# Load cicids2018_test_of_2017.csv; in this section it provides the training data.
df = pd.read_csv("cicids2018_test_of_2017.csv")

# Split the 'Attack Type' target column from the feature columns.
y_train = df['Attack Type']
X_train = df.drop(columns='Attack Type')

# Release the name of the combined frame now that it has been split.
del df

In [105]:
# Load cicids2017_training.csv; in this section it provides the test data.
df = pd.read_csv("cicids2017_training.csv")

# Split the 'Attack Type' target column from the feature columns.
y_test = df['Attack Type']
X_test = df.drop(columns='Attack Type')

# Release the name of the combined frame now that it has been split.
del df

In [106]:
# Fit the min-max scaler on the training features only, then apply that
# same fitted scaling to both the training and the test features.
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [107]:
# Display the number of training rows per 'Attack Type' label before undersampling.
y_train.value_counts()

Attack Type
Normal Traffic    13390249
DDoS               1263933
DoS                 654300
Brute Force         381784
Bots                286191
Other               160639
Name: count, dtype: int64

In [108]:
# Randomly undersample the 'Normal Traffic' class to 650,000 rows
# (fixed seed); only the class named in sampling_strategy is targeted.
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 650000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

In [109]:
# Display the per-class row counts after undersampling 'Normal Traffic'.
y_train_scaled_rus_MMS.value_counts()

Attack Type
DDoS              1263933
DoS                654300
Normal Traffic     650000
Brute Force        381784
Bots               286191
Other              160639
Name: count, dtype: int64

In [110]:
# Binary relabelling applied to the test labels: 'Normal Traffic' becomes
# 'BENIGN', every listed attack class collapses into 'Attack'.
group_mapping_2017 = {
    'Normal Traffic': 'BENIGN',
    **dict.fromkeys(
        ['DoS', 'DDoS', 'Brute Force', 'Bots', 'Web Attacks', 'Port Scanning'],
        'Attack',
    ),
}

In [111]:
# Binary relabelling applied to the training labels: 'Normal Traffic' becomes
# 'BENIGN', every listed attack class collapses into 'Attack'.
group_mapping_2018 = {
    'Normal Traffic': 'BENIGN',
    **dict.fromkeys(['DoS', 'DDoS', 'Brute Force', 'Bots', 'Other'], 'Attack'),
}

In [112]:
# Collapse both label vectors to the binary targets. Labels missing from a
# mapping become NaN, and the names are overwritten in place, so this cell
# is meant to run once per kernel session.
y_train_scaled_rus_MMS, y_test = (
    y_train_scaled_rus_MMS.map(group_mapping_2018),
    y_test.map(group_mapping_2017),
)

### Training and testing models

In [113]:
# Random forest run on the undersampled binary training set, max_features=None.
eval_dataset_w_RF(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_rf=dict(
        n_estimators=64,
        max_depth=21,
        min_samples_split=10,
        min_samples_leaf=2,
        max_features=None,
    ),
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  7.5min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.8s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    2.2s finished


Predict Time (s) -  3.085486650466919
Cross validation average score: 0.7963 +/- standard deviation: 0.1680
Accuracy on the test set: 0.5426
Resource measurements: {'Training Time (s)': 452.861873626709, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.86923466801825}
              precision    recall  f1-score   support

      Attack     0.0934    0.1962    0.1266    425694
      BENIGN     0.7896    0.6130    0.6902   2095057

    accuracy                         0.5426   2520751
   macro avg     0.4415    0.4046    0.4084   2520751
weighted avg     0.6720    0.5426    0.5950   2520751



In [114]:
# Random forest run on the undersampled binary training set, max_features="sqrt".
eval_dataset_w_RF(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_rf=dict(
        n_estimators=64,
        max_depth=21,
        min_samples_split=10,
        min_samples_leaf=2,
        max_features="sqrt",
    ),
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   29.5s
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  1.1min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.7s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    2.2s finished


Predict Time (s) -  2.5260305404663086
Cross validation average score: 0.7899 +/- standard deviation: 0.1746
Accuracy on the test set: 0.8051
Resource measurements: {'Training Time (s)': 69.27593040466309, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 91.35565610859732}
              precision    recall  f1-score   support

      Attack     0.3479    0.1761    0.2338    425694
      BENIGN     0.8479    0.9329    0.8884   2095057

    accuracy                         0.8051   2520751
   macro avg     0.5979    0.5545    0.5611   2520751
weighted avg     0.7634    0.8051    0.7778   2520751



In [115]:
# LightGBM run on the undersampled binary training set.
eval_dataset_w_LGBM(
    X_train_scaled_rus_MMS,
    X_test_MMS_scaled,
    y_train_scaled_rus_MMS,
    y_test,
    params_lgbm=dict(
        n_estimators=228,
        learning_rate=0.07241523535942174,
        max_depth=14,
        num_leaves=79,
        subsample=0.5650088660864082,
        colsample_bytree=0.8850730957587873,
        reg_alpha=0.31650105405212536,
        reg_lambda=3.8724602641849213,
    ),
)

Predict Time (s) -  3.323268413543701
CV F1: 0.7943 ± 0.1736
Test Accuracy: 0.6275
              precision    recall  f1-score   support

      Attack     0.2543    0.6241    0.3614    425694
      BENIGN     0.8916    0.6282    0.7371   2095057

    accuracy                         0.6275   2520751
   macro avg     0.5730    0.6261    0.5492   2520751
weighted avg     0.7840    0.6275    0.6736   2520751

Resource Usage: {'Training Time (s)': 15.476698875427246, 'Peak CPU (%)': 100.0, 'Avg CPU (%)': 95.73283582089553}
