# Imports and model functions

In [28]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
%matplotlib inline

from scipy.stats import randint, uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve, RandomizedSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score, make_scorer

import time
import psutil
import threading
from memory_profiler import memory_usage

In [46]:
def apply_rf(X_train, y_train, best_params=None, random_state=42, n_jobs=-1, cv=5):
    """Fit a random forest while sampling CPU load, then cross-validate it.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed unchanged to
        ``RandomForestClassifier.fit`` and ``cross_val_score``.
    best_params : dict or None
        Extra keyword arguments for ``RandomForestClassifier``. ``None`` (or an
        empty dict) means the estimator's defaults are used.
    random_state : int
        Seed forwarded to the classifier.
    n_jobs : int
        Parallelism forwarded to both the classifier and ``cross_val_score``.
    cv : int
        Cross-validation setting forwarded to ``cross_val_score``.

    Returns
    -------
    tuple
        ``(cv_scores, measurements, fitted_model)`` on success, where
        ``measurements`` holds the training time and the peak/average of the
        sampled CPU percentages. ``(None, None, None)`` if fitting or scoring
        raises; the traceback is printed instead of propagated.
    """
    measurement_rf = {}
    best_params = best_params or {}

    rf_model = RandomForestClassifier(**best_params, random_state=random_state, n_jobs=n_jobs, verbose=1)

    cpu_usage = []
    stop_flag = threading.Event()

    def monitor_cpu():
        # Each call blocks for 0.1 s, so this samples roughly ten times per second.
        while not stop_flag.is_set():
            cpu_usage.append(psutil.cpu_percent(interval=0.1))

    # daemon=True is a safety net only; the thread is always joined below.
    cpu_thread = threading.Thread(target=monitor_cpu, daemon=True)

    try:
        cpu_thread.start()
        try:
            start_time = time.time()
            rf_model.fit(X_train, y_train)
            training_time = time.time() - start_time
        finally:
            # Stop the sampler even when fit() raises; otherwise the monitor
            # thread would keep running (and appending) forever.
            stop_flag.set()
            cpu_thread.join()

        measurement_rf['Training Time (s)'] = training_time
        # Guard both aggregates against an empty sample list.
        measurement_rf['Peak CPU Usage (%)'] = max(cpu_usage, default=0)
        measurement_rf['Average CPU Usage (%)'] = sum(cpu_usage) / len(cpu_usage) if cpu_usage else 0

        # Cross-validation is scored with the weighted F1 score
        f1_scorer = make_scorer(f1_score, average='weighted')
        cv_scores_rf = cross_val_score(rf_model, X_train, y_train, cv=cv, n_jobs=n_jobs, scoring=f1_scorer)

        return cv_scores_rf, measurement_rf, rf_model

    except Exception as e:
        import traceback
        print("⛔ Full error traceback:")
        traceback.print_exc()
        print(f"Error during Random Forest training: {e}")
        return None, None, None

In [47]:
def eval_dataset_w_RF(X_train, X_test, y_train, y_test, params_rf={'n_estimators': 150, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}):
    """Train a random forest with fixed hyperparameters and print its evaluation.

    Parameters
    ----------
    X_train, y_train
        Training data, forwarded to ``apply_rf``.
    X_test, y_test
        Held-out data used for the accuracy figure and classification report.
    params_rf : dict
        Hyperparameters forwarded to ``apply_rf`` as ``best_params``. The
        default dict is only read here, never modified.

    Returns
    -------
    None
        Results are printed. If ``apply_rf`` reports a failure, a message is
        printed and the function returns early.
    """
    # Fitting the model
    cv_scores_rf, measurement_rf, rf_model = apply_rf(X_train, y_train, best_params=params_rf)

    # apply_rf returns (None, None, None) on failure; stop here instead of
    # crashing on None.predict below.
    if rf_model is None:
        print("Model training failed")
        return

    # Making predictions
    y_pred_rf = rf_model.predict(X_test)

    # Evaluating the model performance
    cv_scores_mean_rf = np.mean(cv_scores_rf)
    print(f'Cross validation average score: {cv_scores_mean_rf:.4f} +/- standard deviation: {np.std(cv_scores_rf):.4f}')

    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f'Accuracy on the test set: {accuracy_rf:.4f}')

    # Checking computational cost
    print("Resource measurements:", measurement_rf)
    print(classification_report(y_test, y_pred_rf, digits=4))

In [53]:
import optuna
def show_results(X_train, X_test, y_train, y_test, n_trials=100, n_jobs=15, random_state=42):
    """Tune a random forest with Optuna, refit the best setting and report on it.

    Parameters
    ----------
    X_train, y_train
        Training data used for every trial and for the final fit.
    X_test, y_test
        Held-out data used for the final F1, accuracy and classification report.
    n_trials : int
        Number of Optuna trials.
    n_jobs : int
        Worker count passed to ``apply_rf`` for the final fit (trials use
        ``apply_rf``'s own default). Defaults to the previously hard-coded 15.
    random_state : int
        Seed for the Optuna sampler and the final fit, so that repeated runs
        explore the same trial sequence.

    Returns
    -------
    tuple
        ``(fitted_model, best_params)``. If the final fit fails,
        ``(None, best_params)`` so that two-value unpacking by the caller
        keeps working.
    """
    def objective(trial, X_train, y_train, cv=5):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_categorical('max_depth', [None] + list(range(5, 31))),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
            'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
        }

        cv_scores, _, _ = apply_rf(X_train, y_train, best_params=params, cv=cv)
        if cv_scores is None:
            # A failed trial scores 0 so the study can continue.
            return 0
        return np.mean(cv_scores)

    # Seed the sampler: an unseeded study yields a different search on every run.
    sampler = optuna.samplers.TPESampler(seed=random_state)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)
    best_params = study.best_params

    cv_scores_rf, measurement_rf, rf_model = apply_rf(
        X_train, y_train, n_jobs=n_jobs, best_params=best_params, random_state=random_state
    )

    if cv_scores_rf is None:
        print("Model training failed")
        # Keep the return shape stable for callers that unpack two values.
        return None, best_params

    y_pred_rf = rf_model.predict(X_test)

    # Convert to numpy arrays and ensure same type
    y_test_array = np.array(y_test)
    y_pred_array = np.array(y_pred_rf)

    # Print unique values to debug
    print("\nUnique values in test set:", np.unique(y_test_array))
    print("Unique values in predictions:", np.unique(y_pred_array))

    cv_scores_mean_rf = np.mean(cv_scores_rf)

    try:
        f1 = f1_score(y_test_array, y_pred_array, average='weighted')
        accuracy = accuracy_score(y_test_array, y_pred_array)

        print("\nModel Evaluation Results:")
        print("-" * 50)
        print(f'Cross validation average score (F1): {cv_scores_mean_rf:.4f} +/- standard deviation: {np.std(cv_scores_rf):.4f}')
        print(f'F1 Score on test set: {f1:.4f}')
        print(f'Accuracy on test set: {accuracy:.4f}')
        print("\nResource Usage:")
        print("-" * 50)
        print("Resource measurements:", measurement_rf)
        print("\nDetailed Classification Report:")
        print("-" * 50)
        print(classification_report(y_test_array, y_pred_array))

    except Exception as e:
        # Print dtype diagnostics, then re-raise so the failure is not hidden.
        print(f"Error during metric calculation: {str(e)}")
        print("Types in test set:", y_test_array.dtype)
        print("Types in predictions:", y_pred_array.dtype)
        raise

    return rf_model, best_params

# Data preparation for model training (CICIDS2017)

In [33]:
# Reading data
# Forward slashes work on both Windows and POSIX and avoid the invalid
# "\." / "\d" / "\c" escape sequences of a backslash path in a normal string.
df = pd.read_csv("../../data prep/cicids2017_prep/cicids2017_42feat_97percent.csv")

In [34]:
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE, BorderlineSMOTE

In [35]:
# Separate the 'Attack Type' label column from the features, then hold out 30 %
# of the rows as a test set, stratified on the label
y = df['Attack Type']
X = df.drop(columns='Attack Type')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [36]:
# Fit the min-max scaler on the training split only, then apply the same
# fitted scaler to both splits
MMS = MinMaxScaler().fit(X_train)
X_train_MMS_scaled = MMS.transform(X_train)
X_test_MMS_scaled = MMS.transform(X_test)

In [37]:
# Randomly undersample 'Normal Traffic' to 500,000 rows on the scaled training data
X_train_scaled_rus_MMS, y_train_scaled_rus_MMS = RandomUnderSampler(
    sampling_strategy={'Normal Traffic': 500000},
    random_state=42,
).fit_resample(X_train_MMS_scaled, y_train)

In [38]:
# Oversample the listed attack classes with SMOTE to the given row counts,
# starting from the undersampled training data
X_train_resampled_scaled_MMS_SMOTE, y_train_resampled_scaled_MMS_SMOTE = SMOTE(
    sampling_strategy={
        'Bots': 7500,
        'Web Attacks': 7500,
        'Brute Force': 7000,
        'Port Scanning': 70000,
        'DDoS': 90000,
        'DoS': 200000,
    },
    random_state=42,
).fit_resample(X_train_scaled_rus_MMS, y_train_scaled_rus_MMS)

# Sync classes (merge 'Web Attacks' and 'Port Scanning' into 'Other')

In [39]:
# Function to combine classes
def combine_classes(y, class_mapping):
    """Relabel a pandas Series of class names according to ``class_mapping``.

    Labels found in ``class_mapping`` are replaced by their mapped value.
    Labels that are not in the mapping are kept unchanged rather than turned
    into NaN, so applying the function a second time (for example when the
    relabelling cell is re-run) leaves already-merged labels such as 'Other'
    intact.

    Parameters
    ----------
    y : pandas.Series
        Class labels.
    class_mapping : dict (or other object with ``.get``), or callable
        Old label -> new label. A callable is applied to every label as-is.

    Returns
    -------
    pandas.Series
        Relabelled copy of ``y`` with the same index and name.
    """
    if callable(class_mapping):
        return y.map(class_mapping)
    # Fall back to the original label when it has no entry in the mapping.
    return y.map(lambda label: class_mapping.get(label, label))
# Define the mapping
class_mapping = {
    'Web Attacks': 'Other',
    'Port Scanning': 'Other',
    'Normal Traffic': 'Normal Traffic',
    'Bots': 'Bots',
    'Brute Force': 'Brute Force',
    'DDoS': 'DDoS',
    'DoS': 'DoS'
}

In [40]:
# Show the distinct labels present in the raw 'Attack Type' column
df["Attack Type"].unique()

array(['Normal Traffic', 'DDoS', 'Port Scanning', 'Bots', 'Web Attacks',
       'Brute Force', 'DoS'], dtype=object)

In [41]:
# Relabel every label vector (full train/test splits and both resampled
# training sets) with the merged class scheme
y_train = combine_classes(y=y_train, class_mapping=class_mapping)
y_test = combine_classes(y=y_test, class_mapping=class_mapping)

y_train_scaled_rus_MMS = combine_classes(
    y=y_train_scaled_rus_MMS,
    class_mapping=class_mapping,
)
y_train_resampled_scaled_MMS_SMOTE = combine_classes(
    y=y_train_resampled_scaled_MMS_SMOTE,
    class_mapping=class_mapping,
)

In [42]:
# Show the distinct test-set labels after the class mapping has been applied
y_test.unique()

array(['Normal Traffic', 'DoS', 'DDoS', 'Bots', 'Other', 'Brute Force'],
      dtype=object)

# Hyperparameter search for the MinMax-scaled + SMOTE-resampled training set

In [54]:
# Run a 30-trial Optuna search on the SMOTE-resampled training set and
# evaluate the refitted best model on the scaled test set
rf_model, best_params = show_results(
    X_train=X_train_resampled_scaled_MMS_SMOTE,
    X_test=X_test_MMS_scaled,
    y_train=y_train_resampled_scaled_MMS_SMOTE,
    y_test=y_test,
    n_trials=30,
)

[I 2025-05-01 18:10:53,222] A new study created in memory with name: no-name-946a3546-e646-4233-8d0e-7c9c1e13d505
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 196 out of 196 | elapsed:   53.7s finished
[I 2025-05-01 18:15:01,254] Trial 0 finished with value: 0.9880452135529687 and parameters: {'n_estimators': 196, 'max_depth': 18, 'min_samples_split': 19, 'min_samples_leaf': 9, 'max_features': 'log2'}. Best is trial 0 with value: 0.9880452135529687.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  5.6min
[Parallel(n_jobs=-1)]: Done 218 out of 218 | elapsed:  7.1min finished
[I 2025-05-01 18:50:00,689] Trial 1 finished with value: 0.9892499848987061 and parameters: {'n_estimators': 218, 'max_depth': 16, 'min_sample


Unique values in test set: ['Bots' 'Brute Force' 'DDoS' 'DoS' 'Normal Traffic' 'Other']
Unique values in predictions: ['Bots' 'Brute Force' 'DDoS' 'DoS' 'Normal Traffic' 'Other']

Model Evaluation Results:
--------------------------------------------------
Cross validation average score (F1): 0.9893 +/- standard deviation: 0.0191
F1 Score on test set: 0.9988
Accuracy on test set: 0.9988

Resource Usage:
--------------------------------------------------
Resource measurements: {'Training Time (s)': 135.58292961120605, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 87.36620900076319}

Detailed Classification Report:
--------------------------------------------------
                precision    recall  f1-score   support

          Bots       0.66      0.96      0.78       584
   Brute Force       0.99      1.00      1.00      2745
          DDoS       1.00      1.00      1.00     38404
           DoS       1.00      1.00      1.00     58124
Normal Traffic       1.00      1.00   

In [55]:
# Evaluate a fixed configuration: 200 trees, unlimited depth, all features per split
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
    params_rf={
        'n_estimators': 200,
        'max_depth': None,
        'min_samples_split': 5,
        'min_samples_leaf': 2,
        'max_features': None,
    },
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   59.0s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  5.7min
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  6.6min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    2.0s
[Parallel(n_jobs=16)]: Done 200 out of 200 | elapsed:    2.4s finished


Cross validation average score: 0.9895 +/- standard deviation: 0.0189
Accuracy on the test set: 0.9988
Resource measurements: {'Training Time (s)': 393.97414994239807, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 95.66547231270364}
                precision    recall  f1-score   support

          Bots     0.6753    0.9401    0.7860       584
   Brute Force     0.9946    0.9985    0.9965      2745
          DDoS     0.9995    0.9998    0.9996     38404
           DoS     0.9967    0.9993    0.9980     58124
Normal Traffic     0.9999    0.9987    0.9993    628518
         Other     0.9886    0.9992    0.9939     27851

      accuracy                         0.9988    756226
     macro avg     0.9424    0.9893    0.9622    756226
  weighted avg     0.9989    0.9988    0.9988    756226



In [56]:
# Evaluate a fixed configuration: 115 trees, depth capped at 30, all features per split
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
    params_rf={
        'n_estimators': 115,
        'max_depth': 30,
        'min_samples_split': 11,
        'min_samples_leaf': 1,
        'max_features': None,
    },
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 115 out of 115 | elapsed:  3.9min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done 115 out of 115 | elapsed:    1.3s finished


Cross validation average score: 0.9894 +/- standard deviation: 0.0190
Accuracy on the test set: 0.9988
Resource measurements: {'Training Time (s)': 235.19409823417664, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 90.77283281733745}
                precision    recall  f1-score   support

          Bots     0.6773    0.9452    0.7891       584
   Brute Force     0.9935    0.9985    0.9960      2745
          DDoS     0.9995    0.9998    0.9996     38404
           DoS     0.9968    0.9993    0.9981     58124
Normal Traffic     0.9998    0.9987    0.9993    628518
         Other     0.9885    0.9988    0.9936     27851

      accuracy                         0.9988    756226
     macro avg     0.9426    0.9901    0.9626    756226
  weighted avg     0.9989    0.9988    0.9988    756226



In [57]:
# Evaluate a fixed configuration: 64 trees, depth capped at 21, all features per split
eval_dataset_w_RF(
    X_train_resampled_scaled_MMS_SMOTE,
    X_test_MMS_scaled,
    y_train_resampled_scaled_MMS_SMOTE,
    y_test,
    params_rf={
        'n_estimators': 64,
        'max_depth': 21,
        'min_samples_split': 10,
        'min_samples_leaf': 2,
        'max_features': None,
    },
)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done  64 out of  64 | elapsed:  2.2min finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done  64 out of  64 | elapsed:    0.7s finished


Cross validation average score: 0.9893 +/- standard deviation: 0.0191
Accuracy on the test set: 0.9988
Resource measurements: {'Training Time (s)': 132.99807024002075, 'Peak CPU Usage (%)': 100.0, 'Average CPU Usage (%)': 96.08823529411767}
                precision    recall  f1-score   support

          Bots     0.6608    0.9572    0.7818       584
   Brute Force     0.9931    0.9982    0.9956      2745
          DDoS     0.9994    0.9997    0.9996     38404
           DoS     0.9966    0.9992    0.9979     58124
Normal Traffic     0.9999    0.9987    0.9993    628518
         Other     0.9886    0.9991    0.9938     27851

      accuracy                         0.9988    756226
     macro avg     0.9397    0.9920    0.9613    756226
  weighted avg     0.9989    0.9988    0.9988    756226

