In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import os
import sys
from sklearn.model_selection import GridSearchCV

# Load the UNSW-NB15 data from
#   <project_root>/Datasets/UNSW_NB15/All/<csv_file_name>.csv
# where project_root is two directories above the current working directory.

project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# Guard the append so that re-running this cell does not keep adding the
# same entry to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

csv_file_name = "all_preprocessed_downsampled"

# Every path component is passed separately so that os.path.join picks the
# platform's separator instead of relying on a hard-coded "/".
data = pd.read_csv(
    os.path.join(project_root, "Datasets", "UNSW_NB15", "All", f"{csv_file_name}.csv")
)

# Build the feature matrix and the multi-class target.
# The target is the attack category ('attack_cat'). The address columns, the
# two timestamp columns and both label columns are removed from the features.
X = data.drop(columns=["srcip", "dstip", "attack_cat", "label", "Stime", "Ltime"])
# The data spells one category both as 'Backdoor' and as 'Backdoors'; merge
# the two spellings so the classifier does not treat them as distinct classes.
y = data['attack_cat'].replace({"Backdoors": "Backdoor"})

# Hold out 15% of the rows as the test set. There is no separate validation
# set. The split is stratified on the target because the attack categories are
# heavily imbalanced and rare classes should keep the same share in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# Search space for the random forest: three forest sizes combined with four
# depth limits (None leaves the trees unbounded), i.e. 12 candidate settings.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
)

# Exhaustive search over the grid, ranking candidates by accuracy under
# 3-fold cross-validation. n_jobs=-1 spreads the fits over all available
# cores and verbose=2 logs one line per completed fit.
base_forest = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    base_forest,
    param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Report the winning hyper-parameters.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# GridSearchCV refits the best configuration on the full training set by
# default (refit=True), so best_estimator_ is already the model a manual
# RandomForestClassifier(random_state=42, **best_params).fit(...) would
# produce. Reusing it avoids paying for a second, identical training run.
rf = grid_search.best_estimator_

# Evaluate on the held-out test set.
y_test_pred = rf.predict(X_test)
print("Test Classification Report:")
# zero_division=0 reports 0.0 for classes that are never predicted (the same
# value as the default) without emitting an UndefinedMetricWarning per metric.
print(classification_report(y_test, y_test_pred, zero_division=0))

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END ......................max_depth=10, n_estimators=50; total time=  18.6s
[CV] END ......................max_depth=10, n_estimators=50; total time=  19.5s
[CV] END ......................max_depth=10, n_estimators=50; total time=  19.6s
[CV] END ....................max_depth=None, n_estimators=50; total time=  23.2s
[CV] END ....................max_depth=None, n_estimators=50; total time=  23.4s
[CV] END ....................max_depth=None, n_estimators=50; total time=  24.5s
[CV] END .....................max_depth=10, n_estimators=100; total time=  42.7s
[CV] END .....................max_depth=10, n_estimators=100; total time=  43.9s
[CV] END .....................max_depth=10, n_estimators=100; total time=  44.6s
[CV] END ......................max_depth=20, n_estimators=50; total time=  26.9s
[CV] END ...................max_depth=None, n_estimators=100; total time=  50.8s
[CV] END ...................max_depth=None, n_es

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                precision    recall  f1-score   support

      Analysis       0.55      0.15      0.24       818
      Backdoor       0.71      0.09      0.16       526
     Backdoors       0.00      0.00      0.00       157
           DoS       0.39      0.16      0.22      4945
      Exploits       0.62      0.89      0.73     13374
       Fuzzers       0.81      0.87      0.84      7317
       Generic       1.00      0.99      0.99     64420
        Normal       1.00      0.99      0.99     66713
Reconnaissance       0.92      0.76      0.83      4181
     Shellcode       0.61      0.61      0.61       451
         Worms       0.47      0.15      0.23        46

      accuracy                           0.93    162948
     macro avg       0.64      0.51      0.53    162948
  weighted avg       0.93      0.93      0.93    162948



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import numpy as np



# Perturb the test set by adding noise
def perturb_data(X, noise_level=0.01, random_state=None):
    """Return ``X`` with zero-mean Gaussian noise added to every entry.

    Parameters
    ----------
    X : array-like
        Object exposing ``.shape`` and supporting ``X + ndarray`` of the same
        shape (e.g. a NumPy array or a pandas DataFrame).
    noise_level : float, default 0.01
        Factor applied to the standard-normal draws, i.e. the standard
        deviation of the added noise.
    random_state : None, int or numpy.random.Generator, default None
        ``None`` draws from NumPy's global legacy RNG (``np.random.randn``),
        so an earlier ``np.random.seed`` call is still honoured. Any other
        value is handed to ``np.random.default_rng`` so the perturbation can
        be reproduced exactly.

    Returns
    -------
    The result of ``X + noise``. For NumPy arrays and pandas objects this is
    a new object and ``X`` itself is left unmodified.
    """
    if random_state is None:
        # Legacy path, kept for backward compatibility with existing callers.
        draws = np.random.randn(*X.shape)
    else:
        draws = np.random.default_rng(random_state).standard_normal(X.shape)
    return X + noise_level * draws
perturb_data(X_test.copy(), noise_level=0.01)
X_test_perturbed = 