In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

In [5]:
df = pd.read_csv(".././working/combined_dataset.csv")

# Drop non-numeric columns (except 'Label')
non_numeric_cols = ["Flow ID", "Src IP", "Dst IP", "Timestamp"]
df = df.drop(columns=non_numeric_cols, errors='ignore')

# Encode categorical label (Label)
label_encoder = LabelEncoder()
df["Label"] = label_encoder.fit_transform(df["Label"])

In [11]:
scaler = StandardScaler()
X = df.drop(columns=["Label"])
y = df["Label"].values
df.replace([np.inf, -np.inf], np.nan, inplace=True)
# Fill NaN values with the mean of the column
df.fillna(df.mean(), inplace=True)
X_scaled = scaler.fit_transform(X)

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [12]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# (B) XGBoost Classifier
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_test)

Parameters: { "use_label_encoder" } are not used.



In [13]:
print("🎯 Random Forest Accuracy:", accuracy_score(y_test, rf_preds))
print(classification_report(y_test, rf_preds, target_names=label_encoder.classes_))

print("\n🎯 XGBoost Accuracy:", accuracy_score(y_test, xgb_preds))
print(classification_report(y_test, xgb_preds, target_names=label_encoder.classes_))

🎯 Random Forest Accuracy: 0.8366893586528126
                    precision    recall  f1-score   support

            benign       0.63      0.72      0.67     12050
     ddos_ack_frag       0.98      0.97      0.97     73457
   ddos_http_flood       0.99      0.98      0.98     15141
   ddos_icmp_flood       0.49      0.50      0.49      6053
    ddos_icmp_frag       0.51      0.52      0.52      9190
  dict_brute_force       0.07      0.05      0.06        95
      dns_spoofing       0.54      0.42      0.47      2213
mirai_greeth_flood       0.48      0.43      0.45      5294
    mitm_arp_spoof       0.57      0.47      0.52      2490
     sql_injection       0.27      0.16      0.20       189
  uploading_attack       0.43      0.26      0.32        39
vulnerability_scan       0.72      0.74      0.73     13241
               xss       0.31      0.16      0.21        98

          accuracy                           0.84    139550
         macro avg       0.54      0.49      0.51    