In [35]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
# Load merged dataset
df=pd.read_csv("road/preprocessed/merged/attack_data_without_masquerade.csv")

X = df.drop(columns=['Flag'], errors='ignore')
y = df['Flag']

# Stratified split Train/Test (70/30)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42, stratify=y
)

# Confirm sizes
print("Train samples:", len(X_train))
print("Test samples:", len(X_test))
results = []

Train samples: 1078308
Test samples: 462133


In [36]:
"""----------DNN----------"""
#for bs in batch_sizes:
print(f"\nTraining with batch size: 32")
model = Sequential([   
Input(shape=(X_train.shape[1],)),  # Input layer (should be 10 features)
Dense(16, activation='relu'),#Dropout(0.3),
Dense(16, activation='relu'),#Dropout(0.3),
Dense(16, activation='relu'),#Dropout(0.3),
Dense(16, activation='relu'),#Dropout(0.3),
Dense(1, activation='sigmoid')  # Output layer: 1 neurons
])  # Rebuild model from scratch
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stop = EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)

history = model.fit(X_train, y_train, validation_split=0.1, epochs=50, batch_size=32, verbose=1)

    # ----- Predict -----
y_pred_prob = model.predict(X_test, batch_size=1024)
y_pred = (y_pred_prob > 0.5).astype(int)

# ----- Evaluation -----
# Convert multiclass to binary: 0 = normal, 1 = any attack
y_test_binary = (y_test != 0).astype(int)
y_pred_binary = (y_pred != 0).astype(int)

# Calculate F1 score and confusion matrix
f1 = f1_score(y_test_binary, y_pred_binary, zero_division=0)
cm = confusion_matrix(y_test_binary, y_pred_binary)
tn, fp, fn, tp = cm.ravel()
# Store in your results table
dnn=model
results.append(["DNN", tn, tp, f"{f1*100:.1f}%", fn, fp])
results_df = pd.DataFrame(results, columns=["Model", "Benign Samples", "Malicious Samples", "F1 score", "FN", "FP"])
display(results_df)


Training with batch size: 32
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


Unnamed: 0,Model,Benign Samples,Malicious Samples,F1 score,FN,FP
0,DNN,445522,11353,81.2%,3546,1712


In [37]:
from sklearn.metrics import f1_score

# ---------- Decision Tree ----------
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
f1 = f1_score(y_test, y_pred_dt, zero_division=0)
cm = confusion_matrix(y_test, y_pred_dt)
tn, fp, fn, tp = cm.ravel()
print("DT is ready")
results.append(["DT", tn, tp, f"{f1*100:.1f}%", fn, fp])

DT is ready


In [38]:
# ---------- Random Forest ----------
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
# ---------- Random Forest ----------
y_pred_rf = rf.predict(X_test)

f1 = f1_score(y_test, y_pred_rf, zero_division=0)
cm = confusion_matrix(y_test, y_pred_rf)
tn, fp, fn, tp = cm.ravel()

results.append(["RF", tn, tp, f"{f1*100:.1f}%", fn, fp])
print("RF is ready")

RF is ready


In [39]:
# ---------- Extra Trees ----------
et = ExtraTreesClassifier(n_estimators=100)
et.fit(X_train, y_train)
# ---------- Extra Trees ----------
y_pred_et = et.predict(X_test)

f1 = f1_score(y_test, y_pred_et, zero_division=0)
cm = confusion_matrix(y_test, y_pred_et)
tn, fp, fn, tp = cm.ravel()

results.append(["ET", tn, tp, f"{f1*100:.1f}%", fn, fp])
print("ET is ready")


ET is ready


In [40]:
from sklearn.metrics import f1_score, confusion_matrix

# Clean column names for XGBoost compatibility
X.columns = [col.replace("[", "_").replace("]", "") for col in X.columns]
# Drop Timestamp, use remaining as features
X = df.drop(columns=['Timestamp', 'Flag'], errors='ignore')
y = df['Flag']

# Fix column names for XGBoost
X.columns = [col.replace("[", "_").replace("]", "") for col in X.columns]

# Stratified split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=42, stratify=y
)
# ---------- XGBoost ----------
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', base_score=0.5)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

f1 = f1_score(y_test, y_pred_xgb, zero_division=0)
cm = confusion_matrix(y_test, y_pred_xgb)
tn, fp, fn, tp = cm.ravel()

results.append(["XGBoost", tn, tp, f"{f1*100:.1f}%", fn, fp])

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [41]:
# Save to DataFrame
results_df = pd.DataFrame(results, columns=["Model", "Benign Samples", "Malicious Samples", "F1 score", "FN", "FP"])

# Save to CSV and LaTeX
results_df.to_csv("road_IDS_evaluation_results.csv", index=False)
display(results_df)

Unnamed: 0,Model,Benign Samples,Malicious Samples,F1 score,FN,FP
0,DNN,445522,11353,81.2%,3546,1712
1,DT,446315,13205,91.0%,1694,919
2,RF,446323,13215,91.1%,1684,911
3,ET,446336,13196,91.0%,1703,898
4,XGBoost,446303,13217,91.0%,1682,931


In [42]:
import joblib
# Save the model
dnn.save("road/models/dnn_model.h5")
joblib.dump(rf, 'road/models/rf_model.pkl')
joblib.dump(xgb, 'road/models/xgb_model.pkl')
joblib.dump(dt, 'road/models/dt_model.pkl')
joblib.dump(et, 'road/models/et_model.pkl')


['road/models/et_model.pkl']