In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

In [34]:
# Load dataset
hpc_data = pd.read_csv('/kaggle/input/network-traffic-for-dos-detection/dataset.csv')

In [35]:
# Preprocessing: Convert categorical variables to numerical labels using one-hot encoding
hpc_data = pd.get_dummies(hpc_data, columns=['vmcategory'])

In [36]:
# Drop unnecessary columns
hpc_data.drop(columns=['Id', 'IP', 'FunctionId', 'functionTrigger', 'timestamp'], inplace=True)

In [37]:
# Split data into features (X) and labels (y)
X = hpc_data.drop(columns=['bot'])
y = hpc_data['bot']

In [38]:
# Normalize features
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

In [39]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=42)

In [40]:
# Build Support Vector Machine (SVM) classifier
clf = SVC(kernel='rbf', random_state=42)

In [41]:
# Train the classifier
start_training_time = time.time()
clf.fit(X_train, y_train)
end_training_time = time.time()


In [43]:
# Make predictions on the testing set
start_prediction_time = time.time()
y_pred = clf.predict(X_test)
end_prediction_time = time.time()

# Calculate overhead (training time) and delay (prediction time)
overhead = end_training_time - start_training_time
delay = end_prediction_time - start_prediction_time

# Evaluate performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print performance metrics
print("Performance Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# Print overhead and delay
print("Overhead (Training Time):", overhead, "seconds")
print("Delay (Prediction Time per Sample):", delay / len(X_test), "seconds")

Performance Metrics:
Accuracy: 0.7021041566447521
Precision: 0.7021041566447521
Recall: 1.0
F1-score: 0.8249837754098017
Overhead (Training Time): 604.5587801933289 seconds
Delay (Prediction Time per Sample): 0.005169073354087733 seconds
