In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset
file_path = 'cyber_dataset.csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Clean columns with string representations of lists
def clean_column(column):
    return column.apply(lambda x: float(ast.literal_eval(x)[0]) if isinstance(x, str) and '[' in x else float(x))

# Apply cleaning to relevant columns
columns_to_clean = ['DnsAnswerTTL', 'NumberOfAnswers', 'DnsResponseCode', 'DnsOpCode']
for col in columns_to_clean:
    data[col] = clean_column(data[col])

# Define features and target
features = ['DnsAnswerTTL', 'NumberOfAnswers', 'DnsResponseCode', 'DnsOpCode']
data['anomaly'] = np.where((data['sus'] == 1) | (data['evil'] == 1), 1, 0)  # Label anomalies
data = data.dropna(subset=features)  # Ensure no missing values
X = data[features].astype(float)
y = data['anomaly']

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build and train the SVM model with RBF kernel
svm_model = SVC(
    kernel='rbf',              # Use radial basis function kernel
    C=1.0,                     # Regularization parameter
    gamma='scale',             # Kernel coefficient
    probability=True,          # Enable probability estimates for classification
    random_state=42
)
svm_model.fit(X_train, y_train)

# Make predictions
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy (SVM): {accuracy:.4f}")

# Print classification report and confusion matrix
print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix (SVM):")
print(confusion_matrix(y_test, y_pred))


Test Accuracy (SVM): 0.9231

Classification Report (SVM):
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        12
           1       0.00      0.00      0.00         1

    accuracy                           0.92        13
   macro avg       0.46      0.50      0.48        13
weighted avg       0.85      0.92      0.89        13


Confusion Matrix (SVM):
[[12  0]
 [ 1  0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
