In [21]:
import pandas as pd
import numpy as np
import json
from datetime import datetime
from pathlib import Path

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report

import joblib

In [22]:
# Locations of the KDDTrain+ / KDDTest+ CSV files, relative to this notebook.
train_path, test_path = "../../data/KDDTrain+.csv", "../../data/KDDTest+.csv"

# Read each split into its own DataFrame (train first, then test).
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

In [23]:
# Column holding the class label the model is trained to predict.
target_col = "attack_class"

# Feature matrices: every column except the label and the "attack" column,
# built the same way for both splits.
X_train, X_test = (
    split_df.drop(columns=[target_col, "attack"])
    for split_df in (train_df, test_df)
)

# Label vectors for the two splits.
y_train, y_test = train_df[target_col], test_df[target_col]

In [24]:
# Columns that get one-hot encoded; every other feature column is scaled.
categorical_features = ["protocol_type", "service", "flag"]
numerical_features = X_train.columns[
    ~X_train.columns.isin(categorical_features)
].tolist()

# Per-column preprocessing:
#   - "cat": one-hot encoding; categories not seen during fit are ignored
#            instead of raising at transform time.
#   - "num": standardization of all remaining columns.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ("num", StandardScaler(), numerical_features),
    ]
)


In [25]:
# Preprocessing and an RBF-kernel SVC chained into one estimator, so the
# same fitted transforms are applied at training and prediction time.
# class_weight="balanced" re-weights classes inversely to their frequency.
svm_pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        (
            "classifier",
            SVC(C=1.0, kernel="rbf", gamma="scale", class_weight="balanced"),
        ),
    ]
)

# Fit on the training split, then predict labels for the test split.
y_pred = svm_pipeline.fit(X_train, y_train).predict(X_test)

In [26]:
# Compute both evaluation artifacts first, then display them.
# In the confusion matrix, rows are true classes and columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
# Dict form of the report is kept in `report` so it can be stored later.
report = classification_report(y_true=y_test, y_pred=y_pred, output_dict=True)

print("Confusion Matrix:\n", cm)
# Human-readable text form of the same report.
print(classification_report(y_true=y_test, y_pred=y_pred))

Confusion Matrix:
 [[5730 1653   76    0    0]
 [  61 9398  226    8   18]
 [ 176  555 1688    0    2]
 [   0 2256   28  354  247]
 [   0   46    0    5   16]]
              precision    recall  f1-score   support

         DoS       0.96      0.77      0.85      7459
      Normal       0.68      0.97      0.80      9711
       Probe       0.84      0.70      0.76      2421
         R2L       0.96      0.12      0.22      2885
         U2R       0.06      0.24      0.09        67

    accuracy                           0.76     22543
   macro avg       0.70      0.56      0.54     22543
weighted avg       0.82      0.76      0.74     22543



In [9]:
# Persist the fitted pipeline (preprocessing + classifier) as one artifact.
artifacts_dir = Path("../../models/artifacts")
artifacts_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(svm_pipeline, artifacts_dir / "svm.joblib")

# Metadata
# Load whatever is already stored in metadata.json so that this cell only
# adds or overwrites the "SVM_RBF" entry and keeps every other key intact.
metadata_path = Path("../../models/metadata.json")
metadata = {}
if metadata_path.exists():
    existing_text = metadata_path.read_text(encoding="utf-8")
    # An empty file means "no entries yet". Malformed JSON still raises, so a
    # damaged file is never silently replaced.
    if existing_text.strip():
        metadata = json.loads(existing_text)

metadata["SVM_RBF"] = {
    "model_name": "SVM (RBF Kernel)",
    "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "task": "Intrusion Detection",
    "metrics": report,
    "hyperparameters": svm_pipeline.named_steps["classifier"].get_params()
}

# Serialize BEFORE opening the file for writing: mode "w" truncates the file
# immediately, so a serialization error raised while the file is open would
# leave metadata.json empty or partial and drop the previously stored entries.
metadata_json = json.dumps(metadata, indent=4)
with open(metadata_path, "w", encoding="utf-8") as f:
    f.write(metadata_json)