In [3]:
import pandas as pd
import numpy as np
import json
from datetime import datetime
from pathlib import Path

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import joblib

In [4]:
train_path = "../../data/KDD_reduced.csv"


train_df = pd.read_csv(train_path)

In [5]:
target_col = "attack_class"

X = train_df.drop(columns=[target_col, "attack"])
y = train_df[target_col]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
categorical_features = ["protocol_type", "service", "flag"]
numerical_features = [col for col in X_train.columns if col not in categorical_features]

preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ("num", StandardScaler(), numerical_features)
])


In [7]:
svm_pipeline = Pipeline([
    ("preprocessing", preprocessor),
    ("classifier", SVC(
        kernel="rbf",
        C=1.0,
        gamma="scale",
        class_weight="balanced"
    ))
])

svm_pipeline.fit(X_train, y_train)
y_pred = svm_pipeline.predict(X_test)

In [8]:
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

report = classification_report(y_test, y_pred, output_dict=True)
print(classification_report(y_test, y_pred))

Confusion Matrix:
 [[ 9210     8     6     0     0]
 [   46 12924   101   234    81]
 [    3     8  2360     0     2]
 [    0     6     0   186     7]
 [    0     3     0     2     8]]
              precision    recall  f1-score   support

         DoS       0.99      1.00      1.00      9224
      Normal       1.00      0.97      0.98     13386
       Probe       0.96      0.99      0.98      2373
         R2L       0.44      0.93      0.60       199
         U2R       0.08      0.62      0.14        13

    accuracy                           0.98     25195
   macro avg       0.69      0.90      0.74     25195
weighted avg       0.99      0.98      0.98     25195



In [9]:
artifacts_dir = Path("../../models/artifacts")
artifacts_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(svm_pipeline, artifacts_dir / "svm.joblib")

# Metadata
metadata_path = Path("../../models/metadata.json")
metadata = {}
if metadata_path.exists():
    with open(metadata_path, "r") as f:
        metadata = json.load(f)

metadata["SVM_RBF"] = {
    "model_name": "SVM (RBF Kernel)",
    "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "task": "Intrusion Detection",
    "metrics": report,
    "hyperparameters": svm_pipeline.named_steps["classifier"].get_params()
}

with open(metadata_path, "w") as f:
    json.dump(metadata, f, indent=4)

In [None]:
model_path = "../../models/svm_model.joblib"

In [13]:
joblib.dump(svm_pipeline, model_path)

['../../data/svm_model.joblib']