In [2]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report
import joblib

# Load numeric data
df = pd.read_csv("../data/raw/UNSW_NB15_training-set.csv")
df = df.dropna()
df['label'] = df['attack_cat'].apply(lambda x: 0 if x == 'Normal' else 1)
df = df.drop(columns=['attack_cat'])
numeric_df = df.select_dtypes(include=['int64', 'float64'])

# Labels
y = df['label']
X = numeric_df

# Train Isolation Forest
model_if = IsolationForest(n_estimators=200, contamination=0.1, random_state=42)
model_if.fit(X)

# Predictions
preds = model_if.predict(X)

# Convert Isolation Forest output to 0 = normal, 1 = attack
preds = [1 if p == -1 else 0 for p in preds]

print(classification_report(y, preds))

joblib.dump(model_if, "../models/isolation_forest.pkl")



              precision    recall  f1-score   support

           0       0.31      0.87      0.45     56000
           1       0.57      0.08      0.15    119341

    accuracy                           0.33    175341
   macro avg       0.44      0.48      0.30    175341
weighted avg       0.49      0.33      0.24    175341



['../models/isolation_forest.pkl']

In [4]:
from sklearn.svm import OneClassSVM

ocsvm = OneClassSVM(kernel='rbf', nu=0.1, gamma='scale')
ocsvm.fit(X)

preds_svm = ocsvm.predict(X)
preds_svm = [1 if p == -1 else 0 for p in preds_svm]

print(classification_report(y, preds_svm))

joblib.dump(ocsvm, "../models/oneclass_svm.pkl")


              precision    recall  f1-score   support

           0       0.30      0.84      0.44     56000
           1       0.54      0.09      0.15    119341

    accuracy                           0.33    175341
   macro avg       0.42      0.46      0.30    175341
weighted avg       0.46      0.33      0.24    175341



['../models/oneclass_svm.pkl']