In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import os

In [None]:
def load_dataset(path="nsl_kdd_sample.csv", n_samples=1000, seed=None):
    """Load the dataset at ``path``, generating a synthetic stand-in if it is absent.

    If ``path`` exists it is read with ``pd.read_csv`` and returned unchanged.
    Otherwise a random NSL-KDD-like table is generated, written to ``path``
    as CSV (without the index), and returned.

    Parameters
    ----------
    path : str
        Location of the CSV file to read, or to create when missing.
    n_samples : int, default 1000
        Number of synthetic rows to generate. Ignored when ``path`` exists.
    seed : int or None, default None
        Seed for the synthetic data. ``None`` draws from NumPy's global random
        state (so a prior ``np.random.seed(...)`` is still honoured); an
        integer uses a private generator and leaves the global state untouched.

    Returns
    -------
    pandas.DataFrame
        The loaded or freshly generated dataset.
    """
    if os.path.exists(path):
        df = pd.read_csv(path)
    else:
        print("Dataset not found. Creating synthetic NSL-KDD-like data...")

        # The np.random module and RandomState expose the same randint/choice
        # API, so both the seeded and the unseeded path share the code below.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Column order doubles as draw order; keep it stable so a given seed
        # always yields the same table.
        data = {
            "duration": rng.randint(0, 100, n_samples),
            "protocol_type": rng.choice(["tcp", "udp", "icmp"], n_samples),
            "service": rng.choice(["http", "ftp", "smtp", "dns"], n_samples),
            "flag": rng.choice(["SF", "REJ", "S0"], n_samples),
            "src_bytes": rng.randint(0, 5000, n_samples),
            "dst_bytes": rng.randint(0, 10000, n_samples),
            "land": rng.randint(0, 2, n_samples),
            "wrong_fragment": rng.randint(0, 3, n_samples),
            "urgent": rng.randint(0, 2, n_samples),
            "label": rng.choice(["normal", "dos", "probe", "r2l", "u2r"], n_samples)
        }

        df = pd.DataFrame(data)

        # to_csv does not create missing directories, so do it here.
        parent_dir = os.path.dirname(path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        df.to_csv(path, index=False)

    print(f"Dataset loaded: {df.shape}")
    return df

In [None]:
def preprocess(df):
    """Turn a raw dataset frame into a scaled feature matrix and binary labels.

    Steps, in order:
      1. Map the ``label`` column to 0 for ``"normal"`` and 1 for
         ``"dos"``, ``"probe"``, ``"r2l"`` and ``"u2r"``.
      2. Integer-encode ``protocol_type``, ``service`` and ``flag`` with a
         fresh ``LabelEncoder`` per column.
      3. Scale every non-label column with a ``MinMaxScaler`` (default
         settings) fitted on this data.

    The input frame is not modified; all work happens on a copy, so the
    function can be called repeatedly on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``label``, ``protocol_type``, ``service`` and ``flag``
        columns. All remaining columns are treated as features.

    Returns
    -------
    X_scaled : numpy.ndarray
        Scaled feature matrix (every column except ``label``).
    y : pandas.Series
        Mapped labels. Rows whose label is not in the mapping hold NaN.
    scaler : MinMaxScaler
        The scaler fitted on the features.

    Warns
    -----
    UserWarning
        If any label is missing from the mapping and therefore became NaN.

    Notes
    -----
    The per-column label encoders are not returned, so the categorical
    encoding cannot be re-applied to new data from this function's outputs.
    """
    import warnings

    label_map = {
        "normal": 0,
        "dos": 1,
        "probe": 1,
        "r2l": 1,
        "u2r": 1
    }
    cat_features = ["protocol_type", "service", "flag"]

    # Copy first: assigning into the caller's frame would make a second call
    # see already-mapped labels and turn all of them into NaN.
    df = df.copy()

    raw_labels = df["label"]
    df["label"] = raw_labels.map(label_map)

    # Series.map yields NaN for keys missing from the dict; surface that
    # instead of letting NaN labels flow silently into evaluation.
    unmapped = df["label"].isna()
    if unmapped.any():
        unknown = sorted(str(value) for value in raw_labels[unmapped].unique())
        warnings.warn(
            f"{int(unmapped.sum())} row(s) have labels outside the known "
            f"mapping and were set to NaN: {unknown}",
            stacklevel=2,
        )

    for col in cat_features:
        df[col] = LabelEncoder().fit_transform(df[col])

    X = df.drop("label", axis=1)
    y = df["label"]

    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y, scaler

In [None]:
def train_model(X, contamination=0.2, random_state=42):
    """Fit an ``IsolationForest`` on ``X`` and return the fitted model.

    Training is unsupervised: no labels are passed to the estimator.

    Parameters
    ----------
    X : array-like
        Feature matrix to fit on.
    contamination : float or "auto", default 0.2
        Forwarded to ``IsolationForest``; the expected share of outliers in
        ``X``. Tune this to the anticipated anomaly rate of the data.
    random_state : int or None, default 42
        Forwarded to ``IsolationForest`` so repeated runs build the same
        forest.

    Returns
    -------
    IsolationForest
        The fitted estimator.
    """
    model = IsolationForest(contamination=contamination, random_state=random_state)
    model.fit(X)
    return model

In [None]:
def evaluate(model, X, y):
    """Print a classification report and display a confusion-matrix heatmap.

    ``model.predict(X)`` is converted to class labels before scoring: a raw
    prediction equal to 1 becomes class 0, any other value becomes class 1.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    X : array-like
        Features to predict on.
    y : array-like
        Ground-truth class labels to compare against.

    Returns
    -------
    None
        Results are printed and plotted only.
    """
    raw_predictions = model.predict(X)
    # Flip the encoding: 1 from the model means class 0, everything else class 1.
    y_pred = [int(raw != 1) for raw in raw_predictions]

    report = classification_report(y, y_pred)
    print("\nClassification Report:\n", report)

    matrix = confusion_matrix(y, y_pred)
    # heatmap draws on the current axes and hands them back for labelling.
    ax = sns.heatmap(matrix, annot=True, fmt="d", cmap="Blues")
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    plt.show()

In [None]:
def save(model, scaler):
    """Write the model and scaler to the ``models`` directory with joblib.

    The directory is created if it does not exist. The model goes to
    ``models/ids_model.pkl`` and the scaler to ``models/ids_scaler.pkl``;
    existing files at those paths are overwritten.

    Parameters
    ----------
    model : object
        Trained model to serialise.
    scaler : object
        Fitted scaler to serialise.
    """
    artifacts = (
        ("models/ids_model.pkl", model),
        ("models/ids_scaler.pkl", scaler),
    )

    os.makedirs("models", exist_ok=True)
    for target_path, artifact in artifacts:
        joblib.dump(artifact, target_path)

    print("Model and scaler saved.")
