In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
import os


# Load the network dataset, or simulate one if it is missing

def load_dataset(path="nids_dataset.csv", *, n_samples=1000, random_state=None):
    """Load the NIDS dataset from ``path``, generating mock data if it is absent.

    When ``path`` does not exist, a table of random integer features plus a
    binary ``label`` column is generated, written to ``path`` as CSV (without
    the index) and returned. When ``path`` exists it is read with
    ``pandas.read_csv`` and ``n_samples`` / ``random_state`` are ignored.

    Args:
        path: Location of the CSV file to read, or to create when missing.
        n_samples: Number of mock rows to generate when the file is missing.
        random_state: Optional seed for the mock data. ``None`` draws from
            NumPy's global random state, so a prior ``np.random.seed()`` call
            still governs the output.

    Returns:
        The loaded or freshly generated ``pandas.DataFrame``.
    """
    if os.path.exists(path):
        df = pd.read_csv(path)
    else:
        print("Dataset not found. Generating mock NIDS data...")

        # A private generator keeps seeded runs reproducible without touching
        # NumPy's global state; unseeded runs keep using the global state.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        # Column name -> (low, high) bounds for randint; high is exclusive.
        column_bounds = {
            "duration": (0, 5000),
            "src_bytes": (0, 50000),
            "dst_bytes": (0, 30000),
            "wrong_fragment": (0, 3),
            "urgent": (0, 2),
            "count": (1, 100),
            "srv_count": (1, 100),
            "label": (0, 2),  # 0 = Normal, 1 = Attack
        }

        df = pd.DataFrame(
            {
                name: rng.randint(low, high, n_samples)
                for name, (low, high) in column_bounds.items()
            }
        )
        df.to_csv(path, index=False)

    print(f"Loaded dataset with shape: {df.shape}")
    return df


# Preprocess features

def preprocess(df):
    """Separate the ``label`` column from ``df`` and standardize the rest.

    Args:
        df: DataFrame with a ``label`` column; every other column is treated
            as a feature and passed to ``StandardScaler``.

    Returns:
        A tuple ``(X_scaled, y, scaler)``: the result of
        ``StandardScaler.fit_transform`` on the feature columns, the ``label``
        column, and the fitted scaler instance.
    """
    features = df.drop(columns=["label"])
    target = df["label"]

    # NOTE(review): the scaler is fitted on every row passed in. If callers
    # split into train/test afterwards, the scaling statistics also include
    # the test rows -- confirm this is intended.
    feature_scaler = StandardScaler()
    scaled_features = feature_scaler.fit_transform(features)

    return scaled_features, target, feature_scaler


In [None]:

#  Train the ML Model

def train_model(X_train, y_train, *, n_estimators=100, random_state=None):
    """Fit a ``GradientBoostingClassifier`` on the training data.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        n_estimators: Number of boosting stages for the classifier.
        random_state: Optional seed forwarded to the classifier so training
            can be made reproducible; ``None`` keeps the estimator's default
            unseeded behaviour.

    Returns:
        The fitted ``GradientBoostingClassifier``.
    """
    model = GradientBoostingClassifier(
        n_estimators=n_estimators,
        random_state=random_state,
    )
    model.fit(X_train, y_train)
    return model


#  Evaluate Performance

def evaluate(model, X_test, y_test):
    """Print a classification report and show the confusion matrix heatmap.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The report is printed and the heatmap is shown via
        ``plt.show()``.
    """
    predictions = model.predict(X_test)

    report = classification_report(y_test, predictions)
    print("\nClassification Report:\n", report)

    matrix = confusion_matrix(y_test, predictions)
    # heatmap draws on the current axes and returns them, so labelling through
    # the returned Axes targets the same plot as the pyplot state functions.
    axes = sns.heatmap(matrix, annot=True, fmt="d", cmap="YlGnBu")
    axes.set_xlabel("Predicted")
    axes.set_ylabel("Actual")
    axes.set_title("Confusion Matrix")
    plt.show()