<a href="https://colab.research.google.com/github/Saheed7/anomaly-agent-demo/blob/main/Anomaly_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Anomaly_Agent_Demo

In [None]:
# 1. Install Dependencies
!pip install pandas scikit-learn shap gradio transformers

In [None]:
# 2. Import Libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import MinMaxScaler
import shap
import gradio as gr
import joblib

In [None]:
# 3. Load Dataset (Replace with Edge-IIoTset/CIC-IoT 2023 paths)
# Note: For Edge-IIoTset, use: df = pd.read_csv("Edge-IIoTset.csv")
# For CIC-IoT 2023, use: df = pd.read_csv("CIC-IoT-2023.csv")
# Example synthetic data for illustration:
SEED = 42        # fixed so that Restart & Run All regenerates the same rows
N_SAMPLES = 100  # number of synthetic traffic records


def make_synthetic_traffic(n_samples=N_SAMPLES, seed=SEED):
    """Generate a reproducible dictionary of random network-traffic columns.

    Args:
        n_samples: Number of records to generate.
        seed: Seed for the NumPy random generator; the same seed always
            yields the same records.

    Returns:
        dict mapping column name to a NumPy array of length ``n_samples``:
        ``packet_size`` (integers in [50, 1000)), ``protocol_type``
        ('Modbus', 'MQTT' or 'HTTP'), ``packet_rate`` (floats in [1, 100))
        and ``label`` ('Benign', 'DDoS' or 'Reconnaissance').
    """
    rng = np.random.default_rng(seed)
    return {
        'packet_size': rng.integers(50, 1000, size=n_samples),
        'protocol_type': rng.choice(['Modbus', 'MQTT', 'HTTP'], size=n_samples),
        'packet_rate': rng.uniform(1, 100, size=n_samples),
        'label': rng.choice(['Benign', 'DDoS', 'Reconnaissance'], size=n_samples)
    }


data = make_synthetic_traffic()
df = pd.DataFrame(data)

In [None]:
# 4. Preprocessing
def preprocess_data(df, numerical_features=('packet_size', 'packet_rate'),
                    categorical_features=('protocol_type',), label_column='label'):
    """Turn a raw traffic frame into a feature matrix and a label vector.

    Categorical columns are one-hot encoded, numerical columns are min-max
    scaled to [0, 1] and then quantized to 8-bit integers (0-255).

    Args:
        df: Raw DataFrame. It is not modified; all work happens on a copy.
        numerical_features: Names of the columns to scale and quantize.
        categorical_features: Names of the columns to one-hot encode.
        label_column: Name of the target column.

    Returns:
        Tuple ``(X, y)``: ``X`` is the encoded feature DataFrame with every
        column stored as ``uint8``; ``y`` is the label Series.

    Raises:
        KeyError: If any of the named columns is absent from ``df``.
    """
    numeric_cols = list(numerical_features)
    categorical_cols = list(categorical_features)

    required = numeric_cols + categorical_cols + [label_column]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"preprocess_data: missing column(s) {missing}")

    # get_dummies returns a new frame, so the caller's df stays untouched.
    # The dtype is pinned because the default differs between pandas versions
    # (bool on recent releases); pinning keeps the whole matrix uint8 instead
    # of a bool/uint8 mix.
    encoded = pd.get_dummies(df, columns=categorical_cols, dtype=np.uint8)

    # Normalize numerical features to [0, 1]
    scaled = MinMaxScaler().fit_transform(encoded[numeric_cols])

    # Quantize to 8-bit (example); astype truncates towards zero.
    quantized = (np.asarray(scaled) * 255).astype(np.uint8)
    for position, col in enumerate(numeric_cols):
        # Column-wise assignment replaces the column, so the uint8 dtype sticks.
        encoded[col] = quantized[:, position]

    # Separate features and labels
    X = encoded.drop(columns=[label_column])
    y = encoded[label_column]
    return X, y

# Build the feature matrix X and label vector y used by the training, SHAP
# and inference cells below.
X, y = preprocess_data(df)

In [None]:
# 5. Train Models
# Fixed seed so that re-running the notebook reproduces the same fitted models.
RANDOM_STATE = 42

models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    # probability=True enables predict_proba, which the inference cell calls;
    # the probability calibration shuffles data, hence the seed here as well.
    'SVM': SVC(kernel='rbf', C=1.0, probability=True, random_state=RANDOM_STATE),
    'Naïve Bayes': GaussianNB()
}

for name, model in models.items():
    model.fit(X, y)
    joblib.dump(model, f'{name}.pkl')  # Save models

In [None]:
# 6. SHAP Analysis
# KernelExplainer evaluates the model on perturbed copies of the background
# set, so its cost grows with the number of background rows. The background is
# therefore capped; when X has no more rows than the cap, shap.sample returns
# it unchanged.
SHAP_BACKGROUND_SIZE = 100

# A single float dtype keeps the arrays SHAP builds internally numeric even if
# X mixes bool and integer columns.
shap_input = X.astype(float)
background = shap.sample(shap_input, SHAP_BACKGROUND_SIZE, random_state=0)

explainer = shap.KernelExplainer(models['Random Forest'].predict_proba, background)
shap_values = explainer.shap_values(shap_input)

In [None]:
# 7. Inference Pipeline
def anomaly_agent_inference(features, numerical_features=('packet_size', 'packet_rate')):
    """Score one traffic record with every trained model and explain it with SHAP.

    The record is transformed the same way as the training matrix ``X``:
    numerical features are min-max scaled with the training minimum/maximum,
    quantized to 8 bits, and categorical features are one-hot encoded and
    aligned to ``X.columns``. Without this step the models, which were fitted
    on 0-255 quantized values, would be fed raw magnitudes.

    Reads the module-level ``df``, ``X``, ``models`` and ``explainer``.
    NOTE(review): assumes ``df`` is still the raw frame that ``X`` was built
    from - confirm if the data-loading cell is changed.

    Args:
        features: dict mapping raw feature name to value for a single record.
        numerical_features: Names of the features to scale and quantize.

    Returns:
        Tuple ``(predictions, shap_explanation)``: ``predictions`` maps model
        name to ``{class label: probability}``; ``shap_explanation`` is
        whatever ``explainer.shap_values`` returns for the record.

    Raises:
        ValueError: If a numerical feature is absent or empty (None/NaN).
    """
    numeric_cols = list(numerical_features)
    input_df = pd.DataFrame([features])

    absent = [col for col in numeric_cols
              if col not in input_df.columns or input_df[col].isna().any()]
    if absent:
        raise ValueError(f"Missing value for numerical feature(s): {absent}")

    # Same arithmetic as MinMaxScaler.transform (x * scale - min * scale),
    # using the training range; a zero range is treated as 1 like sklearn does.
    raw_numeric = input_df[numeric_cols].astype(float)
    train_min = df[numeric_cols].min().astype(float)
    train_span = df[numeric_cols].max().astype(float) - train_min
    train_span = train_span.where(train_span != 0, 1.0)
    scale = 1.0 / train_span
    # Clip so values outside the training range saturate instead of wrapping
    # around when cast to uint8.
    scaled = (raw_numeric * scale - train_min * scale).clip(0.0, 1.0)
    quantized = (scaled * 255).astype(np.uint8)
    input_df = input_df.assign(**{col: quantized[col] for col in numeric_cols})

    # One-hot encode, then align column order and dtypes with the training matrix.
    input_df = pd.get_dummies(input_df)
    input_df = input_df.reindex(columns=X.columns, fill_value=0)
    input_df = input_df.astype(X.dtypes.to_dict())

    # Predict
    predictions = {}
    for name, model in models.items():
        proba = model.predict_proba(input_df)[0]
        predictions[name] = {label: float(p) for label, p in zip(model.classes_, proba)}

    # SHAP explanation
    shap_explanation = explainer.shap_values(input_df)
    return predictions, shap_explanation

In [None]:
# 8. Gradio Interface for Demo
def _shap_row_for_class(shap_exp, class_index):
    """Return the per-feature SHAP vector of one class for a single record.

    Handles both layouts ``shap_values`` can produce for a multi-class model:
    a list with one ``(n_samples, n_features)`` array per class, or a single
    ``(n_samples, n_features, n_classes)`` array.
    """
    if isinstance(shap_exp, (list, tuple)):
        return np.atleast_2d(np.asarray(shap_exp[class_index]))[0]
    values = np.asarray(shap_exp)
    if values.ndim == 3:
        return values[0, :, class_index]
    # Single-output explanation: one row of feature attributions.
    return np.atleast_2d(values)[0]


def predict(packet_size, packet_rate, protocol_type):
    """Gradio callback: classify one traffic record and summarize its SHAP values.

    Args:
        packet_size: Raw packet size entered by the user.
        packet_rate: Raw packet rate entered by the user.
        protocol_type: Selected protocol name.

    Returns:
        Tuple ``(confidences, explanation)``: ``confidences`` is a flat
        ``{"<model>: <class>": probability}`` dict, and ``explanation`` is a
        one-line string with the SHAP values of the first two features for
        the class the Random Forest considers most likely.
    """
    features = {
        'packet_size': packet_size,
        'packet_rate': packet_rate,
        'protocol_type': protocol_type
    }
    predictions, shap_exp = anomaly_agent_inference(features)

    # gr.Label accepts a flat {label: confidence} mapping, so the per-model
    # dictionaries are flattened into "<model>: <class>" keys.
    confidences = {
        f"{model_name}: {label}": float(prob)
        for model_name, per_class in predictions.items()
        for label, prob in per_class.items()
    }

    # The explainer wraps the Random Forest, so its most probable class picks
    # which class's SHAP values are reported. Dict order follows classes_,
    # which is also the class order of the SHAP output.
    forest_probs = predictions.get('Random Forest') or {}
    if forest_probs:
        class_names = list(forest_probs)
        top_index = max(range(len(class_names)), key=lambda i: forest_probs[class_names[i]])
        top_class = class_names[top_index]
    else:
        top_index, top_class = 0, 'n/a'

    # NOTE(review): assumes features 0 and 1 of the model input are
    # packet_size and packet_rate, as the original message did - confirm
    # against X.columns.
    row = _shap_row_for_class(shap_exp, top_index)
    explanation = (
        f"SHAP Values for class '{top_class}': "
        f"{row[0]:.2f} (packet_size), {row[1]:.2f} (packet_rate)"
    )
    return confidences, explanation

# Input widgets, listed in the same order as predict()'s parameters.
demo_inputs = [
    gr.Number(label="Packet Size"),
    gr.Number(label="Packet Rate"),
    gr.Dropdown(['Modbus', 'MQTT', 'HTTP'], label="Protocol Type"),
]

# Output widgets, one per value returned by predict().
demo_outputs = [
    gr.Label(label="Model Predictions"),
    gr.Textbox(label="Explanation"),
]

# Wire the callback to the widgets and start the demo server.
iface = gr.Interface(fn=predict, inputs=demo_inputs, outputs=demo_outputs, title="Anomaly Agent Demo")

iface.launch()