<a href="https://colab.research.google.com/github/Hushpuppyzac/DLI-Assignment/blob/main/GUI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q gradio tensorflow joblib pandas numpy scikit-learn

import gradio as gr
import pandas as pd
import numpy as np
import joblib, traceback, io, os
import matplotlib.pyplot as plt

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_curve, roc_auc_score, classification_report
)

import tensorflow as tf
from tensorflow.keras.models import model_from_json
from tensorflow.keras.optimizers import Adam


# =========================
# 1) Load ANN from .pkl
# =========================
# Location of the joblib-serialized ANN bundle read by load_ann_from_pkl().
# NOTE(review): the path presumably requires Google Drive to be mounted at
# /content/drive before this cell runs — confirm for your environment.
PKL_PATH = "/content/drive/MyDrive/Eric_ann_ddos.pkl"  # <-- change if needed

def load_ann_from_pkl(pkl_path):
    """Rebuild a compiled Keras model (plus preprocessing extras) from a bundle.

    The file at ``pkl_path`` is read with ``joblib.load`` and must contain a
    dict whose ``"type"`` entry equals ``"ann_bundle_v1"``. The dict supplies:

      - ``"model_json"``: architecture, passed to ``model_from_json``
      - ``"weights"``: passed to ``model.set_weights``
      - ``"compile"`` (optional): dict with ``lr`` / ``loss`` / ``metrics``;
        defaults are 1e-3, ``"binary_crossentropy"`` and ``["accuracy"]``
      - ``"scaler"`` / ``"features"`` (optional): returned as-is, or ``None``

    Returns:
        Tuple ``(model, scaler, features)``.

    Raises:
        ValueError: if the loaded object is not a dict tagged ``ann_bundle_v1``.
    """
    # NOTE: joblib.load unpickles the file, so only load bundles you trust.
    payload = joblib.load(pkl_path)
    if not isinstance(payload, dict) or payload.get("type") != "ann_bundle_v1":
        raise ValueError("Not an ANN bundle (.pkl with type='ann_bundle_v1').")

    net = model_from_json(payload["model_json"])

    # Compile settings are optional; fall back to the defaults key by key.
    compile_cfg = payload.get("compile", {})
    learning_rate = compile_cfg.get("lr", 1e-3)
    loss_name = compile_cfg.get("loss", "binary_crossentropy")
    metric_names = compile_cfg.get("metrics", ["accuracy"])
    net.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss=loss_name,
        metrics=metric_names,
    )
    net.set_weights(payload["weights"])

    return net, payload.get("scaler", None), payload.get("features", None)

# Load the bundle once, when this cell/module runs. The resulting module-level
# names (model, scaler, feats) are read directly by predict_and_visualize().
model, scaler, feats = load_ann_from_pkl(PKL_PATH)


# =========================
# 2) Helpers
# =========================
def _read_csv_flex(file_obj_or_path):
    """Accepts Gradio file object or string path; tries tolerant read."""
    if hasattr(file_obj_or_path, "name"):
        path = file_obj_or_path.name
    elif isinstance(file_obj_or_path, str):
        path = file_obj_or_path
    else:
        raise ValueError("Unsupported CSV input.")

    try:
        return pd.read_csv(path, low_memory=False, on_bad_lines="skip", encoding_errors="replace")
    except Exception:
        return pd.read_csv(path, engine="python", on_bad_lines="skip", encoding_errors="replace")

def _prepare_numeric_matrix(df: pd.DataFrame, features, scaler):
    """
    Build exactly the feature matrix the scaler/model expect:
      - Prefer scaler.feature_names_in_ for canonical order
      - else use 'features' stored in .pkl
      - else use numeric columns present
      - add any missing columns as 0.0
      - strict column order
      - fill NaNs with 0.0
      - transform with scaler if provided
    Returns: (X_ndarray, X_used_df, missing_feature_list)
    """
    X = df.select_dtypes(include=[np.number]).copy()
    scaler_feats = list(getattr(scaler, "feature_names_in_", [])) if scaler is not None else []
    if len(scaler_feats) > 0:
        required = scaler_feats
    elif features is not None:
        required = list(features)
    else:
        required = list(X.columns)

    missing = [f for f in required if f not in X.columns]
    for m in missing:
        X[m] = 0.0

    X = X[required].fillna(0.0)
    X_nd = scaler.transform(X.values) if scaler is not None else X.values
    return X_nd, X, missing

def _predict_proba(model: tf.keras.Model, X_nd):
    """Run ``model.predict`` on ``X_nd`` (silently) and flatten the result to 1-D."""
    raw_output = model.predict(X_nd, verbose=0)
    flattened = raw_output.ravel()
    return flattened


# =========================
# 3) Core: predict + visualize
# =========================
# Column names (compared after strip + lower-case) treated as the label column.
_LABEL_COLUMN_NAMES = ("label", "target", "class", "attack", "category")
# Text labels (compared after strip + upper-case) and their binary encoding.
_LABEL_TEXT_TO_INT = {"BENIGN": 0, "DDOS": 1}


def _extract_binary_labels(df):
    """Find an optional label column in ``df`` and decode it to 0/1.

    Returns ``(label_col, y_true, note)``:
      - no label column:        ``(None, None, None)``
      - column decoded:         ``(name, int ndarray of 0/1, None)``
      - column not decodable:   ``(name, None, human-readable note)``
    """
    candidates = [c for c in df.columns if str(c).strip().lower() in _LABEL_COLUMN_NAMES]
    if not candidates:
        return None, None, None

    label_col = candidates[0]
    raw = df[label_col]
    # Text labels first (whitespace/case-insensitive), then numeric 0/1.
    decoded = raw.astype(str).str.strip().str.upper().map(_LABEL_TEXT_TO_INT)
    decoded = decoded.fillna(pd.to_numeric(raw, errors="coerce"))
    if decoded.isna().any() or not decoded.isin([0, 1]).all():
        note = (
            f"Label column '{label_col}' found but its values could not be read "
            "as BENIGN/DDoS or 0/1 — showing predictions only (no CM/ROC)."
        )
        return label_col, None, note
    return label_col, decoded.astype(int).values, None


def _predictions_csv_path():
    """Return where the full predictions CSV is written.

    Uses ``/content`` when that directory exists, otherwise the system
    temporary directory, so the app also runs outside that environment.
    """
    import tempfile

    preferred_dir = "/content"
    out_dir = preferred_dir if os.path.isdir(preferred_dir) else tempfile.gettempdir()
    return os.path.join(out_dir, "ann_ddos_predictions.csv")


def _confusion_matrix_figure(y_true, preds):
    """Draw the 2x2 BENIGN/DDoS confusion matrix and return the figure."""
    cm = confusion_matrix(y_true, preds, labels=[0, 1])
    fig, ax = plt.subplots()
    ax.imshow(cm, interpolation="nearest")
    ax.set_title("Confusion Matrix")
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])
    ax.set_xticklabels(["BENIGN", "DDoS"])
    ax.set_yticklabels(["BENIGN", "DDoS"])
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, cm[i, j], ha="center", va="center")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    fig.tight_layout()
    # Unregister from pyplot so figures do not pile up across calls; the
    # returned Figure object is still expected to be renderable by the caller.
    plt.close(fig)
    return fig


def _roc_figure(y_true, probs):
    """Draw the ROC curve with its AUC and return the figure.

    The scores are computed before the figure is created, so a metric
    failure propagates without leaving an open figure behind.
    """
    fpr, tpr, _ = roc_curve(y_true, probs)
    auc = roc_auc_score(y_true, probs)
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, label=f"AUC = {auc:.3f}")
    ax.plot([0, 1], [0, 1], linestyle="--")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curve")
    ax.legend()
    fig.tight_layout()
    plt.close(fig)
    return fig


def _probability_figure(probs, thr):
    """Plot the predicted probability per row with the threshold line."""
    fig, ax = plt.subplots()
    ax.plot(probs, label="P(DDoS)")
    ax.axhline(thr, linestyle="--", label=f"Threshold={thr:.2f}")
    ax.set_xlabel("Row index")
    ax.set_ylabel("Predicted Probability of DDoS")
    ax.set_title("Prediction Confidence per Sample")
    ax.legend()
    fig.tight_layout()
    plt.close(fig)
    return fig


def predict_and_visualize(csv_file, threshold):
    """Score an uploaded CSV with the loaded model and build all UI outputs.

    Uses the module-level ``model``, ``scaler`` and ``feats``.

    Args:
        csv_file: Path or uploaded-file object accepted by ``_read_csv_flex``.
        threshold: Decision threshold; a row is predicted 1 when its
            probability is >= this value. Converted with ``float()``.

    Returns:
        A 6-tuple ``(summary, preview, cm_fig, roc_fig, prob_fig, out_path)``:
        summary text, first 30 rows with ``prob_ddos``/``pred_label`` added,
        confusion-matrix figure and ROC figure (each ``None`` when labels are
        unavailable or the plot could not be built), per-row probability
        figure, and the path of the CSV holding all predictions.
        On an empty CSV or any exception the first element carries the
        message and the remaining five elements are ``None``.
    """
    try:
        df = _read_csv_flex(csv_file)
        if df.empty:
            return ("CSV is empty.", None, None, None, None, None)

        # Convert once so comparison, summary text and plot all agree.
        thr = float(threshold)

        # Optional ground truth; the label column never goes to the model.
        label_col, y_true, label_note = _extract_binary_labels(df)
        X_df = df.drop(columns=[label_col]) if label_col is not None else df

        # Prepare features
        X_nd, X_used, missing = _prepare_numeric_matrix(X_df, feats, scaler)

        # Predict
        probs = _predict_proba(model, X_nd)
        preds = (probs >= thr).astype(int)

        # Compose summary text
        lines = [
            f"Rows: {len(df)}  |  Numeric features used: {X_used.shape[1]}",
            f"Threshold: {thr:.2f}",
        ]
        if missing:
            shown = ", ".join(map(str, missing[:10]))
            suffix = " ..." if len(missing) > 10 else ""
            lines.append(f"Filled {len(missing)} missing feature(s) with 0.0: {shown}{suffix}")

        # Build preview table
        preview = df.copy()
        preview["prob_ddos"] = probs
        preview["pred_label"] = preds
        preview_show = preview.head(30)

        # Save full results
        out_path = _predictions_csv_path()
        preview.to_csv(out_path, index=False)

        cm_fig = None
        roc_fig = None

        # If labels provided, compute metrics + plots
        if y_true is not None and len(y_true) == len(preds):
            acc = accuracy_score(y_true, preds)
            prec = precision_score(y_true, preds, zero_division=0)
            rec = recall_score(y_true, preds, zero_division=0)
            f1 = f1_score(y_true, preds, zero_division=0)
            lines.append(f"Accuracy: {acc:.3f} | Precision(DDoS): {prec:.3f} | Recall(DDoS): {rec:.3f} | F1: {f1:.3f}")

            cm_fig = _confusion_matrix_figure(y_true, preds)

            # Best-effort: an ROC failure must not discard the other outputs,
            # but it is reported in the summary instead of being hidden.
            try:
                roc_fig = _roc_figure(y_true, probs)
            except Exception as roc_err:
                lines.append(f"ROC curve skipped: {roc_err}")
        elif label_note is not None:
            lines.append(label_note)
        else:
            lines.append("No labels found — showing predictions only (no CM/ROC).")

        prob_fig = _probability_figure(probs, thr)

        summary = "\n".join(lines)
        return summary, preview_show, cm_fig, roc_fig, prob_fig, out_path

    except Exception as e:
        # Top-level UI boundary: show the failure in the summary box rather
        # than letting the callback raise.
        tb = traceback.format_exc()
        err = f"Error:\n{e}\n\nTraceback:\n{tb}"
        return (err, None, None, None, None, None)


# =========================
# 4) Gradio UI
# =========================
# Build the page. Components are declared in the order they should appear, so
# the statement order inside this context manager is significant.
with gr.Blocks(title="ANN DDoS Detector") as demo:
    gr.Markdown("## 🛡️ ANN DDoS Detector — Predict & Visualize")
    # Inputs: the CSV to score ...
    with gr.Row():
        csv_in = gr.File(label="Upload CSV")
    # ... and the decision threshold next to the run button.
    with gr.Row():
        thr = gr.Slider(0.05, 0.95, value=0.50, step=0.01,
                        label="Decision threshold (DDoS if prob ≥ threshold)")
        run_btn = gr.Button("Run Prediction & Visualization", variant="primary")

    # Outputs: text summary (also carries error messages) and a table preview.
    summary = gr.Textbox(label="Summary / Logs", lines=8)
    preview = gr.Dataframe(label="Preview (first 30 rows)", wrap=True)

    # Label-dependent plots side by side; predict_and_visualize returns None
    # for them when no usable labels are available.
    with gr.Row():
        cm_plot = gr.Plot(label="Confusion Matrix (requires labels)")
        roc_plot = gr.Plot(label="ROC Curve (requires labels)")

    prob_plot = gr.Plot(label="Probability per Row")
    dl = gr.File(label="Download full predictions (CSV)")

    # The outputs list must stay in the same order as the 6-tuple returned by
    # predict_and_visualize.
    run_btn.click(
        fn=predict_and_visualize,
        inputs=[csv_in, thr],
        outputs=[summary, preview, cm_plot, roc_plot, prob_plot, dl]
    )

# NOTE(review): share=True presumably requests a public share link and
# debug=True keeps the call attached to the cell for error output — confirm
# against the Gradio launch() documentation before exposing real data.
demo.launch(share=True, debug=True)
