# Gradio Demo – Anomaly Detection

Score an uploaded CSV for anomalies using a saved PyCaret Isolation Forest pipeline (`iforest_pipeline`).

In [None]:
# app.py
# Gradio app for a saved PyCaret anomaly model: iforest_pipeline
# Run:
#   pip install gradio pandas pycaret
#   python app.py

import os
import tempfile
import pandas as pd
import gradio as gr

# pycaret imports (only what we need)
from pycaret.anomaly import load_model, predict_model

# ---- load the saved pipeline once at startup ----
# Make sure the model files saved via `save_model(model, 'iforest_pipeline')`
# are in the working directory, or pass an absolute/relative path without extension.
MODEL_NAME = "iforest_pipeline"
try:
    # Loaded at import time so every request reuses the same pipeline object.
    clf = load_model(MODEL_NAME)
except Exception as e:
    # Fail fast with an actionable message if the file isn't found or is
    # incompatible. `from e` records the original failure as the explicit
    # cause so the traceback shows it as the direct reason for this error.
    raise RuntimeError(
        f"Could not load model '{MODEL_NAME}'. Make sure the saved model files exist "
        f"and you are using a compatible PyCaret version. Original error: {e}"
    ) from e

# Helper: detect likely label/score column names returned by predict_model
def _find_col(candidates, cols):
    """Return the first name in ``candidates`` that is also in ``cols``.

    Candidates are tried in the order given, so earlier entries take
    priority. Returns ``None`` when none of them is present.
    """
    return next((name for name in candidates if name in cols), None)

def score_csv(file, top_pct):
    """Score an uploaded CSV with the loaded pipeline and summarise the result.

    Args:
        file: The uploaded CSV, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            through a ``.name`` attribute. ``None`` means nothing was uploaded.
        top_pct: Percentage (0–50 from the UI slider) of rows with the highest
            scores to flag in a ``TopPctFlag`` column. ``0`` or ``None``
            disables the highlight.

    Returns:
        A 3-tuple ``(table, message, download_path)``:

        * ``table`` – the scored DataFrame, or ``None`` if reading or scoring
          failed.
        * ``message`` – Markdown text with summary stats or an error message.
        * ``download_path`` – path of a CSV copy of ``table``, or ``None`` if
          it could not be written or scoring failed.

    Errors are reported through ``message`` rather than raised, so the UI
    always receives three values.
    """
    if file is None:
        return None, "Please upload a CSV with the same feature columns used during training.", None

    # The upload may arrive as a plain path or as a tempfile-like wrapper with
    # a `.name` attribute (this differs between Gradio versions/settings), so
    # accept both instead of assuming the wrapper.
    if isinstance(file, (str, os.PathLike)):
        csv_path = file
    else:
        csv_path = getattr(file, "name", file)

    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        return None, f"❌ Failed to read CSV: {e}", None

    try:
        preds = predict_model(clf, data=df.copy())  # non-destructive
    except Exception as e:
        return None, (
            "❌ predict_model failed. This usually means the CSV columns/types "
            "don’t match what the pipeline expects.\n\n"
            f"Error: {e}"
        ), None

    # Locate the label/score columns among the names predict_model may emit.
    cols = list(preds.columns)
    label_col = _find_col(["Anomaly", "Label", "Outlier", "anomaly", "label", "outlier"], cols)
    score_col = _find_col(["Anomaly_Score", "Score", "anomaly_score", "score"], cols)

    if label_col is not None:
        # Map common encodings to boolean-ish, leave others as-is.
        # (True/False hash equal to 1/0, so they are covered by those keys.)
        mapping = {1: True, 0: False, -1: True, "1": True, "0": False, "-1": True}
        preds["is_anomaly"] = preds[label_col].map(mapping).fillna(preds[label_col])

    highlight_on = bool(score_col and top_pct and top_pct > 0)
    highlighted = 0
    quantile_cut = None
    if highlight_on:
        # Multiply before dividing: `n * (pct / 100.0)` can land just below
        # the exact value (100 * 0.29 == 28.999999999999996) and truncate to
        # one row too few.
        k = max(1, int(len(preds) * top_pct / 100))
        # Highest scores considered most anomalous (convention in many algorithms; verify for your setup)
        ranks = preds[score_col].rank(method="first", ascending=False)
        mask_top = ranks <= k
        preds["TopPctFlag"] = mask_top
        highlighted = int(mask_top.sum())
        try:
            cut = float(preds[score_col].quantile(1 - top_pct / 100.0))
        except Exception:
            # The cut-off is display-only; never fail scoring because of it.
            cut = None
        # An empty/all-NaN score column yields NaN; don't print "nan".
        if cut is not None and cut == cut:
            quantile_cut = cut

    # Basic stats
    total = len(preds)
    anomaly_count = None
    if label_col is not None:
        anomaly_mask = preds["is_anomaly"].astype(str).isin(["True", "true", "1", "-1"])
        anomaly_count = int(anomaly_mask.sum())

    stats_lines = [f"**Rows scored:** {total}"]
    if anomaly_count is not None:
        stats_lines.append(f"**PyCaret-labeled anomalies:** {anomaly_count}")
    if score_col:
        stats_lines.append(f"**Score column:** `{score_col}`")
    if highlight_on:
        qtxt = f" ~≥ {quantile_cut:.6g}" if quantile_cut is not None else ""
        stats_lines.append(f"**Top {top_pct:.0f}% score highlight:** {highlighted}{qtxt}")
    summary = "\n\n".join(stats_lines)

    # Save a CSV for download. Each call gets its own uniquely named file so
    # concurrent or consecutive requests cannot overwrite each other's result.
    # The files are not removed here; they live in the system temp directory.
    try:
        fd, out_path = tempfile.mkstemp(prefix="iforest_scored_", suffix=".csv")
        os.close(fd)
        preds.to_csv(out_path, index=False)
    except Exception as e:
        # If writing fails, still return table + stats
        return preds, summary + f"\n\n⚠️ Could not create download file: {e}", None

    return preds, summary, out_path


# Build the UI. Components are created in the order they should appear.
with gr.Blocks(title="PyCaret Anomaly Detector (Isolation Forest)") as demo:
    # Intro / usage notes shown at the top of the page.
    gr.Markdown(
        """
        # PyCaret Anomaly Detector (Isolation Forest)
        Upload a **CSV** with the same features used for training your `iforest_pipeline`.
        The app will run `predict_model` and return the predicted anomaly label and score (if available).

        **Notes**
        - The model is loaded once at startup via `pycaret.anomaly.load_model('iforest_pipeline')`.
        - The *Top X% highlight* is a convenience to flag the highest scores for triage; it does **not** alter PyCaret labels.
        """
    )

    # Inputs side by side: the CSV upload and the highlight percentage.
    with gr.Row():
        csv_upload = gr.File(file_types=[".csv"], label="Upload CSV")
        pct_slider = gr.Slider(
            minimum=0,
            maximum=50,
            step=1,
            value=5,
            label="Highlight top X% by score",
            info="Set to 0 to disable.",
        )

    score_button = gr.Button("Score CSV", variant="primary")

    # Outputs, in the same order as the tuple returned by `score_csv`.
    scored_table = gr.Dataframe(label="Scored Data", interactive=False, wrap=True)
    summary_md = gr.Markdown()
    download_file = gr.File(label="Download Scored CSV")

    score_button.click(
        fn=score_csv,
        inputs=[csv_upload, pct_slider],
        outputs=[scored_table, summary_md, download_file],
    )

if __name__ == "__main__":
    # Pass share=True to launch() if you want a public link.
    demo.launch()


Transformation Pipeline and Model Successfully Loaded
* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.
