In [None]:
import pandas as pd
import numpy as np
import joblib
import gradio as gr
from datetime import datetime


In [None]:

# Quantile levels, taken over the anomaly scores of non-failure rows, that
# separate the Low / Medium / High risk bands.
LOW_THR, MED_THR = 0.80, 0.95

# Fitted artifacts: feature scaler, stage-1 anomaly model, stage-2 failure model.
scaler = joblib.load("scaler.pkl")
anom_model = joblib.load("anomaly_model.pkl")
failure_model = joblib.load("failure_model.pkl")

# Reference dataset: every column except the label is a model feature.
df = pd.read_parquet("datasets/new_summit_gpu_clean.parquet")
X_all = df.drop("is_failure", axis=1)
X_all_scaled = scaler.transform(X_all)

# The decision function is negated before use (presumably so that a larger
# score means "more anomalous" — confirm against the model's convention).
df["anomaly_score"] = -anom_model.decision_function(X_all_scaled)

# Thresholds come from rows labelled is_failure == 0 only.
normal_scores = df.loc[df["is_failure"] == 0, "anomaly_score"]
low_thr, med_thr = normal_scores.quantile([LOW_THR, MED_THR])

# In-memory log of evaluations, appended to by evaluate_row.
history = []


def assign_risk(score, *, low=None, med=None):
    """Map an anomaly score to a risk band label.

    Args:
        score: Anomaly score to classify.
        low: Upper bound (inclusive) of the "Low risk" band. Defaults to the
            module-level ``low_thr``.
        med: Upper bound (inclusive) of the "Medium risk" band. Defaults to
            the module-level ``med_thr``.

    Returns:
        ``"Low risk"``, ``"Medium risk"`` or ``"High risk"``.
    """
    # Resolve defaults at call time so the module-level thresholds are only
    # needed when the caller does not supply its own.
    low = low_thr if low is None else low
    med = med_thr if med is None else med

    if score <= low:
        return "Low risk"
    if score <= med:
        return "Medium risk"
    return "High risk"


def risk_bar(score, *, low=None, med=None):
    """Render an HTML bar whose fill and colour reflect an anomaly score.

    The bar is full at 1.5x the medium threshold. The fill is clamped to the
    0-100% range so that negative scores never produce a negative CSS width.

    Args:
        score: Anomaly score to visualise.
        low: Upper bound (inclusive) of the green band. Defaults to the
            module-level ``low_thr``.
        med: Upper bound (inclusive) of the orange band. Defaults to the
            module-level ``med_thr``.

    Returns:
        An HTML snippet (outer track ``div`` plus inner coloured ``div``).
    """
    low = low_thr if low is None else low
    med = med_thr if med is None else med

    max_score = med * 1.5
    if max_score > 0:
        fraction = score / max_score
    else:
        # A zero or negative medium threshold gives no usable scale (and
        # would divide by zero or invert the bar), so fall back to
        # empty/full depending on which side of the threshold we are on.
        fraction = 1.0 if score > med else 0.0
    percent = min(max(fraction, 0.0), 1.0) * 100

    if score <= low:
        color = "green"
    elif score <= med:
        color = "orange"
    else:
        color = "red"

    return f"""
    <div style="width:100%; background:#eee; border-radius:6px; overflow:hidden;">
        <div style="width:{percent:.0f}%; background:{color}; height:16px;"></div>
    </div>
    """


def evaluate_row(file):
    """Score one uploaded telemetry row and build the dashboard outputs.

    Stage 1 scales the row, computes its anomaly score (the negated
    ``anom_model.decision_function``) and maps it to a risk band via
    ``assign_risk``. Stage 2 runs only for "High risk" rows and asks
    ``failure_model`` for a failure probability.

    Side effects: appends one entry to the module-level ``history`` list and
    overwrites ``result_report.csv`` in the working directory.

    Args:
        file: The upload handed over by ``gr.File``: either an object exposing
            its path as ``.name`` or a plain path string. ``None`` means
            nothing was uploaded.

    Returns:
        A ``(html, history_df, report_file)`` tuple. On any error the first
        item is an HTML error message and the other two are ``None``.
    """
    # Imported locally so the block is self-contained; the rendered page is
    # kept in `page` rather than `html` to avoid shadowing the module name.
    from html import escape

    def _error(message):
        # Escape the message: exception text can echo content taken from the
        # uploaded file (column names, paths) and is rendered as HTML.
        return f"<b style='color:red;'>ERROR:</b> {escape(str(message))}", None, None

    try:
        if file is None:
            return _error("No file uploaded.")

        # Accept both file-like wrappers (path in `.name`) and plain paths.
        path = getattr(file, "name", file)
        sample = pd.read_csv(path)

        # Report missing features explicitly instead of a raw KeyError.
        missing = [col for col in X_all.columns if col not in sample.columns]
        if missing:
            return _error(
                "Missing required column(s): " + ", ".join(map(str, missing))
            )

        # Force same structure (columns and order) as training data
        sample = sample[X_all.columns]

        if sample.shape[0] != 1:
            return _error("Upload exactly ONE row.")

        X_scaled = scaler.transform(sample)
        anomaly_score = -anom_model.decision_function(X_scaled)[0]
        risk = assign_risk(anomaly_score)

        # Risk label
        if risk == "Low risk":
            color = "green"
            label = "LOW RISK"
            status = "Normal GPU state"
        elif risk == "Medium risk":
            color = "orange"
            label = "MEDIUM RISK"
            status = "Abnormal but not failure-like"
        else:
            color = "red"
            label = "HIGH RISK"
            status = "Suspicious GPU behavior"

        # Stage 1 output
        page = f"""
        <div style="font-family:Arial;">
        <h3>Risk Assessment</h3>

        <b>ANOMALY SCORE:</b> {anomaly_score:.4f}<br><br>

        <b>RISK THRESHOLDS</b><br>
        Low ≤ {low_thr:.4f}<br>
        Medium ≤ {med_thr:.4f}<br><br>

        <b style="color:{color}; font-size:18px;">{label}</b><br>
        {risk_bar(anomaly_score)}<br>

        <b>STATE:</b> {status}<br><br>
        """

        verdict = "Pattern is normal"
        # Stage 2 is skipped for both Low and Medium risk; name the actual
        # band so the exported report is not wrong for Medium-risk rows.
        skipped_band = "low" if risk == "Low risk" else "medium"
        fail_prob = f"Not evaluated because risk is {skipped_band}"

        # Stage 2 — the anomaly score is appended as one extra feature column.
        # NOTE(review): assumes failure_model was fitted on the scaled features
        # plus this column; confirm against the training code.
        if risk == "High risk":
            X_stage2 = np.column_stack([X_scaled, [anomaly_score]])
            fail_prob = float(failure_model.predict_proba(X_stage2)[0][1])
            verdict = "DBE FAILURE PATTERN" if fail_prob >= 0.5 else "Abnormal but not DBE-like"
            verdict_color = "red" if fail_prob >= 0.5 else "orange"

            page += f"""
            <h3>Failure Analysis</h3>

            <b>FAILURE PROBABILITY:</b> {fail_prob:.3f}<br>
            <div style="width:100%; background:#eee; border-radius:6px;">
                <div style="width:{fail_prob*100:.0f}%; background:{verdict_color}; height:16px;"></div>
            </div><br>

            <b style="color:{verdict_color}; font-size:18px;">{verdict}</b><br>
            """

        page += "</div>"

        # History log
        history.append([
            datetime.now().strftime("%H:%M:%S"),
            round(anomaly_score, 4),
            risk,
            verdict,
        ])

        history_df = pd.DataFrame(history, columns=["Time", "Anomaly Score", "Risk", "Verdict"])

        # Report export
        report = pd.DataFrame({
            "Metric": ["Anomaly Score", "Risk Level", "Failure Probability", "Final Verdict"],
            "Value": [round(anomaly_score, 4), risk, fail_prob, verdict],
        })

        report_file = "result_report.csv"
        report.to_csv(report_file, index=False)

        return page, history_df, report_file

    except Exception as e:
        # UI boundary: show any failure in the dashboard instead of letting
        # the request crash.
        return _error(e)


# Dashboard wiring: one CSV upload in; rendered assessment, run history table
# and a downloadable report out (the three values returned by evaluate_row).
upload_input = gr.File(label="Upload ONE telemetry row (CSV)")
assessment_view = gr.HTML(label="Assessment Output")
history_view = gr.Dataframe(label="Run History")
report_download = gr.File(label="Download Report")

app = gr.Interface(
    fn=evaluate_row,
    inputs=upload_input,
    outputs=[assessment_view, history_view, report_download],
    title="GPU Fault Detection Dashboard",
    description="Stage 1: Risk Scoring | Stage 2: DBE Detection",
)

# Start the local web server for the dashboard.
app.launch()


* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\Users\Latif\AppData\Local\Programs\Python\Python311\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Latif\AppData\Local\Programs\Python\Python311\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Latif\AppData\Local\Programs\Python\Python311\Lib\site-packages\fastapi\applications.py", line 1133, in __call__
    await super().__call__(scope, receive, send)
  File "c:\Users\Latif\AppData\Local\Programs\Python\Python311\Lib\site-packages\starlette\applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "c:\Users\Latif\AppData\Local\Programs\Python\Python311\Li