# Plotly Dash Dashboard

This dashboard allows interactive exploration of the AI Text Detector results. It has three tabs for:
1. **EDA** – Visualizing data distributions (class balance, article length, etc.).
2. **Evaluation** – Showing model performance metrics (confusion matrix, ROC curves).
3. **Inference** – Providing an interface to input new text and get predictions with explanations.

Run this app to launch the dashboard locally and interact with the model.


In [1]:
# move up one level so that works
import os
os.chdir(os.path.abspath(os.path.join(os.getcwd(), "..")))
print("new cwd:", os.getcwd())


new cwd: c:\Testing\Final_Year_Project\AI-Text-Detection-Tool


In [2]:
import dash
from dash import dcc, html
import base64, logging
from pathlib import Path
from typing import Optional
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import logging
from utils import dashboard_utils  # for LIME explanations
import pandas as pd
import plotly.express as px

In [3]:

# Initialize Dash app
app = dash.Dash(__name__)
app.title = "AI Text Detector Dashboard"

# ——— 1) Auto‑locate your diagrams/ directory ————————————————
def find_diagrams_dir(start: Path = Path.cwd(), marker: str = "diagrams") -> Optional[Path]:
    """
    Walk upward from `start` through its parents until you find a folder named `marker`.
    Return the Path to that folder, or None if not found.
    """
    for folder in (start, *start.parents):
        candidate = folder / marker
        if candidate.is_dir():
            return candidate
    logging.warning(f"Couldn’t locate a '{marker}/' directory under {start}")
    return None

DIAGRAMS_DIR = find_diagrams_dir()

# ——— 2) Helper to read & encode image files as base64 URIs, safely ——————
def encode_image(filename: str) -> Optional[str]:
    """
    Given a filename (e.g. "class_distribution.png"), look in DIAGRAMS_DIR,
    read it if present, and return a data URI. If missing, log & return None.
    """
    if DIAGRAMS_DIR is None:
        return None

    img_path = (DIAGRAMS_DIR / filename).resolve()
    if not img_path.exists():
        logging.warning(f"Image not found, skipping: {img_path}")
        return None

    try:
        raw = img_path.read_bytes()
        b64 = base64.b64encode(raw).decode("utf-8")
        return f"data:image/png;base64,{b64}"
    except Exception as e:
        logging.warning(f"Failed to encode {img_path}: {e}")
        return None

# ——— 3) Load pre‑generated plots (won’t crash if missing) ————————
class_dist_img  = encode_image("class_distribution.png")
length_dist_img = encode_image("length_distribution.png")
conf_matrix_img = encode_image("confusion_matrix.png")
roc_curves_img  = encode_image("roc_curves.png")

# ——— 4) Define app layout ————————————————————————————————
app.layout = html.Div([
    html.H1(
        "AI‑Generated Text Detection Dashboard",
        style={"textAlign": "center", "marginBottom": "1em"}
    ),
    dcc.Tabs(id="tabs", value="tab-eda", children=[
        dcc.Tab(label="EDA",        value="tab-eda"),
        dcc.Tab(label="Evaluation", value="tab-eval"),
        dcc.Tab(label="Inference",  value="tab-inf"),
    ]),
    html.Div(id="tab-content")
])

# (Callbacks will go here in later cells…)




In [4]:


trends_df = pd.read_csv('data/trends_by_year.csv')

# Melt into long format for plotting
records = []
for _, row in trends_df.iterrows():
    year = int(row['year'])
    records.extend([
        {'year': year, 'content_type': 'Human-written',  'count': row['Human-written'],     'percentage': row['human_percent']},
        {'year': year, 'content_type': 'AI-paraphrased', 'count': row['AI-paraphrased'],   'percentage': row['ai_paraphrased_percent']},
        {'year': year, 'content_type': 'AI-generated',  'count': row['AI-generated'],    'percentage': row['ai_generated_percent']},
    ])
trends_long = pd.DataFrame(records)

# Line chart of proportions over time
fig_line = px.line(
    trends_long, x='year', y='percentage', color='content_type', markers=True,
    labels={'percentage':'Proportion','content_type':'Type','year':'Year'},
)
fig_line.update_layout(
    title="AI vs Human Content Over Time",
    yaxis_tickformat='%'
)
fig_line.update_traces(
    hovertemplate="Year %{x}<br>%{legendgroup}: %{y:.1%} (%{customdata} articles)",
    customdata=trends_long['count']
)

# Animated bar chart
fig_bar = px.bar(
    trends_long, x='content_type', y='percentage', color='content_type',
    animation_frame='year', animation_group='content_type', range_y=[0,1],
    category_orders={'content_type':['Human-written','AI-paraphrased','AI-generated']},
    labels={'percentage':'Proportion','content_type':'Type','year':'Year'}
)
fig_bar.update_layout(
    title="Content-Type Timeline (2015–2025)",
    yaxis_tickformat='%'
)
fig_bar.update_traces(
    hovertemplate="%{x}: %{y:.1%} (%{customdata} articles)",
    customdata=trends_long['count']
)

# Initialize Dash app
app = dash.Dash(__name__)
app.layout = html.Div([
    dcc.Tabs(id="tabs", value="tab-trends", children=[
        dcc.Tab(label="Trends", value="tab-trends"),
        dcc.Tab(label="Evaluation", value="tab-eval"),
        # ... other tabs ...
    ]),
    html.Div(id="tab-content")
])

FileNotFoundError: [Errno 2] No such file or directory: 'data/trends_by_year.csv'

In [None]:
# Load model & tokenizer once
model_path = "diagrams/final_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()

# Class names in the correct order
label_names = ["Human-written", "AI-paraphrased", "AI-generated"]


In [6]:
from dash.dependencies import Input, Output

@app.callback(Output("tab-content", "children"), Input("tabs", "value"))
def render_tab_content(tab):
    if tab == "tab-eda":
        return html.Div([
            html.H3("Exploratory Data Analysis", style={"textAlign": "center", "marginTop": "1em"}),
            html.Img(src=class_dist_img, style={"width": "45%", "display": "inline-block", "padding": "1em"}),
            html.Img(src=length_dist_img, style={"width": "45%", "display": "inline-block", "padding": "1em"}),
            html.P(
                "The dataset is fairly balanced across classes. "
                "Human-written texts are generally longer than AI-generated or AI-paraphrased ones.",
                style={"textAlign": "center", "fontStyle": "italic", "marginTop": "0.5em"}
            )
        ])
    elif tab == "tab-eval":
        # Evaluation tab: show confusion matrix and ROC curves
        return html.Div([
            html.H3("Model Evaluation", style={"textAlign": "center", "marginTop": "1em"}),
            html.Img(src=conf_matrix_img, style={"width": "40%", "display": "inline-block", "padding": "1em"}),
            html.Img(src=roc_curves_img, style={"width": "50%", "display": "inline-block", "padding": "1em"}),
            html.P(
                "Overall accuracy ~91%. The model excels at identifying human-written text (near 99% recall) "
                "and mostly confuses paraphrased vs directly AI-generated text.",
                style={"textAlign": "center", "fontStyle": "italic", "marginTop": "0.5em"}
            )
        ])
    elif tab == "tab-inf":
        # Inference tab: textarea + button + placeholder for results
        return html.Div([
            html.H3("Try the Detector", style={"textAlign": "center", "marginTop": "1em"}),
            dcc.Textarea(
                id="input-text",
                placeholder="Enter article text here...",
                style={"width": "80%", "height": "100px"}
            ),
            html.Br(),
            html.Button("Detect", id="detect-button", n_clicks=0, style={"marginTop": "0.5em"}),
            html.Div(id="result-output", style={"marginTop": "1em"})
        ])
    elif tab == "tab-trends":
        return html.Div([
            html.H3("AI Content Trends (2015–2025)",
                    style={"textAlign":"center","marginTop":"1em"}),
            dcc.Graph(id="trend-line",
                      figure=fig_line,
                      config={"displayModeBar": False}),
            dcc.Graph(id="trend-bar",
                      figure=fig_bar,
                      config={"displayModeBar": False}),
            html.P("Use the slider ▶ to animate year-by-year changes. "
                   "Hover for exact percentages and counts.",
                   style={"textAlign":"center",
                          "fontStyle":"italic",
                          "marginTop":"0.5em"})
        ])
    return html.Div() 


In [None]:
from dash.dependencies import Input, Output

@app.callback(
    Output("result-output", "children"),
    [Input("detect-button", "n_clicks"), Input("input-text", "value")]
)
def run_detection(n_clicks, input_text):
    if not n_clicks or not input_text:
        return ""
    # 1) Tokenize & predict
    tokens = tokenizer(
        input_text, return_tensors="pt",
        truncation=True, padding=True, max_length=512
    )
    with torch.no_grad():
        logits = model(**tokens).logits
        probs = torch.softmax(logits, dim=1)[0].cpu().numpy()
    pred_idx = int(np.argmax(probs))
    pred_label = label_names[pred_idx]
    confidence = probs[pred_idx]
    logging.info(f"Predicted {pred_label} ({confidence:.3f})")

    # 2) Get LIME explanation (top 6 words)
    explanation = dashboard_utils.explain_prediction(
        input_text, tokenizer, model, num_features=6
    )
    # Build a lookup of word→weight
    weights = {w.lower(): wt for w, wt in explanation}
    max_w = max(abs(wt) for wt in weights.values()) if weights else 1.0

    # 3) Render probability bars
    prob_rows = []
    for i, name in enumerate(label_names):
        pct = probs[i] * 100
        bar = html.Div(style={
            "width": f"{pct}%", "height": "8px", "backgroundColor": "#4c8bf5"
        })
        prob_rows.append(
            html.Div([bar, html.Span(f" {name}: {pct:.1f}%")],
                     style={"display":"flex","alignItems":"center","margin":"4px 0"})
        )

    # 4) Highlight important words
    nodes = []
    for word in input_text.split():
        key = word.strip(".,!?;:").lower()
        if key in weights:
            w = weights[key]
            opacity = min(abs(w)/max_w,1.0)
            color = f"rgba(255,165,0,{opacity:.2f})"
            nodes.append(html.Span(word+" ",
                                   style={"backgroundColor": color},
                                   title=f"Weight: {w:+.2f}"))
        else:
            nodes.append(html.Span(word+" "))

    # 5) Compose output
    return html.Div([
        html.H4(f"Prediction: {pred_label}", style={"textAlign":"center"}),
        html.P(f"Confidence: {confidence*100:.2f}%", style={"textAlign":"center"}),
        html.Div(prob_rows, style={"margin":"0.5em 0"}),
        html.P(nodes, style={"lineHeight":"1.8em"})
    ])



In [7]:
if __name__ == "__main__":
    app.run_server(debug=True)


## Dashboard Design & Integration

- **EDA Tab**: validates data assumptions (class balance, length differences).
- **Evaluation Tab**: shows ~91% accuracy, highlights AI-para vs AI-gen confusion.
- **Inference Tab**: live text input with LIME highlights (hover shows weights).

This dashboard is ideal for presentations—walk examiners through data, model performance, and explainability in a unified interface.

Alongside this, the **React** app and **Chrome extension** offer real-world workflows:
- React: single/batch analysis with an intuitive sidebar, dark mode, and drag-drop.
- Extension: scan any web page in-place, highlighting the exact cues that triggered the model.

🔑 **Note:** Keep the FastAPI backend (`api_server.py`) running.  
Dash can run standalone (it loads the model internally), but for full parity use the API.

> The modular design means you can upgrade each component independently—good software practice for extending this project further.
