# group_d_05_api_and_gradio_demo.ipynb

This notebook loads the fine-tuned DistilBERT ticket classifier from `models/distilbert-ticket-classifier/`, defines an inference function that returns the predicted label and its confidence, and launches an interactive Gradio demo directly in the notebook (`share=True`).

The notebook is organized into sequential code blocks with comments explaining each line.


In [None]:
# Bootstrap cell: make sure gradio and transformers are importable in this
# kernel, installing them (together with torch) when they are not already installed.
# NOTE(review): later cells also import scipy, fastapi, pydantic and uvicorn,
# which this cell neither checks nor installs explicitly.
try:
    import gradio as gr  # noqa: F401
    from transformers import AutoTokenizer, AutoModelForSequenceClassification  # noqa: F401
except Exception:
    # Any failure while importing triggers the install below. `%pip` (rather
    # than `!pip`) installs into the environment the running kernel uses.
    # NOTE(review): the install is unpinned and passes --upgrade, so the
    # resolved package versions can differ between runs — consider pinning.
    %pip install -q transformers[torch] gradio torch --upgrade
    # Retry the imports now that the packages have been installed.
    import gradio as gr  # noqa: F401
    from transformers import AutoTokenizer, AutoModelForSequenceClassification  # noqa: F401


In [None]:
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel, Field


In [None]:
# Import libraries required for inference and display
from pathlib import Path
import json
import torch
import numpy as np

# Transformers components for loading tokenizer and model
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax  # to convert logits to probabilities

# Gradio for the interactive demo
import gradio as gr

# Select the compute device once, up front: CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)


In [None]:
# Location of the fine-tuned DistilBERT checkpoint (relative path, so it is
# resolved against the notebook's current working directory).
MODEL_DIR = Path("../models/distilbert-ticket-classifier")

# Class names; these must be listed in the same order the model was trained
# with, because predictions are mapped to names by position.
LABELS = ['other', 'technical', 'account', 'billing']

# Fail fast with an actionable message when the checkpoint folder is missing;
# otherwise echo the resolved path and the label set as a quick visual check.
if MODEL_DIR.exists():
    print("Model directory found:", MODEL_DIR.resolve())
    print("Labels:", LABELS)
else:
    raise FileNotFoundError(f"Model directory not found at: {MODEL_DIR.resolve()}. Please ensure the path is correct.")


In [None]:
# Load the tokenizer and the classification model from the local checkpoint
# directory checked in the previous cell.
tokenizer = AutoTokenizer.from_pretrained(str(MODEL_DIR))
model = AutoModelForSequenceClassification.from_pretrained(str(MODEL_DIR))

# Move the model to the selected device (GPU if available, otherwise CPU) and
# switch to evaluation mode so dropout and similar layers are disabled.
model.to(device)
model.eval()

# Predictions are mapped to names via LABELS[index], so the number of names
# must equal the number of model outputs. Check it here, right after loading,
# instead of hitting an IndexError (or silently wrong names) at inference time.
num_labels = getattr(model.config, "num_labels", None)
if num_labels is not None and num_labels != len(LABELS):
    raise ValueError(
        f"Model at {MODEL_DIR} has {num_labels} output classes but LABELS lists "
        f"{len(LABELS)}: {LABELS}. Update LABELS to match the training label order."
    )

# Print model summary: number of labels and device placement.
print(f"Loaded model with num_labels={num_labels} and device={device}.")


In [None]:
# Define a prediction function that accepts a text string and returns a label and confidence score.
def predict_ticket_label(text, top_k=1):
    """Predict label(s) for a single input text.

    Uses the notebook-level ``tokenizer``, ``model``, ``device`` and ``LABELS``
    objects created in earlier cells.

    Args:
        text (str): The input ticket or customer message.
        top_k (int): Number of top labels to return (default 1). Values larger
            than the number of classes return every class; ``None`` also
            returns every class.

    Returns:
        If top_k == 1: (label: str, confidence: float)
        Otherwise: list of tuples [(label, confidence), ...], ordered from the
        most to the least likely class.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    # A non-positive top_k would slice the ranking from the wrong end
    # (e.g. -1 silently drops the least likely class, 0 returns nothing),
    # so reject it explicitly instead of returning a misleading result.
    if top_k is not None and top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k!r}")

    # Tokenize the input text. return_tensors='pt' returns PyTorch tensors;
    # inputs longer than 512 tokens are truncated.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512
    )

    # Move tensors to the same device as the model (GPU/CPU).
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Disable gradient computation for inference to save memory and computation time.
    with torch.no_grad():
        outputs = model(**inputs)
        # Single-item batch: take row 0 of the logits as a NumPy vector.
        logits = outputs.logits.cpu().numpy()[0]

    # Convert logits to probabilities using softmax for interpretability.
    probs = softmax(logits)

    # Indices of the top_k classes, sorted by probability descending.
    top_indices = probs.argsort()[::-1][:top_k]

    # Build results as (label, confidence) pairs with plain Python floats.
    results = [(LABELS[idx], float(probs[idx])) for idx in top_indices]

    if top_k == 1:
        return results[0]  # return (label, confidence)
    return results  # return list of (label, confidence)


In [None]:
# Smoke test: run a handful of representative tickets through the classifier
# and print what it predicts for each one.
examples = [
    "My internet connection is down and I cannot access my email.",
    "I was charged twice on my last invoice. Please help.",
    "How do I change my account password?",
    "I want to update my billing information.",
    "This is an unrelated question not covered by categories."
]

# map() is lazy, so each ticket is classified right before it is printed.
for ex, (label, conf) in zip(examples, map(predict_ticket_label, examples)):
    print(f"Input: {ex}\nPredicted label: {label}, confidence: {conf:.3f}\n")


In [None]:
# Gradio-facing adapter: turns the (label, confidence) pair into one display string.
def gradio_predict(text):
    """Classify ``text`` and format the top prediction for a text output box.

    Returns a string of the form ``"Label: <label> | Confidence: <pct>%"``,
    where the confidence is shown as a percentage with two decimals.
    """
    predicted_label, confidence = predict_ticket_label(text)
    percent = confidence * 100
    return f"Label: {predicted_label} | Confidence: {percent:.2f}%"

# Assemble the demo UI: one free-text input wired through gradio_predict to a
# single text output, with a title and short description shown above it.
iface = gr.Interface(
    title='DistilBERT Ticket Classifier Demo',
    description='Enter a customer support message and see predicted category and confidence.',
    fn=gradio_predict,
    inputs=gr.Textbox(lines=2, placeholder='Enter customer support message here...'),
    outputs=gr.Textbox(label='Prediction'),
)

# Start the demo: render it inline in the notebook and request a public share link.
iface.launch(inline=True, share=True)


In [None]:
app = FastAPI(title="Ticket Classifier API")


# Request body for POST /predict.
class TicketRequest(BaseModel):
    # Raw ticket / customer message to classify.
    text: str
    # Number of highest-scoring labels to return. Constrained to >= 1 so that
    # zero or negative values are rejected at validation time instead of
    # producing an empty or wrongly sized result list.
    top_k: int = Field(default=1, ge=1)


@app.post("/predict")
def predict_ticket(request: TicketRequest):
    """API endpoint to predict ticket label(s) for input text.

    Args:
        request (TicketRequest): Input request with text and top_k.

    Returns:
        dict: ``{"label", "confidence"}`` for the single best prediction when
        ``top_k == 1``; otherwise a list of such dicts, one per returned label.
        Confidences are rounded to four decimals.
    """
    results = predict_ticket_label(request.text, top_k=request.top_k)
    if request.top_k == 1:
        # predict_ticket_label returns a bare (label, confidence) tuple for top_k == 1.
        label, confidence = results
        return {"label": label, "confidence": round(confidence, 4)}
    return [{"label": label, "confidence": round(conf, 4)} for label, conf in results]

# To run API server manually (commented out to avoid blocking notebook):
#if __name__ == "__main__":
#    uvicorn.run(app, host="0.0.0.0", port=8000)

In [None]:
# Gradio adapter that supplies the Top-3 predictions for display in a Label component.

def gradio_predict_topk(text):
    """Return the top-3 predictions as a ``{label: confidence}`` mapping.

    Confidences are kept as floats (the softmax probabilities returned by
    ``predict_ticket_label``) rather than pre-formatted percentage strings:
    ``gr.Label`` takes numeric confidences and does the percentage rendering
    itself, whereas strings such as ``"9.50%"`` and ``"85.00%"`` would be
    compared lexicographically and are not valid confidence values.
    """
    results = predict_ticket_label(text, top_k=3)
    return {label: conf for label, conf in results}

# Rebuild the demo around the top-3 adapter and show the result in a Label
# component. This rebinds `iface`, the name an earlier cell used for the
# single-prediction demo.
iface = gr.Interface(
    title="DistilBERT Ticket Classifier Demo (Enhanced)",
    description="Displays top-3 predicted categories and confidence scores.",
    fn=gradio_predict_topk,
    inputs=gr.Textbox(lines=3, placeholder="Enter customer message here..."),
    outputs=gr.Label(label="Top Predictions (Confidence %)"),
)

# Render inline in the notebook and request a public share link.
iface.launch(inline=True, share=True)
