In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import os

# Directory that holds the fine-tuned checkpoint to evaluate
MODEL_DIR = "../models/intent_classifier/final"  # update if needed

# Restore the tokenizer and the classifier from that directory.
# nn.Module.eval() returns the module itself, so the model is switched to
# evaluation mode in the same expression that binds it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR).eval()

# Label map: read from the checkpoint's own config instead of hard-coding it,
# so the names and the number of classes always match the loaded model.
# Keys are normalised to int because predict_intent looks labels up with the
# integer class index produced by argmax.
id2label = {int(idx): name for idx, name in model.config.id2label.items()}

# Function to predict intent
def predict_intent(text: str):
    """Classify a single message and report how confident the model is.

    Args:
        text: The message to classify. Over-long input is truncated by the
            tokenizer (``truncation=True``).

    Returns:
        A ``(label, confidence)`` tuple: the ``id2label`` entry for the
        highest-scoring class and that class's softmax probability rounded
        to three decimals.

    Raises:
        KeyError: If the predicted class index is missing from ``id2label``.
    """
    # Keep the inputs on the same device as the model: building a pipeline
    # with a CUDA device moves the shared model to the GPU, after which
    # CPU-only inputs would fail with a device-mismatch error.
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, padding=True
    ).to(model.device)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits

    probs = torch.nn.functional.softmax(logits, dim=1)
    pred_id = torch.argmax(probs, dim=1).item()
    confidence = probs[0][pred_id].item()
    return id2label[pred_id], round(confidence, 3)

# Hand-written sample messages used as a quick smoke test of the classifier
examples = [
    "Hey! Just checking in, how are things on your side?",
    "Lovely weather today, isn’t it?",
    "Where can I find the updated health insurance policy for 2025?",
    "I need information about how salary reviews are done here.",
    "I’m feeling extremely drained after every workday lately.",
    "Honestly, I don’t think I can keep up this pace anymore.",
    "You guys are useless, nothing ever works here.",
    "This is a dumb bot, waste of time."
]

# Classify each sample in order and print the message with its prediction
for message in examples:
    intent, score = predict_intent(message)
    print(f"🗣️ \"{message}\"\n→ Predicted: {intent} (Confidence: {score})\n")


  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [None]:
# Run this in a Jupyter notebook cell
# Interactive demo: keeps prompting for a message and prints the predicted
# intent until the user types "exit"/"quit", interrupts the kernel, or the
# input stream is closed.

while True:
    try:
        text = input("📝 Enter a message (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel (or a closed stdin) is the usual way to stop
        # a prompt loop; leave cleanly instead of ending the cell on a traceback.
        print("👋 Exiting...")
        break

    if text.casefold() in ("exit", "quit"):
        print("👋 Exiting...")
        break

    if not text:
        print("⚠️ Please enter a non-empty message.")
        continue

    label, confidence = predict_intent(text)
    print(f"🧠 Predicted intent: {label} (Confidence: {confidence})\n")


🧠 Predicted intent: moderation_required (Confidence: 0.996)

🧠 Predicted intent: moderation_required (Confidence: 0.916)

🧠 Predicted intent: no_rag (Confidence: 0.863)

🧠 Predicted intent: moderation_required (Confidence: 0.758)

🧠 Predicted intent: needs_rag (Confidence: 0.903)

🧠 Predicted intent: needs_rag (Confidence: 0.876)

🧠 Predicted intent: no_rag (Confidence: 0.981)

🧠 Predicted intent: needs_rag (Confidence: 0.955)

🧠 Predicted intent: no_rag (Confidence: 0.522)

⚠️ Please enter a non-empty message.


In [None]:
import json
import pandas as pd
from sklearn.metrics import classification_report
from transformers import TextClassificationPipeline
from datasets import Dataset

# 📁 Paths
TEST_PATH = "../data/test_split.json"

# 🔹 Load test data
with open(TEST_PATH, "r", encoding="utf-8") as f:
    test_data = json.load(f)

test_df = pd.DataFrame(test_data)
texts = test_df["text"].tolist()

# 🔄 Label bookkeeping, taken from the model config
label_names = model.config.id2label
label2id = {v: int(k) for k, v in label_names.items()}
# Class names ordered by class id; used for both row selection and row titles
# in the report so that the two can never drift apart.
label_order = sorted(label2id, key=label2id.get)

# The pipeline emits label *names*, so the gold labels must be names as well.
# String labels pass through unchanged; numeric ids are translated through
# the model's own id -> name map.
id_to_name = {class_id: name for name, class_id in label2id.items()}
true_labels = [
    y if isinstance(y, str) else id_to_name[int(y)]
    for y in test_df["label"].tolist()
]

# 🧠 Predict using pipeline
# The deprecated `return_all_scores=False` flag is omitted: the default already
# yields one {"label", "score"} dict per input.
pipe = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)
# Truncate over-long texts, matching what predict_intent does for single inputs.
preds = pipe(texts, truncation=True)

# 🎯 Extract predicted labels
predicted_labels = [pred["label"] for pred in preds]

# 📊 Show evaluation metrics
# `labels` is passed explicitly: without it sklearn sorts the observed labels
# alphabetically and applies `target_names` by position, which mislabels rows
# whenever the model's id order is not alphabetical.
print(
    classification_report(
        true_labels,
        predicted_labels,
        labels=label_order,
        target_names=label_order,
    )
)


Device set to use cpu


                     precision    recall  f1-score   support

moderation_required       0.99      1.00      1.00       120
          needs_rag       1.00      1.00      1.00       104
             no_rag       1.00      0.99      0.99        97

           accuracy                           1.00       321
          macro avg       1.00      1.00      1.00       321
       weighted avg       1.00      1.00      1.00       321

