In [1]:
!pip install transformers sentence-transformers openai chromadb fastapi uvicorn nest-asyncio


Collecting transformers
  Using cached transformers-4.53.1-py3-none-any.whl.metadata (40 kB)
Collecting sentence-transformers
  Using cached sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting openai
  Using cached openai-1.93.1-py3-none-any.whl.metadata (29 kB)
Collecting chromadb
  Using cached chromadb-1.0.15-cp39-abi3-win_amd64.whl.metadata (7.1 kB)
Collecting fastapi
  Using cached fastapi-0.116.0-py3-none-any.whl.metadata (28 kB)
Collecting uvicorn
  Using cached uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Using cached huggingface_hub-0.33.2-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers)
  Using cached numpy-2.3.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp312-cp312-win_amd64.wh


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from transformers import pipeline

# General-purpose emotion classifier used as the intent model (can be fine-tuned later)
EMOTION_MODEL_NAME = "bhadresh-savani/distilbert-base-uncased-emotion"
intent_classifier = pipeline(task="text-classification", model=EMOTION_MODEL_NAME)

# Intent Mapping
def map_intent(label):
    """Translate a classifier label into a support intent name.

    "joy" and "love" map to "Feature Request"; "anger", "fear" and "sadness"
    map to "Billing"; any other label falls back to "Technical Support".
    """
    feature_request_labels = ("joy", "love")
    billing_labels = ("anger", "fear", "sadness")

    if label in feature_request_labels:
        return "Feature Request"
    if label in billing_labels:
        return "Billing"
    return "Technical Support"

def detect_intent(query):
    """Classify `query` with `intent_classifier` and map its first label to an intent."""
    predictions = intent_classifier(query)
    first_prediction = predictions[0]
    return map_intent(first_prediction["label"])


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


In [4]:
!pip install python-dotenv




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import requests
import openai
import time
from dotenv import load_dotenv
import os
# Load variables from a .env file, then hand the key to the OpenAI SDK
# (the value is None when OPENAI_API_KEY is not set).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

class LLMRouter:
    """Send a prompt to a local Ollama model or to OpenAI and time the call.

    With ``use_openai=False`` the prompt is first sent to the Ollama HTTP
    endpoint on localhost. If that call fails for any reason, the router
    switches to OpenAI and stays switched for later calls. A failure of the
    OpenAI call itself is raised to the caller; it is never retried, so a
    broken OpenAI setup can no longer cause unbounded recursion.
    """

    OLLAMA_URL = "http://localhost:11434/api/generate"
    OLLAMA_MODEL = "llama3"
    OPENAI_MODEL = "gpt-4"

    def __init__(self, use_openai=False, timeout=None):
        """
        Args:
            use_openai: When True, skip the local model and call OpenAI directly.
            timeout: Optional timeout in seconds for the local HTTP request.
                None (the default) waits indefinitely.
        """
        self.use_openai = use_openai
        self.timeout = timeout

    def _query_openai(self, prompt):
        """Ask the OpenAI chat model; return (stripped reply text, latency in seconds)."""
        messages = [{"role": "user", "content": prompt}]
        start = time.time()
        # openai>=1.0 removed `openai.ChatCompletion`; the module-level client
        # lives at `openai.chat.completions`. Older SDKs have no `chat` attribute,
        # so fall back to the legacy call there.
        chat = getattr(openai, "chat", None)
        if chat is not None:
            response = chat.completions.create(model=self.OPENAI_MODEL, messages=messages)
        else:
            response = openai.ChatCompletion.create(model=self.OPENAI_MODEL, messages=messages)
        latency = time.time() - start
        return response.choices[0].message.content.strip(), latency

    def _query_local(self, prompt):
        """Ask the local Ollama endpoint; return (reply text, latency in seconds)."""
        start = time.time()
        res = requests.post(
            self.OLLAMA_URL,
            json={
                "model": self.OLLAMA_MODEL,
                "prompt": prompt,
                "stream": False,
            },
            timeout=self.timeout,
        )
        # Surface HTTP errors explicitly instead of as a KeyError on the body.
        res.raise_for_status()
        latency = time.time() - start
        return res.json()['response'], latency

    def query(self, prompt):
        """Return ``(response_text, latency_seconds)`` for `prompt`.

        Raises:
            Exception: whatever the OpenAI call raises, when OpenAI is the
                selected backend or the fallback after a local failure.
        """
        if not self.use_openai:
            try:
                return self._query_local(prompt)
            except Exception as exc:
                import warnings

                warnings.warn(
                    f"Local LLM call failed ({exc!r}); falling back to OpenAI.",
                    RuntimeWarning,
                    stacklevel=2,
                )
                # Keep the original behaviour: once the local backend fails,
                # later calls go straight to OpenAI.
                self.use_openai = True
        return self._query_openai(prompt)


In [7]:
# Simulate sample retrieval context for each intent
def retrieve_context(intent):
    """Return the canned context snippet for `intent`, or None if it is not recognised."""
    canned_contexts = (
        ("Technical Support",
         "To reset your password, go to settings > security > reset password."),
        ("Billing",
         "Our pricing tiers include Basic, Pro, and Enterprise. Missed payments result in account hold."),
        ("Feature Request",
         "We are planning 2FA support in Q3 and AI summarization in Q4 per roadmap."),
    )
    for intent_name, snippet in canned_contexts:
        if intent == intent_name:
            return snippet
    return None

# Prompt template builder
def build_prompt(intent, query, context):
    """Fill the prompt template for `intent` with `context` and `query`.

    Returns None when `intent` is not one of the three known intents.
    """
    if intent == "Technical Support":
        template = "Using the following documentation:\n{context}\n\nAnswer this tech support query:\n{query}"
    elif intent == "Billing":
        template = "Based on billing policies:\n{context}\n\nAnswer the customer question:\n{query}"
    elif intent == "Feature Request":
        template = "Based on our product roadmap:\n{context}\n\nAddress this feature request:\n{query}"
    else:
        return None
    return template.format(context=context, query=query)


In [8]:
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model shared by the scoring helpers
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Cosine similarity with ideal response
def relevance_score(response, ideal):
    """Cosine similarity between the embeddings of `response` and `ideal`, as a float."""
    response_embedding = embedder.encode(response)
    ideal_embedding = embedder.encode(ideal)
    similarity = util.cos_sim(response_embedding, ideal_embedding)
    return float(similarity[0][0])

# Overlap-based context utilization
def context_utilization_score(response, context):
    """Fraction of distinct context words that also appear in the response.

    Both texts are lower-cased and split on whitespace, so punctuation stays
    attached to its word ("password," and "password" are different tokens).

    Args:
        response: Generated answer text (None is treated as empty).
        context: Retrieved context text (None is treated as empty).

    Returns:
        A float in [0.0, 1.0]. Returns 0.0 when the context contains no words,
        instead of raising ZeroDivisionError.
    """
    context_words = set((context or "").lower().split())
    if not context_words:
        # Nothing to utilise: avoid dividing by zero.
        return 0.0
    response_words = set((response or "").lower().split())
    return len(context_words & response_words) / len(context_words)


In [9]:
# Router that tries the local model first
llm = LLMRouter(use_openai=False)

# Example query and the reference answer it is scored against
query = "Can you add support for two-factor authentication?"
ideal_answer = "Two-factor authentication is a planned feature for Q3 as per our roadmap."

# Step 1: classify the query
intent = detect_intent(query)
print("Intent Detected:", intent)

# Step 2: fetch the matching context and assemble the prompt
context = retrieve_context(intent)
prompt = build_prompt(intent, query, context)

# Step 3: ask the model
response, latency = llm.query(prompt)
print("LLM Response:", response)

# Step 4: score the answer
relevance = relevance_score(response, ideal_answer)
context_util = context_utilization_score(response, context)

print("\n📊 Evaluation Metrics")
for metric_label, metric_value, ndigits in (
    ("Relevance Score:", relevance, 3),
    ("Context Utilization:", context_util, 3),
    ("Latency (sec):", latency, 2),
):
    print(metric_label, round(metric_value, ndigits))


Intent Detected: Feature Request
LLM Response: I'm happy to help! Based on our current product roadmap, we're planning to implement two-factor authentication (2FA) in Q3. This means that 2FA support is already slated for development and will be available in the coming quarter.

Since this feature is already planned, I won't be able to add it to our roadmap at this time. However, if you'd like to provide any specific requirements or feedback on how you envision 2FA working in our product, I'm more than happy to take that into consideration and pass it along to the development team.

If there's anything else I can help with, please don't hesitate to ask!

📊 Evaluation Metrics
Relevance Score: 0.809
Context Utilization: 0.385
Latency (sec): 26.51


In [11]:
!pip install pandas

Collecting pandas
  Using cached pandas-2.3.1-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.3.1-cp312-cp312-win_amd64.whl (11.0 MB)
Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.3.1 pytz-2025.2 tzdata-2025.2



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import pandas as pd

# Simulated test set: (query, true intent, ideal answer)
data = [
    ("How do I reset my password?", "Technical Support", "You can reset your password from settings."),
    ("What happens if I miss a payment?", "Billing", "Missed payments result in a temporary account hold."),
    ("Can you add AI-based tagging?", "Feature Request", "AI-based tagging is being explored for future releases.")
]


def evaluate_case(query, true_intent, ideal_answer):
    """Run one test query through the pipeline and return its metrics as a dict.

    Keeping the per-case work in a function stops the loop from overwriting
    notebook-level names such as `query`, `context`, `response` and `latency`
    that an earlier cell defined.
    """
    pred_intent = detect_intent(query)
    context = retrieve_context(pred_intent)
    prompt = build_prompt(pred_intent, query, context)
    response, latency = llm.query(prompt)
    return {
        "Query": query,
        "True Intent": true_intent,
        "Predicted Intent": pred_intent,
        "Response": response,
        "Relevance": relevance_score(response, ideal_answer),
        "Context Utilization": context_utilization_score(response, context),
        "Latency": latency
    }


results = [evaluate_case(*case) for case in data]

df = pd.DataFrame(results)
# Per-row flag: True where the predicted intent matches the labelled one.
df["Intent Accuracy"] = df["True Intent"] == df["Predicted Intent"]
df


Unnamed: 0,Query,True Intent,Predicted Intent,Response,Relevance,Context Utilization,Latency,Intent Accuracy
0,How do I reset my password?,Technical Support,Billing,"To reset your password, please follow these st...",0.66751,0.285714,26.213994,False
1,What happens if I miss a payment?,Billing,Billing,"According to our billing policies, if you miss...",0.676389,0.214286,5.884278,True
2,Can you add AI-based tagging?,Feature Request,Feature Request,Thank you for the context! Based on our produc...,0.711357,0.461538,32.393183,True


In [13]:
# Mean of each metric column over the test set
for summary_label, column_name in (
    ("✅ Intent Accuracy:", "Intent Accuracy"),
    ("✅ Avg Relevance:", "Relevance"),
    ("✅ Avg Context Utilization:", "Context Utilization"),
    ("✅ Avg Latency:", "Latency"),
):
    print(summary_label, df[column_name].mean())


✅ Intent Accuracy: 0.6666666666666666
✅ Avg Relevance: 0.6850851774215698
✅ Avg Context Utilization: 0.32051282051282054
✅ Avg Latency: 21.497151533762615
