In [12]:
# Setup
# Install dependencies with %pip so they land in the running kernel's environment.
# NOTE(review): python-dotenv appears in both install lines and no versions are
# pinned — consider merging the two lines and pinning for reproducibility.
%pip install python-dotenv --upgrade --quiet langchain langchain-huggingface sentence-transformers langchain-community
%pip install groq python-dotenv
from dotenv import load_dotenv
# Invoked before the remaining imports; presumably loads settings from a local
# .env file — confirm a .env file is expected alongside this notebook.
load_dotenv()

import os
from langchain_huggingface import HuggingFaceEmbeddings

# Using a FREE, open-source model from Hugging Face
# 'all-MiniLM-L6-v2' is small, fast, and very good for English.
# NOTE(review): `embeddings` is not referenced by any other cell visible in this
# part of the notebook — confirm it is still needed before keeping this line.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")



In [13]:
from groq import Groq
import os
from getpass import getpass

# Resolve the Groq API key without ever hardcoding it.
# 1. Prefer a key already present in the process environment (for example one
#    supplied through a .env file picked up by the load_dotenv() call in the
#    setup cell), so "Restart Kernel -> Run All" works unattended.
# 2. Otherwise fall back to a hidden interactive prompt.
GROQ_API_KEY = (
    os.environ.get("GROQ_API_KEY") or getpass("Enter your Groq API Key: ")
).strip()

# Fail here with a clear message instead of on the first API call later on.
if not GROQ_API_KEY:
    raise ValueError(
        "No Groq API key provided: set GROQ_API_KEY or enter it when prompted."
    )

# Shared client used by every routing and expert call in this notebook.
client = Groq(api_key=GROQ_API_KEY)

Enter your Groq API Key: ··········


In [14]:
# Model identifier passed as `model=` on every chat-completion request below.
MODEL_NAME = "llama-3.3-70b-versatile"

# Expert registry: category name -> {"system_prompt", "temperature"}.
# Each row is (category, sampling temperature, persona prompt); the category
# names double as the labels the router is allowed to return.
MODEL_CONFIG = {
    expert: {"system_prompt": persona, "temperature": temperature}
    for expert, temperature, persona in (
        (
            "technical",
            0.7,
            "You are a highly skilled Technical Support Engineer. "
            "You specialize in debugging code, analyzing error messages, and providing "
            "precise, step-by-step technical solutions. Be rigorous, code-focused, and exact. "
            "When relevant, provide corrected code snippets.",
        ),
        (
            "billing",
            0.7,
            "You are an empathetic Billing Support Specialist. "
            "You handle refund requests, subscription issues, and payment disputes with care. "
            "Be understanding, policy-driven, and clear about next steps. "
            "Always acknowledge the customer's frustration before explaining the resolution process.",
        ),
        (
            "general",
            0.7,
            "You are a friendly and helpful General Support Assistant. "
            "You handle casual inquiries, FAQs, and any questions that don't fall under "
            "technical or billing categories. Be warm, concise, and helpful.",
        ),
    )
}

In [15]:
def route_prompt(user_input: str) -> str:
    """
    Classify a user message as 'technical', 'billing', or 'general'.

    The message is sent to the chat model together with a classification
    system prompt, using temperature=0 so routing is as repeatable as possible.
    The reply is then normalized and checked against the keys of MODEL_CONFIG.

    Args:
        user_input: The raw customer message to classify.

    Returns:
        The lowercase category name. Replies that do not map onto a key of
        MODEL_CONFIG (including an empty or missing reply) yield 'general'.
    """
    routing_system_prompt = (
        "You are an intent classification engine. "
        "Classify the user's message into exactly one of these categories: "
        "technical, billing, general. "
        "Rules:\n"
        "- 'technical': bugs, errors, code issues, crashes, API problems\n"
        "- 'billing': charges, refunds, subscriptions, payments, invoices\n"
        "- 'general': everything else\n"
        "Return ONLY the single category word in lowercase. No punctuation, no explanation."
    )

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": routing_system_prompt},
            {"role": "user", "content": user_input},
        ],
        temperature=0,  # Deterministic for routing
        max_tokens=10,  # A single category word is all that is needed
    )

    # Guard against a missing (None) message content instead of crashing.
    raw_reply = response.choices[0].message.content or ""

    # The model does not always obey "no punctuation": accept replies such as
    # "Billing." or "'technical'" by trimming wrapping whitespace, quotes and
    # punctuation before the lookup, rather than misrouting them to 'general'.
    category = raw_reply.strip(" \t\r\n\"'`*.,:;!?()[]{}").lower()

    # Anything that still is not a known expert falls back to 'general'.
    return category if category in MODEL_CONFIG else "general"

In [16]:
def process_request(user_input: str) -> str:
    """
    Answer a user message with the expert persona chosen by the router.

    Flow: classify the message with route_prompt, print the chosen category,
    look up that category's entry in MODEL_CONFIG, and ask the chat model to
    reply using the entry's system prompt and temperature.

    Args:
        user_input: The raw customer message.

    Returns:
        The model's reply text with surrounding whitespace removed.
    """
    # Pick the expert and report the routing decision.
    expert_name = route_prompt(user_input)
    print(f"[Router] Classified as: '{expert_name}'")

    # Build the conversation from the expert's persona plus the user message.
    persona = MODEL_CONFIG[expert_name]
    conversation = [
        {"role": "system", "content": persona["system_prompt"]},
        {"role": "user", "content": user_input},
    ]

    # Query the model with the expert-specific sampling temperature.
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=conversation,
        temperature=persona["temperature"],
        max_tokens=512,
    )

    answer = completion.choices[0].message.content
    return answer.strip()

In [17]:
# Demo 1: a code-error question run through the full routing pipeline.
query_1 = "My python script is throwing an IndexError on line 5."
print("=" * 60, f"User Query: {query_1}", "=" * 60, sep="\n")
answer_1 = process_request(query_1)
print("\n[Expert Response]", answer_1, "", sep="\n")

User Query: My python script is throwing an IndexError on line 5.
[Router] Classified as: 'technical'

[Expert Response]
To assist you with the `IndexError` on line 5 of your Python script, I'll need more information about your code. However, I can guide you through a general troubleshooting process.

### Understanding IndexError
An `IndexError` typically occurs when you try to access an element in a list (or other sequence type) using an index that is out of range. For example, if you have a list with 3 elements (indices 0, 1, and 2), trying to access the element at index 3 would result in an `IndexError`.

### General Steps to Debug
1. **Check the Line of Code**: Look at line 5 of your script and identify the operation that's causing the error. It's likely an indexing operation on a list, tuple, or string.
2. **Variable Inspection**: Before line 5, add some debug print statements to inspect the variables involved in the indexing operation. This can help you understand the length and 

In [18]:
# Demo 2: a duplicate-charge complaint run through the full routing pipeline.
query_2 = "I was charged twice for my subscription this month."
print("=" * 60, f"User Query: {query_2}", "=" * 60, sep="\n")
answer_2 = process_request(query_2)
print("\n[Expert Response]", answer_2, "", sep="\n")

User Query: I was charged twice for my subscription this month.
[Router] Classified as: 'billing'

[Expert Response]
I'm so sorry to hear that you were charged twice for your subscription this month. I can imagine how frustrating that must be for you, especially when you're expecting to only see one charge on your statement. I'm here to help you resolve this issue as quickly and fairly as possible.

To get started, I'll need to look into your account and verify the duplicate charge. Can you please confirm your subscription details, such as the type of subscription you have and the date you were charged? Additionally, I may need to ask for some additional information to ensure that I'm investigating the correct transaction.

Once I've verified the duplicate charge, I'll work with our team to process a refund for the incorrect charge as soon as possible. Please know that we have a standard refund process in place, and I'll guide you through the next steps. If you have any questions or co

In [19]:
# Demo 3: a casual FAQ-style question run through the full routing pipeline.
query_3 = "What are your business hours?"
print("=" * 60, f"User Query: {query_3}", "=" * 60, sep="\n")
answer_3 = process_request(query_3)
print("\n[Expert Response]", answer_3, "", sep="\n")

User Query: What are your business hours?
[Router] Classified as: 'general'

[Expert Response]
Our business hours are Monday to Friday, 9:00 AM to 5:00 PM (EST). We're closed on weekends and major holidays. If you have any questions or need assistance outside of these hours, feel free to leave a message, and we'll get back to you as soon as possible during our next business day. How can I assist you today?



BONUS

In [20]:
def get_mock_bitcoin_price() -> str:
    """
    Return a canned Bitcoin price sentence, standing in for a live data fetch.

    Returns:
        A fixed, human-readable sentence quoting a hard-coded BTC price in USD
        and flagging it as mock data.
    """
    # Hard-coded stand-in; a real implementation would query a price API
    # such as CoinGecko.
    btc_usd = 67_432.15
    price_text = format(btc_usd, ",.2f")  # thousands separator, two decimals
    return "The current price of Bitcoin (BTC) is $" + price_text + " USD (mock data)."


def route_prompt_with_tool(user_input: str) -> str:
    """
    Classify a user message, additionally recognizing live-data ('tool') requests.

    The message is sent to the chat model with a classification system prompt
    at temperature=0. The reply is normalized and checked against the keys of
    MODEL_CONFIG plus the extra 'tool' label.

    Args:
        user_input: The raw customer message to classify.

    Returns:
        One of the MODEL_CONFIG keys or 'tool', in lowercase. Replies that do
        not map onto any of those labels (including an empty or missing reply)
        yield 'general'.
    """
    routing_system_prompt = (
        "You are an intent classification engine. "
        "Classify the user's message into exactly one of these categories: "
        "technical, billing, general, tool. "
        "Rules:\n"
        "- 'technical': bugs, errors, code issues, crashes\n"
        "- 'billing': charges, refunds, subscriptions, payments\n"
        "- 'tool': requests for live/real-time data like cryptocurrency prices, stock prices, weather\n"
        "- 'general': everything else\n"
        "Return ONLY the single category word in lowercase. No punctuation, no explanation."
    )

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": routing_system_prompt},
            {"role": "user", "content": user_input},
        ],
        temperature=0,  # Deterministic for routing
        max_tokens=10,  # A single category word is all that is needed
    )

    # Guard against a missing (None) message content instead of crashing.
    raw_reply = response.choices[0].message.content or ""

    # The model does not always obey "no punctuation": accept replies such as
    # "Tool." or "'billing'" by trimming wrapping whitespace, quotes and
    # punctuation before the lookup, rather than misrouting them to 'general'.
    category = raw_reply.strip(" \t\r\n\"'`*.,:;!?()[]{}").lower()

    # Every LLM-backed expert plus the function-backed 'tool' route.
    valid_categories = set(MODEL_CONFIG) | {"tool"}
    return category if category in valid_categories else "general"


def process_request_with_tool(user_input: str) -> str:
    """
    Answer a user message, serving 'tool' requests from a local function.

    The message is classified with route_prompt_with_tool and the category is
    printed. Non-tool categories are answered by the chat model using the
    matching MODEL_CONFIG persona. The 'tool' category never reaches the model:
    it returns the mock Bitcoin price when the message mentions "bitcoin"
    (case-insensitive), otherwise a fixed "no matching tool" notice.

    Args:
        user_input: The raw customer message.

    Returns:
        The tool output, the "no matching tool" notice, or the model's reply
        text with surrounding whitespace removed.
    """
    category = route_prompt_with_tool(user_input)
    print(f"[Router] Classified as: '{category}'")

    # Regular experts: delegate to the LLM with the expert's persona.
    if category != "tool":
        persona = MODEL_CONFIG[category]
        conversation = [
            {"role": "system", "content": persona["system_prompt"]},
            {"role": "user", "content": user_input},
        ]
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=conversation,
            temperature=persona["temperature"],
            max_tokens=512,
        )
        return completion.choices[0].message.content.strip()

    # Tool route: dispatch on a keyword in the message instead of calling the LLM.
    mentions_bitcoin = "bitcoin" in user_input.lower()
    if not mentions_bitcoin:
        return "Tool use requested, but no matching tool was found."
    return get_mock_bitcoin_price()


# Demo 4: a live-data question exercising the tool-aware pipeline.
query_4 = "What is the current price of Bitcoin?"
print("=" * 60, f"User Query: {query_4}", "=" * 60, sep="\n")
answer_4 = process_request_with_tool(query_4)
print("\n[Expert Response]", answer_4, "", sep="\n")

User Query: What is the current price of Bitcoin?
[Router] Classified as: 'tool'

[Expert Response]
The current price of Bitcoin (BTC) is $67,432.15 USD (mock data).

