In [7]:
!pip install groq


Collecting groq
  Downloading groq-1.0.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-1.0.0-py3-none-any.whl (138 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/138.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.3/138.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-1.0.0


In [8]:
from getpass import getpass
import os

os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")


Enter your Groq API key: ··········


In [9]:
from groq import Groq
import os

client = Groq(api_key=os.environ["GROQ_API_KEY"])


In [10]:
import json

def classify_complaint(text):
    prompt = f"""
You are a complaint classification system.

Classify the complaint into exactly one of:
- food_spoiled
- food_tampered
- late_delivery
- wrong_item

Return JSON only:
{{
  "category": "",
  "confidence": 0.0
}}

Complaint:
{text}
"""

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": "You are a strict JSON-only classification engine."},
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    raw_output = response.choices[0].message.content

    try:
        return json.loads(raw_output)
    except:
        return {"error": "Invalid JSON", "raw": raw_output}


In [11]:
result = classify_complaint("The food quality was not good")
print(result)
print(type(result))


{'category': 'food_spoiled', 'confidence': 0.8}
<class 'dict'>


In [12]:
import json

policies = [
  {
    "id": "food_spoiled",
    "title": "Spoiled Food Policy",
    "content": "If a customer reports spoiled food, apologize sincerely. Ask for image proof. Offer a full refund. If the same customer reports spoiled food more than twice in 30 days, escalate to human support."
  },
  {
    "id": "food_tampered",
    "title": "Tampered Food Policy",
    "content": "If food appears tampered, apologize immediately. Offer full refund without delay. Escalate to safety team. Mark delivery partner for investigation."
  },
  {
    "id": "late_delivery",
    "title": "Late Delivery Policy",
    "content": "If delivery is delayed more than 45 minutes beyond estimated time, offer 30 percent refund. If more than 90 minutes, offer full refund."
  },
  {
    "id": "wrong_item",
    "title": "Wrong Item Policy",
    "content": "If a customer receives the wrong item, wrong meal, incorrect order, or missing item, apologize sincerely. Offer replacement or full refund. Ask whether they prefer refund or redelivery. Escalate if repeated more than twice in 30 days."
  },
  {
        "id": "allergy_issue",
        "title": "Allergen Safety Policy",
        "content": "If a customer reports an allergy issue or allergic reaction, apologize immediately. This is a safety priority. If the customer received an item containing an allergen they requested to exclude, offer an immediate full refund. If the customer reports a medical reaction to an allergen, escalate to the emergency safety team and human support instantly."
    },
    {
        "id": "driver_misconduct",
        "title": "Driver Conduct Policy",
        "content": "If a customer reports driver misconduct, a rude driver, or an unprofessional delivery person, apologize sincerely. If the driver was aggressive, unprofessional, or violated safety rules, escalate the complaint to the logistics management team. If the driver failed to follow specific delivery instructions or was rude, provide a 20 percent discount voucher for the next order."
    },
    {
        "id": "payment_billing",
        "title": "Payment and Billing Policy",
        "content": "If a customer has a payment query, billing error, or was overcharged, verify the order total. If there is a double payment or incorrect billing amount, initiate a refund to the original payment method. For general payment inquiries regarding promo codes, discounts, or credit card errors, provide the billing FAQ link. Escalate complex billing disputes to the finance department."
    },
    {
        "id": "missing_entire_order",
        "title": "Non-Delivery Policy",
        "content": "If an entire order is missing, or the customer reports the order was never delivered despite being marked as completed, apologize. Check the driver's GPS coordinates at the time of delivery. If the order cannot be located or was delivered to the wrong address, offer a full refund or a free redelivery. Escalate to the fraud prevention team if the same customer reports a missing order more than twice."
    },
  {
        "id": "packaging_defect",
        "title": "Packaging Quality Policy",
        "content": "If a customer reports a packaging defect, such as a broken container, leaked sauce, or crushed box that makes the food difficult to eat, apologize. If the packaging failure resulted in food being spilled or inedible, offer a full refund. If the damage is cosmetic only, offer a 10 percent discount."
    },
    {
        "id": "item_quality_texture",
        "title": "Food Texture and Quality Policy",
        "content": "If a customer complains about food quality issues like soggy fries, rubbery steak, or stale bread, apologize. These are quality concerns. Offer a 50 percent refund or a replacement. If the food is reported as 'inedible' due to texture, offer a full refund."
    },
    {
        "id": "technical_app_error",
        "title": "App and Technical Support Policy",
        "content": "If a customer reports a technical error, app crash, or GPS tracking failure, apologize. Verify if the error prevented the order from being placed or tracked. If a technical glitch resulted in a failed delivery, offer a full refund. Escalate persistent app bugs to the engineering team."
    },
    {
        "id": "temperature_issue",
        "title": "Food Temperature Policy",
        "content": "If a customer reports that hot food arrived cold or cold food arrived warm, apologize. If the temperature issue is due to a delivery delay (over 30 mins late), offer a full refund. If the delivery was on time but the temperature is poor, offer a 30 percent discount."
    },
    {
        "id": "dietary_preference_violation",
        "title": "Dietary Preference Policy",
        "content": "If a customer receives a meal that violates a non-allergy dietary preference (e.g., meat in a vegetarian dish, or non-halal meat), apologize sincerely. Offer a replacement or full refund. This policy applies to religious, ethical, or lifestyle dietary choices."
    },
    {
        "id": "missing_utensils_condiments",
        "title": "Missing Add-ons Policy",
        "content": "If a customer reports missing utensils, straws, or small condiments (ketchup, napkins), apologize. Since these are low-value items, offer a small credit ($2-$5) to the customer account. Do not offer full refunds for missing napkins or straws."
    },
    {
        "id": "subscription_billing",
        "title": "Premium Subscription Policy",
        "content": "If a customer has a query about a 'Pro' or 'Gold' subscription, or was charged a monthly fee unexpectedly, explain the cancellation process. If the customer was charged after a trial ended, offer a one-time courtesy refund of the subscription fee and escalate to the accounts team."
    },
    {
        "id": "promotion_issue",
        "title": "Promotion and Voucher Policy",
        "content": "If a voucher code is not working or a 'Buy One Get One' (BOGO) deal was not honored by the restaurant, apologize. Manually apply the discount as a credit to the account if the customer provides a screenshot of the valid promotion."
    },
    {
        "id": "restaurant_service_complaint",
        "title": "Restaurant Performance Policy",
        "content": "If a customer complains about the restaurant's food preparation (e.g., portion size too small, food looks nothing like the photo), apologize. Note the feedback for the restaurant partner. Offer a 20 percent discount if the portion size is significantly smaller than advertised."
    },
    {
        "id": "environmental_policy",
        "title": "Sustainability and Waste Policy",
        "content": "If a customer complains about excessive plastic usage or receiving plastic cutlery despite opting out, apologize. Log the feedback for our sustainability team. This does not qualify for a refund but helps us improve our eco-friendly goals."
    }
]

with open("policies.json", "w") as f:
    json.dump(policies, f, indent=2)


In [13]:
policy_retrieval_texts = [
    "spoiled food, rotten food, bad smell, expired food, stale food",
    "food tampered, packaging opened, seal broken, safety issue",
    "delivery late, delayed order, arrived late, long wait time",
    "wrong item, wrong meal, incorrect order, missing item, different dish received"
]


In [14]:
with open("policies.json", "r") as f:
    policies = json.load(f)


In [15]:
def retrieve_policy(category):
    for policy in policies:
        if policy["id"] == category:
            return policy["content"]
    return "No policy found."


In [16]:
#Test1
result = classify_complaint("My order arrived 1 hour late")
policy_text = retrieve_policy(result["category"])

print("Classification:", result)
print("Policy:", policy_text)


Classification: {'category': 'late_delivery', 'confidence': 1.0}
Policy: If delivery is delayed more than 45 minutes beyond estimated time, offer 30 percent refund. If more than 90 minutes, offer full refund.


In [17]:
import json

def generate_response(complaint, category, policy_text):
    prompt = f"""
You are a professional customer support assistant.

You must strictly follow the company policy provided below.

Company Policy:
{policy_text}

Customer Complaint:
{complaint}

Instructions:
- Follow policy strictly.
- Do not invent compensation beyond policy.
- Return JSON only:

{{
  "response_text": "",
  "action_taken": "",
  "escalate": false
}}
"""

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": "You are a strict JSON-only customer support AI."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.3
    )

    raw_output = response.choices[0].message.content

    try:
        return json.loads(raw_output)
    except:
        return {"error": "Invalid JSON", "raw": raw_output}


In [18]:
def process_complaint(complaint):
    classification = classify_complaint(complaint)

    if classification.get("confidence", 0) < 0.6:
        return {
            "response_text": "Your complaint requires human review.",
            "action_taken": "Escalated to human support",
            "escalate": True
        }

    policy_text = retrieve_policy(classification["category"])

    response = generate_response(
        complaint,
        classification["category"],
        policy_text
    )

    return response


In [19]:
#Test2
complaint = "My order arrived 1 hour late"

result = classify_complaint(complaint)
policy_text = retrieve_policy(result["category"])

final_response = generate_response(
    complaint,
    result["category"],
    policy_text
)

print(final_response)


{'response_text': 'We apologize for the delay in your order. Since it arrived 1 hour late, which is more than 45 minutes beyond the estimated time, we are offering a 30% refund.', 'action_taken': 'Refund processed', 'escalate': False}


In [20]:
#Test3
print(process_complaint("Service was terrible"))


{'response_text': 'Your complaint requires human review.', 'action_taken': 'Escalated to human support', 'escalate': True}


In [21]:
!pip install sentence-transformers faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


In [22]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

embedding_model = SentenceTransformer("all-mpnet-base-v2")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

MPNetModel LOAD REPORT from: sentence-transformers/all-mpnet-base-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [23]:
import json

with open("policies.json", "r") as f:
    policies = json.load(f)

policy_texts = [p["title"] + " " + p["content"] for p in policies]


In [24]:
policy_embeddings = embedding_model.encode(
    policy_retrieval_texts,
    normalize_embeddings=True
)

dimension = policy_embeddings.shape[1]

index = faiss.IndexFlatIP(dimension)
index.add(np.array(policy_embeddings))


In [25]:
def retrieve_policy_rag(query, top_k=1):
    query_embedding = embedding_model.encode(
        [query],
        normalize_embeddings=True
    )

    distances, indices = index.search(
        np.array(query_embedding),
        top_k
    )

    score = float(distances[0][0])
    idx = indices[0][0]

    return {
        "policy_id": policies[idx]["id"],
        "content": policies[idx]["content"],
        "score": score
    }


In [26]:
#Test 2.1
print(retrieve_policy_rag("My food smelled bad and looked spoiled"))
print(retrieve_policy_rag("I received the wrong meal"))
print(retrieve_policy_rag("Delivery was very late"))
print(retrieve_policy_rag("What are your working hours?"))


{'policy_id': 'food_spoiled', 'content': 'If a customer reports spoiled food, apologize sincerely. Ask for image proof. Offer a full refund. If the same customer reports spoiled food more than twice in 30 days, escalate to human support.', 'score': 0.6089155673980713}
{'policy_id': 'wrong_item', 'content': 'If a customer receives the wrong item, wrong meal, incorrect order, or missing item, apologize sincerely. Offer replacement or full refund. Ask whether they prefer refund or redelivery. Escalate if repeated more than twice in 30 days.', 'score': 0.7742288112640381}
{'policy_id': 'late_delivery', 'content': 'If delivery is delayed more than 45 minutes beyond estimated time, offer 30 percent refund. If more than 90 minutes, offer full refund.', 'score': 0.7017365097999573}
{'policy_id': 'late_delivery', 'content': 'If delivery is delayed more than 45 minutes beyond estimated time, offer 30 percent refund. If more than 90 minutes, offer full refund.', 'score': 0.17758768796920776}


In [27]:
SIMILARITY_THRESHOLD = 0.50


def decision_layer(user_text):
    result = retrieve_policy_rag(user_text)

    if result["score"] < SIMILARITY_THRESHOLD:
        return {
            "escalate": True,
            "reason": "Low similarity score",
            "score": result["score"]
        }

    return {
        "escalate": False,
        "policy": result
    }


In [28]:
#Test 2.2
print(decision_layer("My food smelled bad"))
print(decision_layer("What are your working hours?"))


{'escalate': False, 'policy': {'policy_id': 'food_spoiled', 'content': 'If a customer reports spoiled food, apologize sincerely. Ask for image proof. Offer a full refund. If the same customer reports spoiled food more than twice in 30 days, escalate to human support.', 'score': 0.5723899006843567}}
{'escalate': True, 'reason': 'Low similarity score', 'score': 0.17758768796920776}


In [29]:
def generate_response(user_text, policy_content):

    prompt = f"""
You are an automated customer support agent.

You MUST strictly follow the policy below.
Do NOT invent rules or compensation outside this policy.
If the complaint requires escalation according to the policy, set escalate to true.

Policy:
{policy_content}

Customer Complaint:
{user_text}

Return JSON only:
{{
  "response_text": "",
  "action_taken": "",
  "escalate": false
}}
"""

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",  # or current Groq model
        messages=[
            {"role": "system", "content": "You output strict JSON only."},
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    return response.choices[0].message.content


In [30]:
def handle_complaint(user_text):

    decision = decision_layer(user_text)

    if decision["escalate"]:
        return {
            "response_text": "Your complaint requires human review.",
            "action_taken": "Escalated to human support",
            "escalate": True
        }

    policy = decision["policy"]["content"]

    return generate_response(user_text, policy)


In [31]:
#Test 2.3
print(retrieve_policy_rag("My food smelled rotten"))
print(retrieve_policy_rag("Delivery came 2 hours late"))
print(retrieve_policy_rag("I got the wrong meal"))
print(retrieve_policy_rag("Do you sell groceries?"))

{'policy_id': 'food_spoiled', 'content': 'If a customer reports spoiled food, apologize sincerely. Ask for image proof. Offer a full refund. If the same customer reports spoiled food more than twice in 30 days, escalate to human support.', 'score': 0.5749887228012085}
{'policy_id': 'late_delivery', 'content': 'If delivery is delayed more than 45 minutes beyond estimated time, offer 30 percent refund. If more than 90 minutes, offer full refund.', 'score': 0.7312790155410767}
{'policy_id': 'wrong_item', 'content': 'If a customer receives the wrong item, wrong meal, incorrect order, or missing item, apologize sincerely. Offer replacement or full refund. Ask whether they prefer refund or redelivery. Escalate if repeated more than twice in 30 days.', 'score': 0.6123414039611816}
{'policy_id': 'food_tampered', 'content': 'If food appears tampered, apologize immediately. Offer full refund without delay. Escalate to safety team. Mark delivery partner for investigation.', 'score': 0.27185899019

In [32]:
print(retrieve_policy_rag("I received the wrong meal"))


{'policy_id': 'wrong_item', 'content': 'If a customer receives the wrong item, wrong meal, incorrect order, or missing item, apologize sincerely. Offer replacement or full refund. Ask whether they prefer refund or redelivery. Escalate if repeated more than twice in 30 days.', 'score': 0.7742288112640381}


In [50]:
def handle_complaint(user_text):

    decision = decision_layer(user_text)

    log_entry = {
        "timestamp": str(datetime.datetime.now(datetime.UTC)),
        "user_text": user_text,
        "similarity_score": None,
        "policy_id": None,
        "escalated": None,
        "response": None
    }

    if decision["escalate"]:
        log_entry["similarity_score"] = decision.get("score")
        log_entry["escalated"] = True
        log_entry["response"] = "Escalated to human support"

        interaction_logs.append(log_entry)

        return {
            "response_text": "Your complaint requires human review.",
            "action_taken": "Escalated to human support",
            "escalate": True
        }

    policy = decision["policy"]
    log_entry["similarity_score"] = policy["score"]
    log_entry["policy_id"] = policy["policy_id"]
    log_entry["escalated"] = False

    response = generate_response(user_text, policy["content"])

    parsed_response = json.loads(response)
    log_entry["response"] = parsed_response


    interaction_logs.append(log_entry)

    return response


In [51]:
import json

def save_logs():
    with open("interaction_logs.json", "w") as f:
        json.dump(interaction_logs, f, indent=2)


In [52]:
print(handle_complaint("My food smelled rotten"))
print(handle_complaint("Delivery was 2 hours late"))
print(handle_complaint("I received the wrong dish"))
print(handle_complaint("Do you sell groceries?"))


{
  "response_text": "We apologize sincerely for the inconvenience. Can you please provide us with an image of the spoiled food as proof?",
  "action_taken": "Offered full refund",
  "escalate": false
}
{
  "response_text": "We apologize for the delay in your delivery. Since your delivery was more than 90 minutes late, we are offering a full refund.",
  "action_taken": "Full refund offered",
  "escalate": false
}
{
  "response_text": "We apologize sincerely for the mistake. You have the option of a replacement or a full refund. Would you prefer a refund or for us to redeliver the correct dish?",
  "action_taken": "Offered replacement or refund",
  "escalate": false
}
{'response_text': 'Your complaint requires human review.', 'action_taken': 'Escalated to human support', 'escalate': True}


In [53]:
handle_complaint("My food smelled rotten")
print(interaction_logs)


[{'timestamp': '2026-02-08 07:59:49.071382+00:00', 'user_text': 'Do you sell groceries?', 'similarity_score': 0.27185899019241333, 'policy_id': None, 'escalated': True, 'response': {'response_text': 'Escalated to human support'}}, {'timestamp': '2026-02-08 08:00:06.587432+00:00', 'user_text': 'My food smelled rotten', 'similarity_score': 0.5749887228012085, 'policy_id': 'food_spoiled', 'escalated': False, 'response': {'response_text': 'We apologize sincerely for the inconvenience. Can you please provide us with an image of the spoiled food as proof?', 'action_taken': 'Offered full refund', 'escalate': False}}, {'timestamp': '2026-02-08 08:00:06.958774+00:00', 'user_text': 'Delivery was 2 hours late', 'similarity_score': 0.7216912508010864, 'policy_id': 'late_delivery', 'escalated': False, 'response': {'response_text': 'We apologize for the delay in your delivery. Since your delivery was more than 90 minutes late, we are offering a full refund.', 'action_taken': 'Full refund offered', '

In [54]:
import json

parsed_response = json.loads(response)
log_entry["response"] = parsed_response


NameError: name 'response' is not defined

In [41]:
save_logs()


Evaluation

In [42]:
test_cases = [
    {"text": "My food smelled rotten", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "The food was tampered", "expected_policy": "food_tampered", "should_escalate": False},
    {"text": "Delivery was 2 hours late", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "I received the wrong dish", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Do you sell groceries?", "expected_policy": None, "should_escalate": True},
    {"text": "My pizza arrived cold and soggy.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I got the wrong burger instead of my ordered chicken sandwich.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The delivery was 2 hours late and I missed my meeting.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The salad had a weird smell and tasted spoiled.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "My order was tampered with, someone opened the package.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I ordered fries but got onion rings.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The food was cold and late.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "My meal arrived 90 minutes late.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The delivery bag was damaged and food was spilled.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I got the wrong item and it was spoiled.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Pizza arrived burnt.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "The order never arrived.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "My burger had a hair in it.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I received the wrong drink.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Food was spoiled and delivery was late.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "The sushi was cold when it arrived.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "My dessert was missing from the order.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Order came 45 minutes late.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The packaging was tampered with.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I got a sandwich instead of a wrap.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The pasta smelled bad.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "The delivery person was rude and late.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The sauce was missing and the order was wrong.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The food was tampered with and had foreign objects.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I received the wrong pizza toppings.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The chicken smelled rotten.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "My order was 2 hours late and half of it was missing.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "Someone opened my meal before delivery.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The drink I ordered was not delivered.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Food was cold and looked spoiled.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I got the wrong dessert and it was melted.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Delivery was late and driver did not follow instructions.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "My package was tampered with, plastic seal broken.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I received a completely different order.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The fries were soggy.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "The order came late and food was cold.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "My sandwich was missing an ingredient.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Food packaging was opened before delivery.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I got the wrong side dish.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The pizza was spoiled when I opened it.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Delivery arrived an hour late.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The order seemed tampered with.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I received a wrong combo meal.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "My steak was overcooked and smelled bad.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Food was delayed by 90 minutes.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The package was broken and the food spilled.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I got a completely different dessert.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "My salad was rotten.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Delivery came 3 hours late.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "Food was tampered with, seal broken.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I received the wrong item twice in a week.", "expected_policy": "wrong_item", "should_escalate": True},
    {"text": "Do you sell groceries?", "expected_policy": None, "should_escalate": True},
    {"text": "Can I change my order for tomorrow?", "expected_policy": None, "should_escalate": True},
    {"text": "The milk in my latte is curdled and chunky.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I found a piece of blue plastic wrap inside my burrito.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "I ordered a steak medium-rare but it's raw and cold.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Tracker says delivered but my porch is empty.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "There are finger marks in the frosting of my cupcake.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "This chicken has a weird chemical smell to it.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Missing my side order of mac and cheese.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The driver took a massive detour, food is now ice cold.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The staples on the bag were all ripped out when it arrived.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "My prawns are slimy and smell very fishy.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Sent me a Diet Pepsi instead of the regular Coke I ordered.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The delivery is 40 minutes past the maximum arrival window.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The safety seal on the drink lid was already peeled off.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The avocado in this bowl is completely black and mushy.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Ordered a family meal but only received one burger.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The driver is stuck in the same spot on the map for 30 mins.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "Someone definitely ate some of my fries, the box is half empty.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The bread on this sandwich is covered in green mold.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Received a pepperoni pizza instead of the veggie one.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "My lunch arrived after my break ended, over an hour late.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The box was crushed and there's a footprint on it.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The eggs in my breakfast wrap were green and slimy.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Requested no onions due to allergy but the burger is full of them.", "expected_policy": "wrong_item", "should_escalate": True},
    {"text": "Driver went to the wrong street and the food is now frozen.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The bag was soaked with something that smells like cigarettes.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The sushi rice is hard as a rock and smells fermented.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "I ordered a large shake but got a small cup.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Delivery guy was 50 minutes late and very aggressive.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The tamper sticker was cut with a knife.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The meat in this taco tastes sour and old.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Missing my drink and the extra dipping sauces.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The courier is lost and won't answer any calls.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "There's a band-aid inside the takeout container.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The fruit salad is fizzy and clearly fermenting.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I got a hot latte when I specifically ordered an iced one.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Waiting 90 minutes for a simple sandwich is unacceptable.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The drink carrier was empty but my receipt shows 3 sodas.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Found a piece of metal wire in my noodles.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The pork chops are gray and smell like sulfur.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "I received someone else's order meant for 'Sarah'.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "My food has been 'Out for Delivery' for 2 hours now.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The pizza box was delivered open and half the cheese is gone.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The soup has a weird white film growing on top.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "I ordered a vegan wrap but there is real chicken in here.", "expected_policy": "wrong_item", "should_escalate": True},
    {"text": "The driver left the bag at a random house down the street.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The safety tape was peeled back and stuck back down poorly.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The fish in this poke bowl is mushy and smells putrid.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Requested gluten-free crust but received wheat crust.", "expected_policy": "wrong_item", "should_escalate": True},
    {"text": "App says delivered but the driver is still miles away.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "I found a long black hair baked into the pizza crust.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The yogurt is 4 days past the expiration date.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "They forgot the fries that come with the combo.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "My order is 75 minutes late and I want a refund.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The bag was ripped open and my wings were missing.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The beef in the stir-fry is literally raw.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Ordered a latte but received a cup of hot water.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The driver didn't follow the 'No Contact' instruction.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "There's a lipstick mark on the rim of my coffee cup.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The pasta sauce is bubbling and tastes like vinegar.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Received a chocolate donut instead of the glazed one.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "It’s been 3 hours and support won't answer me.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The container looks like someone took a bite out of the burger.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "This milk smells like it's been left in the sun.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Missing my dessert and my drink.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The driver left the food in the rain on the sidewalk.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The bag was open and my drink was half empty.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The chicken wings are still frozen in the center.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Ordered a 12 pack of nuggets but only got 6.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The rider is going in the completely opposite direction.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "Found a twist-tie in my mashed potatoes.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The steak has a grayish color and smells rotten.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "Received a Sprite instead of a 7-Up.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Driver was an hour late and was very rude.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The pizza was delivered vertically, all toppings are ruined.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The salad greens are slimy and brown.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I got a wrap instead of the bowl I paid for.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Delivery is 2 hours late and cold.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "I found a bug in my salad.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The cheese on my nachos is hard and smells like plastic.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "Missing the extra side of rice I paid for.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The app crashed and now I can't track my late order.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "The drink lid has a hole poked through it that wasn't for a straw.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The turkey meat is slimy and iridescent.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "I received a vanilla shake instead of chocolate.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The driver didn't follow delivery instructions at all.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The bag was wide open and napkins were scattered everywhere.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The bread in this sandwich is stale and rock hard.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I got a coffee but it has no sugar or cream like requested.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The delivery window was 12:00, it is now 1:30.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "There are bite marks on the side of the sandwich.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The cream in the pastry tastes extremely sour.", "expected_policy": "food_spoiled", "should_escalate": False},
    {"text": "I ordered a burger combo but only the burger arrived.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "The rider stopped at another house for 40 minutes.", "expected_policy": "late_delivery", "should_escalate": False},
    {"text": "The pizza box was crushed and the sauce is everywhere.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "The chicken tastes like it has been sitting out all day.", "expected_policy": "food_spoiled", "should_escalate": True},
    {"text": "I received the wrong flavor of wings.", "expected_policy": "wrong_item", "should_escalate": False},
    {"text": "Delivery took so long the restaurant is now closed.", "expected_policy": "late_delivery", "should_escalate": True},
    {"text": "I found a staple inside my burrito wrap.", "expected_policy": "food_tampered", "should_escalate": True},
    {"text": "What are your delivery hours?", "expected_policy": None, "should_escalate": True},
    {"text": "Do you offer any discounts for first-time users?", "expected_policy": None, "should_escalate": True},
    {"text": "I need help with a refund from yesterday.", "expected_policy": None, "should_escalate": True},
    {"text": "I specifically asked for no peanuts due to a severe allergy, but this pad thai is covered in them. I feel a reaction starting.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The driver was incredibly rude to me at the door and used foul language. I feel very uncomfortable.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "I was charged twice for my pizza order. My bank statement shows two identical transactions.", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "The app says my food was delivered 20 minutes ago but my porch is totally empty. Where is my food?", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I received a burger with cheese even though I indicated a dairy allergy in the notes.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The delivery person left my food in the middle of the driveway and yelled at me when I asked why.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "I tried to use a 50% off promo code but the app charged me the full amount anyway.", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "My order never arrived. I've been waiting for three hours and the driver is not answering.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "This salad has walnuts in it. I am allergic to tree nuts and could have died if I didn't see them.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The driver was driving recklessly and almost hit my mailbox before handing me the food.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "My credit card was charged $50 but the receipt in the bag says $35. Why the overcharge?", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "The tracker shows the order as 'Delivered' but the photo is of a completely different house.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I have a gluten allergy and the restaurant sent me regular wheat bread by mistake.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The delivery guy refused to come to my door and told me to 'come get it yourself' in a very mean tone.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "I need to change my payment method for the order I just placed.", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "I'm standing outside and there is no sign of the driver or my food. It’s been an hour since it was 'delivered'.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "There is shellfish in this dish and I have a deadly seafood allergy. Please help.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The driver was unprofessional and was wearing a dirty shirt while eating some of my fries.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "My refund from last week hasn't shown up in my bank account yet. When will I get my money back?", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "The driver stole my food. I watched him pull up on the app, wait a second, and then drive away.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I requested a soy-free meal but this clearly contains soy sauce. I am having an allergic response.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The courier was very rude and made inappropriate comments about my house.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "I have an unauthorized charge on my account from this app that I didn't make.", "expected_policy": "payment_billing", "should_escalate": True},
    {"text": "The order is missing entirely. The restaurant says the driver picked it up, but it's not here.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I am allergic to eggs and this mayo was on my sandwich despite my 'No Mayo' request.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The delivery person was extremely impatient and kept ringing the bell non-stop.", "expected_policy": "driver_misconduct", "should_escalate": False},
    {"text": "Why was I charged a 'service fee' that wasn't shown at checkout?", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "The app says my order is complete but I never received anything.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "This food contains sesame seeds. I am highly allergic and the menu didn't mention them.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The driver threw my food at the door and just walked away without a word.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "I am trying to add a new credit card but the payment screen keeps freezing.", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "My order is gone. I think it was delivered to the wrong building again.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I ordered the nut-free cake but I can smell almonds. This is dangerous for me.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The delivery driver was very unprofessional and didn't follow the 'Leave at door' note.", "expected_policy": "driver_misconduct", "should_escalate": False},
    {"text": "My total was $25 but I was charged $35. I want a refund for the difference.", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "I've been waiting for my order for 2 hours and now it just says 'Cancelled'. Where is my food?", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I accidentally ate a bite of this and it has mushrooms. I am allergic to mushrooms!", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The courier was rude when I asked why they were late.", "expected_policy": "driver_misconduct", "should_escalate": False},
    {"text": "The app says my payment failed but the money was still taken from my account.", "expected_policy": "payment_billing", "should_escalate": True},
    {"text": "The driver marked the order as delivered but he is still at the restaurant on the map.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "Is there dairy in your pizza dough? I have a severe allergy.", "expected_policy": "allergy_issue", "should_escalate": False},
    {"text": "The driver left the food in front of my neighbor's door, not mine.", "expected_policy": "driver_misconduct", "should_escalate": False},
    {"text": "Can I get a receipt for my payment sent to my email?", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "I've been looking everywhere but my order is nowhere to be found. It is missing.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I specifically ordered a gluten-free meal and this has a 'contains wheat' sticker on it.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The delivery person was shouting on the phone and was very unprofessional during drop off.", "expected_policy": "driver_misconduct", "should_escalate": False},
    {"text": "I want to remove my saved payment information from the system.", "expected_policy": "payment_billing", "should_escalate": False},
    {"text": "The driver called me and said he ate my food because he was hungry. I am not joking.", "expected_policy": "missing_entire_order", "should_escalate": True},
    {"text": "I am having an allergic reaction to the honey in this tea. I asked for no honey.", "expected_policy": "allergy_issue", "should_escalate": True},
    {"text": "The driver was late and acted like it was my fault. Extremely rude behavior.", "expected_policy": "driver_misconduct", "should_escalate": True},
    {"text": "The plastic lid on my curry was cracked and half the sauce is in the bottom of the bag.", "expected_policy": "packaging_defect", "should_escalate": False},
    {"text": "My fries are so soggy they feel like wet cardboard. I can't even pick them up.", "expected_policy": "item_quality_texture", "should_escalate": False},
    {"text": "The app crashed right after I paid, and I didn't get a confirmation email. Did my order go through?", "expected_policy": "technical_app_error", "should_escalate": True},
    {"text": "My milkshake is completely melted and warm. It looks like milk.", "expected_policy": "temperature_issue", "should_escalate": False},
    {"text": "I am a strict vegetarian for religious reasons, but there are pieces of bacon in my salad.", "expected_policy": "dietary_preference_violation", "should_escalate": True},
    {"text": "I ordered a salad and soup but I didn't get any spoons or forks. How am I supposed to eat this at work?", "expected_policy": "missing_utensils_condiments", "should_escalate": False},
    {"text": "I was charged $9.99 for a subscription I never signed up for. Please refund me.", "expected_policy": "subscription_billing", "should_escalate": True},
    {"text": "The BOGO deal for the tacos didn't work. I only got one taco but I was charged for the deal.", "expected_policy": "promotion_issue", "should_escalate": False},
    {"text": "The portion of pasta I received is tiny. It looks like a side dish, not a $20 main course.", "expected_policy": "restaurant_service_complaint", "should_escalate": False},
    {"text": "I checked the 'no plastic' box but my bag is full of plastic straws and cutlery. This is so wasteful.", "expected_policy": "environmental_policy", "should_escalate": False},
    {"text": "The box was totally crushed and the pizza is just a ball of dough now.", "expected_policy": "packaging_defect", "should_escalate": False},
    {"text": "This steak is like rubber. I’ve been chewing one piece for five minutes.", "expected_policy": "item_quality_texture", "should_escalate": False},
    {"text": "The GPS on the app is stuck at the restaurant but the driver just called saying he is outside.", "expected_policy": "technical_app_error", "should_escalate": False},
    {"text": "My pizza arrived ice cold. The box feels like it was in a fridge.", "expected_policy": "temperature_issue", "should_escalate": False},
    {"text": "I ordered a Halal chicken bowl but the receipt says 'Regular Pork'. This is a huge problem.", "expected_policy": "dietary_preference_violation", "should_escalate": True},
    {"text": "You forgot the ketchup packets and the napkins I specifically asked for.", "expected_policy": "missing_utensils_condiments", "should_escalate": False},
    {"text": "I cancelled my Pro membership last month but I see another charge today.", "expected_policy": "subscription_billing", "should_escalate": True},
    {"text": "The promo code 'SAVE50' says it's invalid even though the banner says it's active today.", "expected_policy": "promotion_issue", "should_escalate": False},
    {"text": "The food looks nothing like the pictures in the app. It's very misleading.", "expected_policy": "restaurant_service_complaint", "should_escalate": False},
    {"text": "Why do you guys use so much styrofoam? It's 2026, get better packaging.", "expected_policy": "environmental_policy", "should_escalate": False},
    {"text": "My drink spilled all over the burger because the cup holder was flimsy.", "expected_policy": "packaging_defect", "should_escalate": False},
    {"text": "The bread on my sub is so stale it's literally crumbling into dust.", "expected_policy": "item_quality_texture", "should_escalate": False},
    {"text": "I can't log into my account. It says 'Internal Server Error' every time.", "expected_policy": "technical_app_error", "should_escalate": True},
    {"text": "The sushi is warm. This is dangerous, it should be cold!", "expected_policy": "temperature_issue", "should_escalate": True},
    {"text": "I ordered a vegan burger and got a real beef patty. I haven't eaten meat in 10 years.", "expected_policy": "dietary_preference_violation", "should_escalate": True},
    {"text": "No straw for my iced coffee. I can't drink this while driving.", "expected_policy": "missing_utensils_condiments", "should_escalate": False},
    {"text": "I want a refund for the 'Gold' fee. I didn't use any of the benefits this month.", "expected_policy": "subscription_billing", "should_escalate": False},
    {"text": "The restaurant refused to honor the 'free drink' coupon I had.", "expected_policy": "promotion_issue", "should_escalate": False},
    {"text": "I paid extra for 'Extra Meat' but there's barely any chicken in this bowl.", "expected_policy": "restaurant_service_complaint", "should_escalate": False},
    {"text": "The delivery bag was tied with ten plastic zip ties. That's way too much plastic.", "expected_policy": "environmental_policy", "should_escalate": False},
    {"text": "The paper bag ripped from the bottom and my food fell onto the street.", "expected_policy": "packaging_defect", "should_escalate": False},
    {"text": "This calamari is so overcooked it's like eating a rubber band.", "expected_policy": "item_quality_texture", "should_escalate": False},
    {"text": "The checkout button is greyed out even though I filled in all the info.", "expected_policy": "technical_app_error", "should_escalate": False},
    {"text": "My soup is lukewarm and the bread is cold. This was supposed to be a hot meal.", "expected_policy": "temperature_issue", "should_escalate": False},
    {"text": "This dish has eggs in it. I am a vegan for ethical reasons and this is upsetting.", "expected_policy": "dietary_preference_violation", "should_escalate": False},
    {"text": "I need a fork! My hotel doesn't have any and the driver didn't bring them.", "expected_policy": "missing_utensils_condiments", "should_escalate": False},
    {"text": "Why was I billed for a yearly subscription when I selected monthly?", "expected_policy": "subscription_billing", "should_escalate": True},
    {"text": "The 'Free Delivery' promo didn't apply and I was charged $5.99 for shipping.", "expected_policy": "promotion_issue", "should_escalate": False},
    {"text": "I ordered a 'Jumbo' wings plate but they are tiny, like popcorn chicken.", "expected_policy": "restaurant_service_complaint", "should_escalate": False},
    {"text": "Stop sending me so many printed flyers in the bag, it's a waste of paper.", "expected_policy": "environmental_policy", "should_escalate": False},
    {"text": "The container melted because the food was too hot, now there is plastic in my rice.", "expected_policy": "packaging_defect", "should_escalate": True},
    {"text": "The rice is undercooked and crunchy. I can't eat this.", "expected_policy": "item_quality_texture", "should_escalate": False},
    {"text": "The app keeps saying I'm in a different city, so I can't order from local places.", "expected_policy": "technical_app_error", "should_escalate": True},
    {"text": "The ice cream arrived as liquid. It took the driver 40 minutes in a hot car.", "expected_policy": "temperature_issue", "should_escalate": False},
    {"text": "I ordered a pepperoni pizza with 'No Pork' and they used real pork pepperoni.", "expected_policy": "dietary_preference_violation", "should_escalate": True},
    {"text": "Where are the chopsticks? I ordered Chinese food for 5 people and no chopsticks.", "expected_policy": "missing_utensils_condiments", "should_escalate": False},
    {"text": "I was charged for a trial that was supposed to be free for 30 days.", "expected_policy": "subscription_billing", "should_escalate": True},
    {"text": "I have a screenshot of the $10 off deal but it didn't apply to my order.", "expected_policy": "promotion_issue", "should_escalate": False},
    {"text": "The burger patty is half the size of the bun. This is a ripoff.", "expected_policy": "restaurant_service_complaint", "should_escalate": False},
    {"text": "Please use paper bags instead of these thick plastic ones next time.", "expected_policy": "environmental_policy", "should_escalate": False}
]


In [43]:
multi_policy_test_cases = [
    {"text": "My food smelled rotten", "expected_policies": ["food_spoiled"], "should_escalate": False},
    {"text": "The food was tampered", "expected_policies": ["food_tampered"], "should_escalate": False},
    {"text": "Delivery was 2 hours late", "expected_policies": ["late_delivery"], "should_escalate": False},
    {"text": "I received the wrong dish", "expected_policies": ["wrong_item"], "should_escalate": False},
    {"text": "Do you sell groceries?", "expected_policies": [], "should_escalate": True},
    {"text": "My pizza arrived cold and soggy.", "expected_policies": ["food_spoiled"], "should_escalate": False},
    {"text": "The delivery was 2 hours late and I missed my meeting.", "expected_policies": ["late_delivery"], "should_escalate": True},
    {"text": "The salad had a weird smell and tasted spoiled.", "expected_policies": ["food_spoiled"], "should_escalate": False},
    {"text": "My order was tampered with, someone opened the package.", "expected_policies": ["food_tampered"], "should_escalate": True},
    {"text": "I got the wrong burger instead of my ordered chicken sandwich.", "expected_policies": ["wrong_item"], "should_escalate": False},
    {"text": "The food was cold and late.", "expected_policies": ["food_spoiled", "late_delivery"], "should_escalate": True},
    {"text": "I got the wrong item and it was spoiled.", "expected_policies": ["wrong_item", "food_spoiled"], "should_escalate": True},
    {"text": "My burger had a hair in it.", "expected_policies": ["food_tampered"], "should_escalate": True},
    {"text": "Food was spoiled and delivery was late.", "expected_policies": ["food_spoiled", "late_delivery"], "should_escalate": True},
    {"text": "I specifically asked for no peanuts due to a severe allergy, but this pad thai is covered in them. I feel a reaction starting.", "expected_policies": ["allergy_issue"], "should_escalate": True},
    {"text": "The driver was incredibly rude to me at the door and used foul language. I feel very uncomfortable.", "expected_policies": ["driver_misconduct"], "should_escalate": True},
    {"text": "I was charged twice for my pizza order. My bank statement shows two identical transactions.", "expected_policies": ["payment_billing"], "should_escalate": False},
    {"text": "My order never arrived. I've been waiting for three hours and the driver is not answering.", "expected_policies": ["missing_entire_order"], "should_escalate": True},
    {"text": "I ordered a vegan wrap but there is real chicken in here.", "expected_policies": ["dietary_preference_violation"], "should_escalate": True},
    {"text": "The driver threw my food at the door and just walked away without a word.", "expected_policies": ["driver_misconduct"], "should_escalate": True},
    {"text": "The delivery bag was tied with ten plastic zip ties. That's way too much plastic.", "expected_policies": ["environmental_policy"], "should_escalate": False},
    {"text": "My fries are so soggy they feel like wet cardboard. I can't even pick them up.", "expected_policies": ["item_quality_texture"], "should_escalate": False},
    {"text": "The app crashed right after I paid, and I didn't get a confirmation email. Did my order go through?", "expected_policies": ["technical_app_error"], "should_escalate": True},
    {"text": "My milkshake is completely melted and warm. It looks like milk.", "expected_policies": ["temperature_issue"], "should_escalate": False},
    {"text": "Ordered a vegan burger and got a real beef patty. I haven't eaten meat in 10 years.", "expected_policies": ["dietary_preference_violation"], "should_escalate": True},
    {"text": "Missing my dessert and my drink.", "expected_policies": ["wrong_item"], "should_escalate": False},
    {"text": "I ordered a large shake but got a small cup.", "expected_policies": ["wrong_item"], "should_escalate": False},
    {"text": "Delivery took so long the restaurant is now closed.", "expected_policies": ["late_delivery"], "should_escalate": True},
    {"text": "The steak has a grayish color and smells rotten.", "expected_policies": ["food_spoiled"], "should_escalate": True},
    {"text": "I accidentally ate a bite of this and it has mushrooms. I am allergic to mushrooms!", "expected_policies": ["allergy_issue"], "should_escalate": True},
    {"text": "The delivery driver was very unprofessional and didn't follow the 'Leave at door' note.", "expected_policies": ["driver_misconduct"], "should_escalate": False},
    {"text": "The checkout button is greyed out even though I filled in all the info.", "expected_policies": ["technical_app_error"], "should_escalate": False},
]
def test_complaint_classification(classifier):
    for case in multi_policy_test_cases:
        predicted_policies = classifier.predict(case["text"])
        assert set(predicted_policies) == set(case["expected_policies"]), f"Failed for: {case['text']}"
        # Optional: test escalation logic
        assert classifier.should_escalate(case["text"]) == case["should_escalate"], f"Escalation mismatch for: {case['text']}"


In [44]:
def evaluate_system(test_cases):
    correct_retrieval = 0
    correct_escalation = 0
    false_escalations = 0

    total = len(test_cases)

    for case in test_cases:
        result = handle_complaint(case["text"])

        predicted_policy = interaction_logs[-1]["policy_id"]
        predicted_escalation = interaction_logs[-1]["escalated"]

        # Retrieval accuracy
        if predicted_policy == case["expected_policy"]:
            correct_retrieval += 1

        # Escalation accuracy
        if predicted_escalation == case["should_escalate"]:
            correct_escalation += 1

        # False escalation
        if predicted_escalation and not case["should_escalate"]:
            false_escalations += 1

    print("Retrieval Accuracy:", correct_retrieval / total)
    print("Escalation Accuracy:", correct_escalation / total)
    print("False Escalation Rate:", false_escalations / total)


In [45]:
evaluate_system(test_cases)


Retrieval Accuracy: 0.15444015444015444
Escalation Accuracy: 0.5057915057915058
False Escalation Rate: 0.41312741312741313
