In [None]:
import spacy
import re
from collections import defaultdict
import joblib
import numpy as np
import scipy.special
import pandas as pd

# -----------------------------
# 1️⃣ Load models & spaCy
# -----------------------------
# spaCy English pipeline. Not used by the code visible in this cell.
nlp = spacy.load("en_core_web_sm")
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
# Classifier used by predict_intent() via decision_function() and classes_
# (presumably an SVM intent model, judging by the file name -- confirm).
clf = joblib.load("svm_faq_model.joblib")
# Vectorizer used by predict_intent() via transform()
# (presumably a fitted TF-IDF vectorizer, judging by the file name -- confirm).
vectorizer = joblib.load("tfidf_vectorizer.joblib")

# -----------------------------
# 2️⃣ Responses dictionary (omitted here; reuse the complete dictionary defined earlier)
# -----------------------------
# NOTE(review): placeholder only -- `{...}` evaluates to a set containing
# Ellipsis, not a dict. Replace it with the real mapping before using it.
responses = {...}  # the complete responses dictionary provided earlier

# -----------------------------
# 3️⃣ Preprocess text
# -----------------------------
def preprocess_text(text):
    """Return *text* lower-cased with punctuation and symbols removed.

    After lower-casing, every character that is not an ASCII letter, a
    digit, or whitespace is deleted (not replaced by a space), so "I'm"
    becomes "im" and "smoke-free" becomes "smokefree".
    """
    # Anything outside letters, digits and whitespace is stripped.
    disallowed_chars = r'[^a-zA-Z0-9\s]'
    return re.sub(disallowed_chars, '', text.lower())

# -----------------------------
# 4️⃣ Predict intent & confidence
# -----------------------------
def predict_intent(text):
    """Predict the intent label for *text* together with a confidence score.

    The text is normalised with ``preprocess_text``, vectorised with the
    module-level ``vectorizer`` and scored with ``clf.decision_function``.

    The confidence is a heuristic, not a calibrated probability: a margin
    is squashed through the logistic sigmoid and scaled to a percentage.
    For a multi-class model the margin is the gap between the best and the
    runner-up score; for a binary model it is the absolute value of the
    single signed score. Because the margin is never negative in either
    case, the result lies between 50 and 100.

    Args:
        text: Raw user utterance.

    Returns:
        A ``(intent, confidence)`` tuple. ``intent`` is an entry of
        ``clf.classes_``; ``confidence`` is a percentage rounded to two
        decimal places.
    """
    vec = vectorizer.transform([preprocess_text(text)])
    scores = np.asarray(clf.decision_function(vec))

    if scores.ndim == 1:
        # Binary model: scikit-learn returns one signed score per sample
        # (shape (n_samples,)); a positive score means classes_[1].
        score = scores[0]
        intent = clf.classes_[1] if score > 0 else clf.classes_[0]
        margin = abs(score)
    else:
        # Multi-class model: one score per class for the single sample.
        row = scores[0]
        intent = clf.classes_[np.argmax(row)]
        if row.size > 1:
            # Ascending sort, so the last two entries are runner-up and best.
            runner_up, best = np.sort(row)[-2:]
            margin = best - runner_up
        else:
            margin = row[0]

    # Sigmoid converts the margin to a percentage.
    confidence = scipy.special.expit(margin) * 100
    confidence = round(confidence, 2)
    return intent, confidence

# -----------------------------
# 5️⃣ Test Cases
# -----------------------------
# Evaluation set for predict_intent(): each entry pairs a user utterance
# ("input") with the intent label the classifier is expected to return
# ("expected"). The run loop below compares the two with ==.
test_cases = [
    {"input": "My flight got rescheduled, can I keep the room for one more day?", "expected": "add_night"},
    {"input": "I need to secure a spot for the upcoming Merdeka weekend.", "expected": "book_hotel"},
    {"input": "I'm driving a large SUV, will there be a spot for it?", "expected": "book_parking_space"},
    {"input": "I'm bringing my Oyen (cat) along, he is small, what are the charges?", "expected": "bring_pets"},
    {"input": "Something urgent came up and I have to scrap my KL trip entirely.", "expected": "cancel_hotel_reservation"},
    {"input": "If I pull out of the booking now, how much am I losing?", "expected": "cancellation_fees"},
    {"input": "Something came up, I need to shift my visit to next month.", "expected": "change_hotel_reservation"},
    {"input": "We are traveling with kids including a 5 year old, does she need her own bed or can she share?", "expected": "check_child_policy"},
    {"input": "I'm not sure what you are capable of handling as a bot.", "expected": "check_functions"},
    {"input": "I need to burn off some Nasi Lemak calories, do you have a treadmill?", "expected": "check_hotel_facilities"},
    {"input": "Are there any special promotions running for the school holidays?", "expected": "check_hotel_offers"},
    {"input": "What's the damage to my wallet for a night in the Deluxe room?", "expected": "check_hotel_prices"},
    {"input": "I just want to verify that my booking actually exists.", "expected": "check_hotel_reservation"},
    {"input": "We just landed at KLIA, can we get our keys before 3 PM?", "expected": "check_in"},
    {"input": "I left my power bank in room 304, can the Security Team check?", "expected": "check_lost_item"},
    {"input": "Is the menu for SkyDine available online? I need to check for Halal food.", "expected": "check_menu"},
    {"input": "We have a free evening, any nice spots specifically for sightseeing nearby?", "expected": "check_nearby_attractions"},
    {"input": "We are packing up now, can we leave slightly later than 12 PM?", "expected": "check_out"},
    {"input": "Do you accept TnG eWallet or is it cash only?", "expected": "check_payment_methods"},
    {"input": "Do you have any empty beds for tonight, or are you fully booked?", "expected": "check_room_availability"},
    {"input": "What's the difference between the deluxe and the superior?", "expected": "check_room_type"},
    {"input": "Is there a designated spot for smoking, or is the whole hotel smoke-free?", "expected": "check_smoking_policy"},
    {"input": "I'm having a hard time getting the answer I need here.", "expected": "customer_service"},
    {"input": "The noise level last night was absolutely unacceptable.", "expected": "file_complaint"},
    {"input": "I haven't seen the money return to my card yet.", "expected": "get_refund"},
    {"input": "Catch you later then.", "expected": "goodbye"},
    {"input": "Morning!", "expected": "greeting"},
    {"input": "I'm looking to organize a wedding reception, do you have a ballroom?", "expected": "host_event"},
    {"input": "I'd prefer to speak to a flesh-and-blood person.", "expected": "human_agent"},
    {"input": "My company needs a receipt for expense reporting.", "expected": "invoices"},
    {"input": "I'd love to drop a comment on TripAdvisor about your service", "expected": "leave_review"},
    {"input": "Can I use my loyalty rewards for this stay?", "expected": "redeem_points"},
    {"input": "Help me find a spot to sleep near Pavilion Bukit Bintang.", "expected": "search_hotel"},
    {"input": "What is the price for a private car transfer from the airport?", "expected": "shuttle_service"},
    {"input": "Our flight isn't until evening, can we drop our bags somewhere?", "expected": "store_luggage"},
    {"input": "What time is the last MRT train leaving from Bukit Bintang station?", "expected": "unknown_intent"},
]

# -----------------------------
# 6️⃣ Run Tests
# -----------------------------
# Score every test case and collect one report row per case.
results = []
for case_number, case in enumerate(test_cases, start=1):
    predicted_label, score = predict_intent(case["input"])
    # A case passes only when the predicted label matches exactly.
    verdict = "Pass" if predicted_label == case["expected"] else "Fail"
    results.append(
        {
            "Test Case": f"TC{case_number}",
            "Challenging Input": case["input"],
            "Expected Intent": case["expected"],
            "Predicted Intent": predicted_label,
            "Confidence": score,
            "Pass/Fail": verdict,
        }
    )

# Tabulate the rows and summarise the pass rate.
df_results = pd.DataFrame(results)
total_cases = len(df_results)
verdict_counts = df_results["Pass/Fail"].value_counts()
passed_cases = verdict_counts.get("Pass", 0)  # 0 when nothing passed
pass_percentage = round(passed_cases / total_cases * 100, 2)

print(f"Conclusion: {passed_cases}/{total_cases} ({pass_percentage}%)")
# Display the table (rendered as the notebook cell output)
df_results
