In [None]:
# Hotel FAQ Chatbot  
### SVM + TF-IDF + spaCy NER


In [None]:
import pandas as pd
import spacy
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from collections import defaultdict
import joblib


In [None]:
# Load spaCy's small English pipeline once; calling `nlp(text)` yields a
# document whose `.ents` are used for entity extraction.
nlp = spacy.load(name="en_core_web_sm")


In [None]:
# Load the labelled utterances. The CSV is expected to provide two columns:
#   instruction - the raw user message
#   intent      - the label the classifier should predict
df = pd.read_csv(filepath_or_buffer="dataset.csv")
df.head(n=5)


In [None]:
def preprocess_text(text):
    """
    Normalise a raw utterance before TF-IDF vectorisation.

    The text is lower-cased and every character that is not an ASCII letter,
    an ASCII digit or whitespace is removed (it is deleted, not replaced by a
    space, so "check-in" becomes "checkin").

    Parameters
    ----------
    text : str or any
        The utterance. Missing values (``None`` or float NaN) are treated as
        an empty utterance; any other non-string value is converted with
        ``str()`` first.

    Returns
    -------
    str
        The cleaned text; ``""`` for missing input.
    """
    # Empty CSV cells arrive from pandas as float NaN, and NaN is the only
    # float that is not equal to itself. Without this guard `.lower()` would
    # raise AttributeError on such rows.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return text

# Store the normalised form of every instruction in a new column, leaving
# the raw "instruction" column untouched.
df["cleaned_text"] = df["instruction"].map(preprocess_text)


In [None]:
# TF-IDF over unigrams and bigrams, with the vocabulary capped at 5000 terms.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)

# Targets are the intent labels; features are the TF-IDF matrix of the
# cleaned utterances.
y = df["intent"]
X = vectorizer.fit_transform(df["cleaned_text"])


In [None]:
# Split the cleaned text rather than the pre-computed matrix `X`, then fit
# the TF-IDF vocabulary and IDF weights on the training rows only. Fitting
# the vectorizer on the full corpus before splitting lets statistics from
# the held-out rows leak into the features and makes the evaluation
# optimistic.
# The split arguments (test_size, random_state, stratify) are the same as
# those previously applied to `X`.
text_train, text_test, y_train, y_test = train_test_split(
    df["cleaned_text"], y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# `vectorizer` is re-fitted in place, so the object that is saved and used
# at inference time is exactly the one the classifier was trained with.
# NOTE: the earlier full-corpus matrix `X` no longer matches this vocabulary.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)


In [None]:
# Linear SVM intent classifier. The solver shuffles data internally, so the
# seed is pinned (same value as the train/test split) to make a
# Restart & Run All reproduce the same model.
clf = LinearSVC(random_state=42)
clf.fit(X_train, y_train)


In [None]:
# Score the classifier on the held-out split.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("=== Model Evaluation ===")
print("Accuracy:", accuracy)
print(report)


In [None]:
# Persist both fitted artifacts; the classifier is only usable together with
# the vectorizer that produced its training features.
joblib.dump(value=clf, filename="svm_faq_model.joblib")
joblib.dump(value=vectorizer, filename="tfidf_vectorizer.joblib")


In [None]:
def extract_entities(text):
    """
    Group the named entities spaCy finds in `text` by their label.

    Returns a defaultdict(list) mapping each entity label to the list of
    matching entity texts, in the order spaCy reports them.
    """
    grouped = defaultdict(list)
    for span in nlp(text).ents:
        grouped[span.label_].append(span.text)
    return grouped


In [None]:
# Canned reply for each intent label. "unknown_intent" doubles as the
# fallback when a predicted label has no entry here.
responses = {
    # Conversation framing
    "greeting": (
        "Welcome to Astra Imperium Hotel. I'm your virtual assistant. "
        "How may I assist you today?"
    ),
    "check_functions": (
        "I can help with room reservations, hotel information, facilities, "
        "services, and general inquiries."
    ),
    # Billing and policies
    "invoices": (
        "To request an invoice, please visit the Front Desk or email us at "
        "billing@astraimperium.com."
    ),
    "cancellation_fees": "Cancellations are free up to 24 hours before check-in.",
    "check_in": "Check-in begins at 3:00 PM.",
    "check_out": "Check-out time is 12:00 PM.",
    # Reservations
    "book_hotel": (
        "To make a reservation, please visit our website or "
        "contact the Front Desk."
    ),
    "cancel_hotel_reservation": (
        "To cancel your reservation, please contact our Reservations Team."
    ),
    "bring_pets": "We allow pets under 10kg with a cleaning fee.",
    # Closing and fallback
    "goodbye": (
        "Thank you for choosing Astra Imperium Hotel. "
        "We look forward to welcoming you again!"
    ),
    "unknown_intent": "I'm sorry, I don't understand your question.",
}


In [None]:
def get_intent(text):
    """
    Predict the intent label for a single user message.

    The message is cleaned with `preprocess_text`, vectorised with the fitted
    `vectorizer`, and classified by `clf`; the single predicted label is
    returned.
    """
    cleaned = preprocess_text(text)
    features = vectorizer.transform([cleaned])
    predictions = clf.predict(features)
    return predictions[0]


In [None]:
def respond(text, return_entities=False):
    """
    Produce the chatbot's reply to a user message.

    Parameters
    ----------
    text : str
        The raw user message.
    return_entities : bool, default False
        When True, also run spaCy NER on the message and return the entities
        alongside the reply.

    Returns
    -------
    str
        The canned reply for the predicted intent, or the "unknown_intent"
        reply when the predicted label has no entry in `responses`.
    tuple[str, defaultdict]
        ``(reply, entities)`` when `return_entities` is True, where
        `entities` is the result of `extract_entities(text)`.
    """
    intent = get_intent(text)
    reply = responses.get(intent, responses["unknown_intent"])

    # Entity extraction is opt-in: the entities do not influence the reply,
    # so running the spaCy pipeline on every message only to discard the
    # result is wasted work.
    if not return_entities:
        return reply
    return reply, extract_entities(text)


In [None]:
# Smoke test: run a handful of representative guest questions through the
# bot and print each exchange.
test_questions = [
    "I want to book a room for next Friday",
    "Can I cancel my reservation?",
    "Do you allow pets in the hotel?",
    "What time is check-in?",
    "How much is a room?",
    "Hello!",
    "Thanks, goodbye!",
]

separator = "-" * 50
for question in test_questions:
    answer = respond(question)
    print("User:", question)
    print("Bot:", answer)
    print(separator)
