In [1]:
# Hotel FAQ Chatbot  
### SVM + TF-IDF + spaCy NER


In [2]:
import pandas as pd
import spacy
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from collections import defaultdict
import joblib


In [3]:
nlp = spacy.load("en_core_web_sm")


In [4]:
# CSV file should contain: instruction, intent
df = pd.read_csv("dataset.csv")
df.head()


Unnamed: 0,instruction,intent
0,I'm looking for informtion about my fucking in...,invoices
1,wanna check my invocies where could i do it,invoices
2,I'd like to see my invoices how cn i do it,invoices
3,"I need to check my fucking invoices, how could...",invoices
4,"I can't find my bills, can I get them?",invoices


In [5]:
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return text

df["cleaned_text"] = df["instruction"].apply(preprocess_text)


In [6]:
vectorizer = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1, 2)
)

X = vectorizer.fit_transform(df["cleaned_text"])
y = df["intent"]


In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


In [8]:
clf = LinearSVC()
clf.fit(X_train, y_train)


0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,0.0001
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0


In [9]:
y_pred = clf.predict(X_test)

print("=== Model Evaluation ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


=== Model Evaluation ===
Accuracy: 0.98328125
                          precision    recall  f1-score   support

               add_night       1.00      0.99      0.99       240
              book_hotel       0.92      0.95      0.93       240
      book_parking_space       1.00      0.99      1.00       240
              bring_pets       0.99      1.00      1.00       240
cancel_hotel_reservation       1.00      0.96      0.98       240
       cancellation_fees       1.00      1.00      1.00       240
change_hotel_reservation       0.97      0.98      0.97       240
      check_child_policy       0.97      0.93      0.95        40
         check_functions       0.98      1.00      0.99        40
  check_hotel_facilities       1.00      0.99      0.99       240
      check_hotel_offers       0.99      0.97      0.98       240
      check_hotel_prices       0.98      0.97      0.97       240
 check_hotel_reservation       0.94      0.98      0.96       240
                check_in     

In [10]:
joblib.dump(clf, "svm_faq_model.joblib")
joblib.dump(vectorizer, "tfidf_vectorizer.joblib")


['tfidf_vectorizer.joblib']

In [11]:
def extract_entities(text):
    """
    Extract named entities using spaCy
    """
    doc = nlp(text)
    entities = defaultdict(list)
    for ent in doc.ents:
        entities[ent.label_].append(ent.text)
    return entities


In [12]:
responses = {
    "greeting": "Welcome to Astra Imperium Hotel. I'm your virtual assistant. How may I assist you today?",
    "check_functions": "I can help with room reservations, hotel information, facilities, services, and general inquiries.",
    "invoices": "To request an invoice, please visit the Front Desk or email us at billing@astraimperium.com.",
    "cancellation_fees": "Cancellations are free up to 24 hours before check-in.",
    "check_in": "Check-in begins at 3:00 PM.",
    "check_out": "Check-out time is 12:00 PM.",
    "book_hotel": "To make a reservation, please visit our website or contact the Front Desk.",
    "cancel_hotel_reservation": "To cancel your reservation, please contact our Reservations Team.",
    "bring_pets": "We allow pets under 10kg with a cleaning fee.",
    "goodbye": "Thank you for choosing Astra Imperium Hotel. We look forward to welcoming you again!",
    "unknown_intent": "I'm sorry, I don't understand your question."
}


In [13]:
def get_intent(text):
    x = vectorizer.transform([preprocess_text(text)])
    return clf.predict(x)[0]


In [14]:
def respond(text):
    intent = get_intent(text)
    reply = responses.get(intent, responses["unknown_intent"])
    entities = extract_entities(text)  # NER extraction
    return reply


In [15]:
test_questions = [
    "I want to book a room for next Friday",
    "Can I cancel my reservation?",
    "Do you allow pets in the hotel?",
    "What time is check-in?",
    "How much is a room?",
    "Hello!",
    "Thanks, goodbye!"
]

for q in test_questions:
    print("User:", q)
    print("Bot:", respond(q))
    print("-" * 50)


User: I want to book a room for next Friday
Bot: To make a reservation, please visit our website or contact the Front Desk.
--------------------------------------------------
User: Can I cancel my reservation?
Bot: To cancel your reservation, please contact our Reservations Team.
--------------------------------------------------
User: Do you allow pets in the hotel?
Bot: We allow pets under 10kg with a cleaning fee.
--------------------------------------------------
User: What time is check-in?
Bot: Check-in begins at 3:00 PM.
--------------------------------------------------
User: How much is a room?
Bot: I'm sorry, I don't understand your question.
--------------------------------------------------
User: Hello!
Bot: Welcome to Astra Imperium Hotel. I'm your virtual assistant. How may I assist you today?
--------------------------------------------------
User: Thanks, goodbye!
Bot: Thank you for choosing Astra Imperium Hotel. We look forward to welcoming you again!
-----------------