# In [None]:
# =====================================================
# Hotel Customer Support Chatbot (ML-based)
# Course: Artificial Intelligence
# Session: 202509
# Approach: Machine Learning (SVM + NLP)
# =====================================================

# -------------------------
# 1. Import Libraries
# -------------------------
import pandas as pd
import re
import nltk

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

from joblib import dump, load

# -------------------------
# 2. NLTK Setup
# -------------------------
# Resources used by preprocess_text():
#   punkt / punkt_tab -> word_tokenize (NLTK 3.9+ looks up 'punkt_tab';
#                        older releases look up 'punkt', so fetch both)
#   stopwords         -> stopwords.words('english')
#   wordnet           -> WordNetLemmatizer
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

# A set gives O(1) membership tests in the per-token stopword filter.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# -------------------------
# 3. Text Preprocessing
# -------------------------
def preprocess_text(text):
    """
    Normalize raw text before it is handed to the TF-IDF vectorizer.

    Steps: lowercase, delete every character that is not an ASCII letter
    or whitespace, tokenize, remove English stopwords, lemmatize.

    Args:
        text: Raw utterance. Non-string values (for example None, or the
            float NaN that pandas produces for an empty CSV cell) are
            treated as empty text.

    Returns:
        The surviving lemmatized tokens joined by single spaces, or an
        empty string when nothing survives.
    """
    if not isinstance(text, str):
        # A missing value has no .lower(); treat it as "no text" instead
        # of raising AttributeError in the middle of DataFrame.apply().
        return ''

    text = text.lower()
    # Characters are deleted rather than replaced by a space, so a
    # hyphenated word such as "check-in" becomes the single token "checkin".
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text)
        if token not in stop_words
    )

# -------------------------
# 4. Load Dataset
# -------------------------
# dataset.csv columns:
# instruction | intent

df = pd.read_csv("dataset.csv")

# Rows without an utterance or without a label cannot be used for
# supervised training: a missing instruction is not a string, and a missing
# intent would put NaN into the label vector used for the stratified split.
# reset_index() hands back a fresh frame with a contiguous index, so the
# column assignment below operates on an independent object.
df = df.dropna(subset=["instruction", "intent"]).reset_index(drop=True)

df["cleaned_text"] = df["instruction"].apply(preprocess_text)

# Features (cleaned utterances) and targets (intent labels).
X = df["cleaned_text"]
y = df["intent"]

# -------------------------
# 5. Train-Test Split
# -------------------------
# Split the cleaned text BEFORE vectorizing. Fitting TF-IDF on the full
# corpus would let the held-out rows influence the vocabulary (min_df) and
# the IDF weights, which leaks test information into training and makes the
# reported scores optimistic.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# -------------------------
# 6. TF-IDF Vectorization
# -------------------------
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),  # unigrams and bigrams
    min_df=2             # ignore terms seen in fewer than 2 training rows
)

# Learn vocabulary/IDF from the training rows only, then reuse that fitted
# state to transform the held-out rows.
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Whole corpus in vectorized form, kept only so that any later code that
# still refers to X_vec keeps working; it is not used for fitting or scoring.
X_vec = vectorizer.transform(X)

# -------------------------
# 7. Train SVM Model
# -------------------------
# Pin the seed, as the train/test split does, so repeated runs give the
# same model: LinearSVC's solver can shuffle samples pseudo-randomly when
# it works on the dual formulation.
model = LinearSVC(random_state=42)
model.fit(X_train, y_train)

# -------------------------
# 8. Model Evaluation
# -------------------------
# Predict on the held-out split and report the scores.
y_pred = model.predict(X_test)

print("=== Model Evaluation ===")
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
per_class_report = classification_report(y_test, y_pred)
print(per_class_report)

# Persist both fitted objects: predict_intent() needs the vectorizer to
# build features and the model to classify them.
for artifact, filename in (
    (model, "intent_model.joblib"),
    (vectorizer, "tfidf_vectorizer.joblib"),
):
    dump(artifact, filename)

# -------------------------
# 9. Simple Rule-Based Response
# -------------------------
# Canned reply for each intent label; chatbot_response() looks the predicted
# label up here and falls back to a default message for anything else.
responses = {
    "ask_room_price": "Our deluxe room costs RM180 per night.",
    "ask_booking": (
        "I can help you book a room. "
        "Please provide your date and number of guests."
    ),
    "ask_checkin_time": "Check-in time starts from 2:00 PM.",
    "ask_checkout_time": "Check-out time is before 12:00 PM.",
    "greeting": "Hello! How can I help you today?",
    "goodbye": "Thank you for visiting. Have a nice day!",
}

# -------------------------
# 10. Chatbot Functions
# -------------------------
def predict_intent(user_input):
    """
    Predict the intent label for a single user utterance.

    The text goes through the same preprocess_text() cleaning used for the
    training data and is then vectorized with the fitted TF-IDF vectorizer.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The intent label predicted by the model, or None when the cleaned
        input contains no term from the vectorizer's vocabulary.
    """
    cleaned = preprocess_text(user_input)
    vec = vectorizer.transform([cleaned])
    if vec.nnz == 0:
        # All-zero feature row (empty input, gibberish, unseen words only).
        # A linear model would score it from its intercepts alone and always
        # answer with the same arbitrary intent, so report "no intent" and
        # let the caller use its fallback reply.
        return None
    return model.predict(vec)[0]

def chatbot_response(user_input):
    """
    Return the bot's reply for one user message.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The canned response for the predicted intent; a "do not understand"
        message when the intent has no entry in `responses`; or a generic
        apology when prediction fails with an ordinary exception.
    """
    try:
        intent = predict_intent(user_input)
        return responses.get(intent, "Sorry, I do not understand your request.")
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate, and record the
        # traceback so the failure is not silently hidden.
        import logging

        logging.getLogger(__name__).exception(
            "Failed to build a response for input %r", user_input
        )
        return "Sorry, something went wrong."

# -------------------------
# 11. Test Chatbot
# -------------------------
if __name__ == "__main__":
    # Interactive console loop: read a line, answer it, repeat until the
    # user types 'exit' or the input stream ends.
    print("\n=== Hotel Chatbot ===")
    print("Type 'exit' to quit.\n")

    while True:
        try:
            user_input = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user pressed Ctrl-C: leave the loop
            # cleanly instead of ending the session with a traceback.
            print("\nBot: Goodbye!")
            break

        # Ignore surrounding whitespace and letter case so that inputs such
        # as " Exit " also end the session.
        if user_input.strip().lower() == "exit":
            print("Bot: Goodbye!")
            break

        reply = chatbot_response(user_input)
        print("Bot:", reply)
