# In [None]:
import os
import re

import google.generativeai as genai
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from textblob import TextBlob

# Load the review dataset and keep only the rows that have review text.
# The trailing copy() gives an independent frame, so the column assignments
# made further down do not write into a view of the frame read from disk.
file_path = "database.csv"
df = pd.read_csv(file_path).dropna(subset=["Review"]).copy()

# scikit-learn's built-in English stop-word set, consulted by clean_text().
stop_words = text.ENGLISH_STOP_WORDS


# Compiled once at import time because clean_text() runs on every review row.
_NOISE_RE = re.compile(r"http\S+|@[A-Za-z0-9]+|#[A-Za-z0-9]+")
_APOSTROPHE_RE = re.compile(r"['\u2019]")
_NON_LETTER_RE = re.compile(r"[^a-zA-Z\s]")


def clean_text(text):
    """Normalise a raw review into a space-separated string of content words.

    URLs, @mentions and #hashtags are removed, every character that is not an
    ASCII letter or whitespace is stripped, the text is lower-cased, and
    tokens that are in the module-level ``stop_words`` set or are a single
    letter long are dropped.

    Punctuation and digits are replaced by a space instead of being deleted,
    so words separated only by punctuation ("great,but", "tasty.Service")
    stay separate tokens rather than fusing into one unknown word.
    Apostrophes are still deleted outright so contractions keep the form they
    had before ("don't" -> "dont").

    Args:
        text: The raw review. Any value is accepted; it is coerced with
            ``str`` before processing.

    Returns:
        The surviving lower-cased tokens joined by single spaces. This is an
        empty string when nothing survives the filtering.
    """
    cleaned = _NOISE_RE.sub(" ", str(text))
    # Drop apostrophes before the generic pass so "don't" is not split into
    # "don" and "t".
    cleaned = _APOSTROPHE_RE.sub("", cleaned)
    cleaned = _NON_LETTER_RE.sub(" ", cleaned)
    tokens = cleaned.lower().split()
    return " ".join(
        token for token in tokens if token not in stop_words and len(token) > 1
    )

# Store the normalised form of every review next to the raw text.
df["Clean_Review"] = df["Review"].map(clean_text)


def get_sentiment_label(text):
    """Classify *text* by the sign of its TextBlob polarity score.

    Returns "positive" for a polarity above zero, "negative" for a polarity
    below zero, and "neutral" otherwise.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return "positive"
    if score < 0:
        return "negative"
    return "neutral"

# Label every cleaned review by TextBlob polarity and drop the neutral ones so
# the classifier only has to separate "positive" from "negative".
# NOTE(review): this rebinds the module-level ``df`` to the filtered frame, so
# chat_with_bot() (which reads ``df``) only sees rows with a non-neutral review.
df["Sentiment_Label"] = df["Clean_Review"].apply(get_sentiment_label)
df = df[df["Sentiment_Label"] != "neutral"]

y = df["Sentiment_Label"]

# Hold out the test reviews BEFORE anything is fitted or resampled.
# Fitting TF-IDF and running SMOTE on the full corpus leaks test information
# into training: synthetic minority samples are interpolated from rows that
# later land in the test set, and the test set itself contains synthetic rows.
# That inflates the reported accuracy.
train_reviews, test_reviews, y_train_original, y_test = train_test_split(
    df["Clean_Review"], y, test_size=0.2, random_state=42, stratify=y
)

# The vocabulary and IDF weights are learned from the training reviews only.
vectorizer = TfidfVectorizer(max_features=10000, ngram_range=(1, 2))
X_train_original = vectorizer.fit_transform(train_reviews)
X_test = vectorizer.transform(test_reviews)
# Full-corpus matrix, kept under its original name for any later code that
# still reads ``X``.
X = vectorizer.transform(df["Clean_Review"])

# Balance the classes on the training fold only; the test fold keeps the real
# class distribution and contains only real reviews.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_original, y_train_original)
X_train, y_train = X_resampled, y_resampled


dt_classifier = DecisionTreeClassifier(criterion="gini", max_depth=None, random_state=42)
dt_classifier.fit(X_train, y_train)
y_pred_dt = dt_classifier.predict(X_test)


# Metrics are computed on untouched held-out reviews, so they may be lower
# than figures obtained with resampling done before the split.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
report = classification_report(y_test, y_pred_dt)

print(" Model Trained! Accuracy:", accuracy_dt)
print("\nClassification Report:\n", report)


# Configure the Gemini client. The key is read from the GOOGLE_API_KEY
# environment variable when it is set, so a real credential never has to be
# written into (and committed with) this file. The original placeholder is
# kept as the fallback, so behaviour is unchanged when the variable is absent.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YourGeminiAPIKey")
genai.configure(api_key=GOOGLE_API_KEY)
# "models/ModelName" is a placeholder to be replaced with a real model id.
model = genai.GenerativeModel("models/ModelName")

def chat_with_bot():
    """Run an interactive console chat with Gemini, grounded on the cafe data.

    Reads the module-level ``df`` and ``model``. The instruction message
    carries the first 200 rows of the selected columns; the remaining rows are
    uploaded afterwards in 200-row chunks. The function then loops on
    ``input()`` and prints each reply until the user types ``exit`` or
    ``quit``, stdin is closed or interrupted, or a request raises.

    Returns:
        None. All results are printed to stdout.
    """
    columns = [
        "cafes name", "Overall_Rating", "location", "Opening_Hours", "Cuisine",
        "Special_Features", "Dietary_Options", "Rate for two", "Review",
        "Contact_Number",
    ]
    chunk_size = 200

    print("\n Gemini Chatbot is ready! Type 'exit' to stop.\n")
    # NOTE(review): several column names listed in the prompt below
    # (cafes_name, rate_for_two, review, ...) are spelled differently from the
    # headers of the table that is actually sent ("cafes name",
    # "Rate for two", "Review", ...). The prompt text is left untouched here.
    context_message = '''
    You are a cafe assistant bot. Below is the dataset of cafes with their attributes.
    ONLY use this data to answer queries. Do not guess or invent new information.
    although it can answers basic quesion of user .
    Columns: cafes_name,"Overall_Rating",location,"Opening_Hours","Cuisine", special_features, dietary_options, rate_for_two, review,"Contact_Number"
    Here are the first entries:
    {}
    Answer all questions based strictly on this data.
    '''.format(df[columns].head(chunk_size).to_string(index=False))
    chat = model.start_chat(history=[])
    chat.send_message(context_message)

    # The first ``chunk_size`` rows already travelled inside context_message,
    # so the upload starts after them instead of sending those rows twice.
    for start in range(chunk_size, len(df), chunk_size):
        msg = df[start:start + chunk_size][columns].to_string(index=False)
        chat.send_message(f"Dataset chunk:\n{msg}")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt ends the session cleanly
            # instead of surfacing a traceback.
            print("\nChatbot session ended.")
            break
        command = user_input.strip()
        if command.lower() in ('exit', 'quit'):
            print("Chatbot session ended.")
            break
        if not command:
            # A blank line is not a query; ask again rather than sending it.
            continue
        try:
            response = chat.send_message(user_input)
            print("Gemini:", response.text)
        except Exception as e:
            # Any failure of the request ends the session, as before.
            print("Error:", str(e))
            break

# Start the interactive chatbot only when this file is executed as a script.
# The data loading and model training above are module-level statements, so
# they run on import as well.
if __name__ == "__main__":
    chat_with_bot()
