In [None]:
import pandas as pd
import os

# Normalized per-restaurant CSV exports that are merged into one dataset.
normalized_files = [
    "just_blr_normalized.csv",
    "mainland_china_normalized.csv",
    "paakashala_normalized.csv",
    "punjab_grill_normalized.csv",
    "rameshwaram_cafe_normalized.csv"
]

# Load every file that exists. Missing files are reported (and remembered)
# so a partial merge is visible in the cell output.
combined_dataframes = []
missing_files = []
for file in normalized_files:
    if os.path.exists(file):
        df = pd.read_csv(file)
        combined_dataframes.append(df)
    else:
        missing_files.append(file)
        print(f"File not found: {file}")

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list, so fail early with a message that names the real problem.
if not combined_dataframes:
    raise FileNotFoundError(
        "None of the normalized CSV files were found: " + ", ".join(missing_files)
    )

# Stack all loaded frames into one, renumbering the index from 0.
combined_df = pd.concat(combined_dataframes, ignore_index=True)

# Save combined data to a new CSV file
combined_df.to_csv("normalized_restaurant_data.csv", index=False)
print("Combined data saved to 'normalized_restaurant_data.csv'")


Combined data saved to 'normalized_restaurant_data.csv'


In [None]:
import pandas as pd

# Load the combined dataset written by the merge step.
df = pd.read_csv("normalized_restaurant_data.csv")


# Clean section and item names: collapse whitespace runs, trim, title-case.
# Missing values become "" first; a bare astype(str) would turn NaN into the
# literal text "nan", which title-casing then stores as the bogus name "Nan".
for column in ("section", "item"):
    df[column] = (
        df[column]
        .fillna("")
        .astype(str)
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
        .str.title()
    )


# Feature Tagging: vegetarian, non-veg, spicy
def tag_features(row):
    """Derive dietary / spice tags for one menu row by keyword matching.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``'item'`` and ``'description'`` (for example
        the row passed by ``df.apply(..., axis=1)``). Both values go through
        ``str()`` so missing or non-string values do not raise.

    Returns
    -------
    list of str
        Subset of ``["vegetarian", "non-vegetarian", "spicy"]``, always in
        that order, so the result is the same on every run.
    """
    import re  # local import keeps the function self-contained

    texts = [str(row['item']).lower(), str(row['description']).lower()]

    # "non-veg", "non veg" and "non-vegetarian" all contain the substring
    # "veg". Detect them first and mask them out so such rows are tagged
    # non-vegetarian instead of vegetarian.
    non_veg_marker = re.compile(r"non[\s-]*veg(?:etarian)?")
    has_non_veg_marker = any(non_veg_marker.search(text) for text in texts)
    veg_texts = [non_veg_marker.sub(" ", text) for text in texts]

    def mentions(keywords, haystacks):
        # Plain substring match, so "chilli" also hits "chillies".
        return any(k in text for text in haystacks for k in keywords)

    # Tags are appended in a fixed order; building them through a set would
    # make the order depend on string hashing.
    tags = []
    if mentions(["veg", "vegetarian", "paneer", "dal", "chole", "palak"], veg_texts):
        tags.append("vegetarian")
    if has_non_veg_marker or mentions(["chicken", "fish", "egg", "mutton", "lamb", "prawn"], texts):
        tags.append("non-vegetarian")
    if mentions(["spicy", "chilli", "hot", "pepper"], texts):
        tags.append("spicy")
    return tags

# Tag every menu row with its dietary / spice keywords.
df["features"] = df.apply(tag_features, axis=1)


def _describe(row):
    """Return the row's description, or a generated sentence when it is blank."""
    existing = row["description"]
    if existing.strip() != "":
        return existing
    return f"{row['item']} is a dish from {row['restaurant']} listed under {row['section']}."


def _embedding_text(row):
    """Render one row as the labelled, newline-separated text used for embedding."""
    labelled_fields = [
        f"Restaurant: {row['restaurant']}",
        f"Section: {row['section']}",
        f"Item: {row['item']}",
        f"Price: {row['price']}",
        f"Description: {row['description']}",
        f"Features: {', '.join(row['features'])}",
        f"Location: {row['location']}",
        f"Contact: {row['contact']}",
        f"Hours: {row['hours']}",
    ]
    return "\n".join(labelled_fields)


# Fill missing descriptions: normalise NaN and the literal "nan" to "", then
# substitute a generated sentence wherever the description is blank.
blank_normalised = df["description"].fillna("").replace("nan", "")
df["description"] = df.assign(description=blank_normalised).apply(_describe, axis=1)


# Fill missing opening hours with a fixed placeholder.
df["hours"] = df["hours"].fillna(value="Hours not available")


# Build the single text field that is embedded for retrieval.
df["combined_text"] = df.apply(_embedding_text, axis=1)


# Persist the enhanced dataset (without the index column).
df.to_csv("enhanced_restaurant_data.csv", index=False)
print("Dataset enhanced and saved as 'enhanced_restaurant_data.csv'")


Dataset enhanced and saved as 'enhanced_restaurant_data.csv'


In [None]:
# Install required packages (Colab or local)
!pip install -q gradio sentence-transformers transformers scikit-learn pandas

In [None]:


# Imports
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

# Model identifiers, gathered in one place for easy swapping.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
GENERATOR_MODEL_NAME = "google/flan-t5-small"

# Read the enhanced dataset and extract the per-row text to embed.
df = pd.read_csv("enhanced_restaurant_data.csv")
text_data = list(df["combined_text"])

# Sentence encoder used for retrieval, and the text2text pipeline used to
# generate the answer.
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
generator = pipeline(task="text2text-generation", model=GENERATOR_MODEL_NAME)

# Encode every row once up front; each query is compared against these vectors.
embeddings = embedding_model.encode(text_data, show_progress_bar=True)

# Chatbot logic
def respond(user_query, history, top_k=4):
    """Answer a menu question from the most similar embedded menu rows.

    Relies on the module-level ``embedding_model``, ``embeddings``,
    ``text_data`` and ``generator`` objects.

    Parameters
    ----------
    user_query : str or None
        The question typed by the user. Empty, whitespace-only or ``None``
        input is answered with a fixed warning and never reaches the models.
    history : list or None
        List of ``(user_message, bot_reply)`` tuples. It is appended to in
        place; ``None`` is treated as an empty history.
    top_k : int, optional
        Number of most-similar menu rows placed in the prompt (default 4).

    Returns
    -------
    tuple
        ``(history, history, "")`` -- the updated history twice (this file
        wires it to the chatbot display and the session state) and an empty
        string that clears the input box.
    """
    if history is None:
        history = []

    if not user_query or not user_query.strip():
        warning = "Please enter a valid question."
        history.append((user_query, warning))
        return history, history, ""

    query_embedding = embedding_model.encode([user_query])
    similarities = cosine_similarity(query_embedding, embeddings)[0]
    # Row indices ordered from most to least similar, cut to the best top_k.
    top_indices = similarities.argsort()[::-1][:top_k]
    # Label snippets by rank (1..top_k). Using the dataset row index here
    # would give arbitrary labels such as "Info 58".
    context = "\n\n".join(
        f"Info {rank}:\n{text_data[row_idx]}"
        for rank, row_idx in enumerate(top_indices, start=1)
    )

    prompt = f"""
You are a helpful assistant trained on the menus of five restaurants: Just BLR, Paakashala, Punjab Grill, Mainland China, and The Rameshwaram Café.

Use the following restaurant menu information to answer customer questions clearly and factually.

Menu Info:
{context}

Question: {user_query}
Answer:"""

    try:
        # Decoding is greedy (do_sample=False). A temperature only applies
        # when sampling, so it is not passed.
        response = generator(prompt, max_length=300, do_sample=False)
        reply = response[0]['generated_text']
    except Exception as e:
        # Best effort: show the failure in the chat instead of crashing the UI.
        reply = f"⚠️ Sorry, something went wrong: {str(e)}"

    history.append((user_query, reply))
    return history, history, ""  # Clear input

# Gradio UI
# Static copy shown above the chat window.
HEADER_MD = "## 🍴 Welcome to the Multi-Restaurant RAG Chatbot!"
INTRO_MD = """
🤖 I'm trained on menus from 5 restaurants: **Just BLR**, **Paakashala**, **Punjab Grill**, **Mainland China**, and **The Rameshwaram Café**.
💡 I can help you explore dishes, compare prices, check timings, and more.

**Try asking:**
- *Which places serve vegetarian starters under ₹300?*
- *What spicy dishes are served at Punjab Grill?*
- *When is Just BLR open?*
"""

# Assemble the chat page: heading, intro, chat display, input box and a
# per-session state holding the conversation history.
with gr.Blocks() as demo:
    gr.Markdown(HEADER_MD)
    gr.Markdown(INTRO_MD)

    chatbot = gr.Chatbot()
    msg = gr.Textbox(
        placeholder="e.g., Show me veg thali options.",
        label="Ask a menu question",
    )
    state = gr.State([])

    # Submitting the textbox calls respond(); its three return values update
    # the chat display, the stored history and the (cleared) input box.
    msg.submit(fn=respond, inputs=[msg, state], outputs=[chatbot, state, msg])

demo.launch()


Device set to use cpu


Batches:   0%|          | 0/19 [00:00<?, ?it/s]

  chatbot = gr.Chatbot()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://24c01d925db25a23a0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


