# Install FAISS for efficient similarity search and Gradio for building the chatbot interface

In [2]:
pip install faiss-cpu gradio

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting gradio
  Downloading gradio-5.26.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-p

# Import necessary libraries for data handling, encoding, retrieval, and model-based generation

In [3]:
import pandas as pd
from collections import Counter
import torch
from transformers import DPRContextEncoder, DPRContextEncoderTokenizer, DPRQuestionEncoderTokenizer,DPRQuestionEncoder,AutoTokenizer, AutoModelForSeq2SeqLM
import faiss
import pickle
import numpy as np

# Load restaurant JSON data and generate detailed context strings for each restaurant.
# Each context includes basic details, menu highlights, dominant spice level, and gluten-free items.
# The resulting contexts are saved to a text file for use in a RAG-based retrieval system.


In [4]:

# Location of the restaurant JSON file inside the runtime.
DATA_PATH = "/content/data_json"

# Each row of this frame is later rendered into one text context by generate_context().
df = pd.read_json(DATA_PATH)


def _is_missing(value):
    """Return True when *value* is None or a float NaN."""
    # NaN is the only float that is not equal to itself.
    return value is None or (isinstance(value, float) and value != value)


def _field(record, key, default):
    """Read ``record.get(key)`` and fall back to *default* when it is absent, None, or NaN."""
    value = record.get(key, default)
    return default if _is_missing(value) else value


def generate_context(doc):
    """Render one restaurant record as a plain-text context block.

    Args:
        doc: A mapping-like record (a dict or a DataFrame row) that supports
            ``[]`` and ``.get``. ``restaurant_name``, ``location``, ``address``
            and ``cuisines`` are read with ``[]`` and must be present; every
            other field is optional.

    Returns:
        A stripped string: the restaurant header fields, the most common menu
        spice level, the names of gluten-free items, then one paragraph per
        menu item (or "No menu available." when there are no usable items).

    Raises:
        KeyError: If one of the four required fields is absent.

    Optional fields that are absent, None, or NaN use their fallback
    ("N/A", "No", "Unknown"). This matters for DataFrame rows, where a field
    missing from the JSON shows up as NaN rather than as an absent key, and
    NaN is truthy.
    """
    cuisines = doc['cuisines']
    if isinstance(cuisines, list):
        # str() keeps the join from failing on non-string entries.
        cuisines = ', '.join(str(cuisine) for cuisine in cuisines)

    raw_menu = doc.get("menu", [])
    # Only dict entries can be rendered; anything else (including a NaN menu) is ignored.
    menu_items = [item for item in raw_menu if isinstance(item, dict)] if isinstance(raw_menu, list) else []

    spice_levels = [_field(item, "spice_level", "Unknown") for item in menu_items]
    dominant_spice = Counter(spice_levels).most_common(1)[0][0] if spice_levels else "Unknown"

    # Items without a name are skipped so the join below never sees None.
    gluten_free_names = [
        str(item["name"])
        for item in menu_items
        if _field(item, "gluten_free", False) and not _is_missing(item.get("name"))
    ]
    gluten_str = ", ".join(gluten_free_names) if gluten_free_names else "None"

    summary = "\n".join([
        f"Restaurant: {doc['restaurant_name']}",
        f"Location: {doc['location']}",
        f"Address: {doc['address']}",
        f"Cuisines: {cuisines}",
        f"Offers: {_field(doc, 'offers', 'N/A')}",
        f"Pure Vegetarian: {'Yes' if _field(doc, 'pure_vegetarian', False) else 'No'}",
        f"Vegan Options: {'Yes' if _field(doc, 'vegan', False) else 'No'}",
        f"Operating Hours: {_field(doc, 'operating_hours', 'N/A')}",
        f"Dominant Spice Level: {dominant_spice}",
        f"Gluten-Free Items: {gluten_str}",
        "",
        "Menu:",
    ])

    if not menu_items:
        return (summary + "\nNo menu available.").strip()

    item_blocks = [
        "\n".join([
            f"Item: {item.get('name')}",
            f"Description: {item.get('description')}",
            f"Price: ₹{item.get('price')}",
            f"Spice Level: {_field(item, 'spice_level', 'Unknown')}",
            f"Gluten-Free: {'Yes' if _field(item, 'gluten_free', False) else 'No'}",
        ])
        for item in menu_items
    ]
    # A blank line separates the summary from the first item and the items from each other.
    return "\n\n".join([summary] + item_blocks).strip()


# Render every DataFrame row into its text context, keeping the row order.
context_docs = []
for _, restaurant_row in df.iterrows():
    context_docs.append(generate_context(restaurant_row))

# Persist the contexts; each one is followed by a blank line.
with open("restaurant_contexts.txt", "w", encoding="utf-8") as contexts_file:
    contexts_file.writelines(context + "\n\n" for context in context_docs)


# 📥 Read and clean the restaurant context blocks from the saved text file

In [5]:

# Every context produced by generate_context() starts with this field label.
RESTAURANT_MARKER = "Restaurant: "

with open("restaurant_contexts.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

# Split only where a blank line is followed by the start of the next restaurant.
# Splitting on every blank line would also cut each restaurant apart, because a
# context contains blank lines before "Menu:" and before each menu item, leaving
# item chunks that no longer mention which restaurant they belong to.
first_block, *later_blocks = raw_text.split("\n\n" + RESTAURANT_MARKER)
candidate_blocks = [first_block] + [RESTAURANT_MARKER + block for block in later_blocks]
updated_contexts = [block.strip() for block in candidate_blocks if block.strip()]


# Select the compute device: use the GPU (CUDA) when available, otherwise fall back to the CPU

In [6]:

# Prefer CUDA when PyTorch can see a GPU; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# 🔍 Encode context documents using Facebook's DPR Context Encoder
 - Load pretrained DPR context tokenizer and encoder
 - Define a batching function to tokenize and encode contexts efficiently
 - Generate dense vector embeddings for all restaurant context documents


In [7]:
# Hugging Face checkpoint that supplies both the DPR context tokenizer and the encoder weights.
CTX_CHECKPOINT = "facebook/dpr-ctx_encoder-single-nq-base"

ctx_tokenizer = DPRContextEncoderTokenizer.from_pretrained(CTX_CHECKPOINT)
ctx_encoder = DPRContextEncoder.from_pretrained(CTX_CHECKPOINT)
ctx_encoder = ctx_encoder.to(device)
# Switch the encoder to evaluation mode; it is only used for inference here.
ctx_encoder.eval()


import torch
import numpy as np

def encode_contexts(contexts, batch_size=16):
    """Embed texts with the DPR context encoder, ``batch_size`` texts at a time.

    Args:
        contexts: Sliceable sequence of strings to embed.
        batch_size: Number of texts tokenized and encoded per forward pass.

    Returns:
        A NumPy array made by vertically stacking each batch's
        ``pooler_output``, so rows follow the order of ``contexts``.

    Texts are padded to the longest in the batch and truncated to 512 tokens.
    Uses the notebook-level ``ctx_tokenizer``, ``ctx_encoder`` and ``device``.
    """
    batch_vectors = []

    # Gradients are never needed for inference, so disable them for the whole pass.
    with torch.no_grad():
        for start in range(0, len(contexts), batch_size):
            chunk = contexts[start:start + batch_size]
            encoded = ctx_tokenizer(
                chunk,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            encoded = encoded.to(device)

            pooled = ctx_encoder(**encoded).pooler_output
            # Move to host memory before converting; NumPy cannot read GPU tensors.
            batch_vectors.append(pooled.cpu().numpy())

    return np.vstack(batch_vectors)

# Embed every context read back from restaurant_contexts.txt, using the default batch size of 16.
context_embeddings = encode_contexts(updated_contexts)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/492 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizer'.


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

# 🧠 Build and save FAISS index for fast similarity search
- Create a flat L2 index using the context embedding dimension
- Add all encoded restaurant contexts to the index
- Save the FAISS index and corresponding context texts for later retrieval


In [8]:

# The index has to be created with the width of a single embedding vector.
dimension = context_embeddings.shape[1]

# Flat index that ranks by L2 distance.
# NOTE(review): DPR encoders are normally paired with inner-product search
# (faiss.IndexFlatIP) — confirm that L2 is intentional here.
index = faiss.IndexFlatL2(dimension)
index.add(context_embeddings)

# Save the index and the texts side by side; entry i of the pickled list is the
# text whose embedding was added to the index at position i.
faiss.write_index(index, "restaurant_dpr_index.faiss")
with open("restaurant_texts.pkl", "wb") as texts_file:
    pickle.dump(updated_contexts, texts_file)



# 🔍 Load pre-trained models and set up question-answering pipeline
- Use DPR question encoder to encode queries and retrieve top-k relevant contexts from the FAISS index
- Utilize Flan-T5 for answer generation based on retrieved contexts
- Define a function to handle query processing, context retrieval, and answer generation

# Example: Test the system with a sample query


In [9]:

# Prefer CUDA when PyTorch can see a GPU; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Restore the FAISS index and the context texts that were saved with it.
index = faiss.read_index("restaurant_dpr_index.faiss")
with open("restaurant_texts.pkl", "rb") as texts_file:
    # NOTE: unpickling can execute code from the file; only load pickles this notebook wrote.
    context_texts = pickle.load(texts_file)

# DPR question encoder: turns a user query into a vector for the index lookup.
QUESTION_CHECKPOINT = "facebook/dpr-question_encoder-single-nq-base"
q_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(QUESTION_CHECKPOINT)
q_encoder = DPRQuestionEncoder.from_pretrained(QUESTION_CHECKPOINT)
q_encoder = q_encoder.to(device)
q_encoder.eval()

# Flan-T5: generates the final answer from the retrieved contexts.
GENERATOR_CHECKPOINT = "google/flan-t5-base"
gen_tokenizer = AutoTokenizer.from_pretrained(GENERATOR_CHECKPOINT)
gen_model = AutoModelForSeq2SeqLM.from_pretrained(GENERATOR_CHECKPOINT)
gen_model = gen_model.to(device)


def answer_query(query, top_k=3, max_input_tokens=1024):
    """Answer a question using contexts retrieved from the FAISS index.

    Args:
        query: The user's question as a string.
        top_k: Number of contexts to request from the index.
        max_input_tokens: Upper bound on the number of tokens handed to the
            generator (context and question together).

    Returns:
        The generated answer as a string, or a short prompt asking for a
        question when ``query`` is empty or whitespace.

    Uses the notebook-level ``q_tokenizer``, ``q_encoder``, ``index``,
    ``context_texts``, ``gen_tokenizer``, ``gen_model`` and ``device``.
    """
    if not query or not query.strip():
        return "Please enter a question about the restaurants."

    # Truncate so an over-long query cannot exceed the encoder's 512-token limit.
    inputs = q_tokenizer(query, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        query_embedding = q_encoder(**inputs).pooler_output.cpu().numpy()

    # Retrieve top-k contexts. FAISS fills missing results with -1 when the index
    # holds fewer than top_k vectors; -1 would silently select the last text.
    _, indices = index.search(query_embedding, top_k)
    retrieved = [context_texts[i] for i in indices[0] if i >= 0]
    context_block = "\n\n".join(retrieved)

    # Tokenize the question part first and give the context whatever room is left.
    # Truncating the whole prompt from the right would cut off the question and
    # the "Answer:" cue whenever the retrieved contexts are long.
    question_ids = gen_tokenizer(
        f"\n\nQuestion: {query}\nAnswer:",
        truncation=True,
        max_length=max_input_tokens,
    ).input_ids
    context_budget = max(max_input_tokens - len(question_ids), 0)
    context_ids = gen_tokenizer(f"Context:\n{context_block}", add_special_tokens=False).input_ids
    input_ids = torch.tensor([context_ids[:context_budget] + question_ids], device=device)

    # Generate answer
    outputs = gen_model.generate(input_ids, max_length=256, num_beams=4, early_stopping=True)
    return gen_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke-test the whole pipeline with one sample question.
query = "Which restaurant has the best vegetarian options in their menu?"
print(f"Q: {query}")
print(f"A: {answer_query(query)}")


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/493 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Q: Which restaurant has the best vegetarian options in their menu?
A: Vegetarian restaurant


# 🌐 Set up Gradio interface for interactive question answering
- Create a user-friendly interface where users can ask queries about restaurants
- Use the pre-defined 'answer_query' function to generate responses based on the stored data
- Launch the interface with a shareable link for public access


In [10]:
import gradio as gr

def gradio_answer(query):
    """Gradio callback: pass the textbox contents to ``answer_query`` and return its result."""
    answer = answer_query(query)
    return answer

# Two-line free-text box in which the user types a question.
question_box = gr.Textbox(lines=2, placeholder="Ask about restaurants...")

iface = gr.Interface(
    fn=gradio_answer,
    inputs=question_box,
    outputs="text",
    title="🍽️Zomato AI Chatbot",
    description="Ask me anything about restaurants. I’ll answer using real data!",
)

# share=True requests a temporary public URL in addition to the local server.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://469325cd2c3c861ef8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


