In [1]:
# Load the dataset
df = pd.read_csv("/content/zomato_cleaned_features.csv")

# Print the column names
print(df.columns)


Index(['name', 'rate', 'approx_cost(for two people)', 'online_order',
       'book_table', 'votes', 'cuisines', 'rest_type', 'location',
       'restaurant_profile', 'online_order_encoded', 'book_table_encoded',
       'rate_scaled', 'cost_scaled', 'votes_scaled'],
      dtype='object')


# **Import Libraries**

In [9]:
import faiss
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import requests
import os
import csv
from typing import List
from langchain_groq import ChatGroq
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_experimental.text_splitter import SemanticChunker
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

df.head()

Unnamed: 0,name,rate,approx_cost(for two people),online_order,book_table,votes,cuisines,rest_type,location,restaurant_profile,online_order_encoded,book_table_encoded,rate_scaled,cost_scaled,votes_scaled
0,Jalsa,4.1,800.0,Yes,Yes,775,"North Indian, Mughlai, Chinese",Casual Dining,Banashankari,"North Indian, Mughlai, Chinese Casual Dining B...",1,1,1.010753,0.561314,0.611201
1,Spice Elephant,4.1,800.0,Yes,No,787,"Chinese, North Indian, Thai",Casual Dining,Banashankari,"Chinese, North Indian, Thai Casual Dining Bana...",1,0,1.010753,0.561314,0.62613
2,San Churro Cafe,3.8,800.0,Yes,No,918,"Cafe, Mexican, Italian","Cafe, Casual Dining",Banashankari,"Cafe, Mexican, Italian Cafe, Casual Dining Ban...",1,0,0.252002,0.561314,0.789099
3,Addhuri Udupi Bhojana,3.7,300.0,No,No,88,"South Indian, North Indian",Quick Bites,Banashankari,"South Indian, North Indian Quick Bites Banasha...",0,0,-0.000915,-0.581388,-0.243456
4,Grand Village,3.8,600.0,No,No,166,"North Indian, Rajasthani",Casual Dining,Basavanagudi,"North Indian, Rajasthani Casual Dining Basavan...",0,0,0.252002,0.104233,-0.146421


In [None]:
# Set Groq API key
os.environ["GROQ_API_KEY"] = "your_groq_api_key_here"

# Load CSV data from file
def load_csv_data(file_path: str) -> List[Document]:
    """
    Load restaurant data from a CSV file and convert to LangChain Documents.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        List[Document]: List of Document objects with restaurant details.
    """
    documents = []
    try:
        with open(file_path, newline='', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                content = (
                    f"Restaurant: {row['name']}, Rating: {row['rate']}, "
                    f"Cost for two: {row['approx_cost(for two people)']}, "
                    f"Online Order: {row['online_order']}, Book Table: {row['book_table']}, "
                    f"Votes: {row['votes']}, Cuisines: {row['cuisines']}, "
                    f"Type: {row['rest_type']}, Location: {row['location']}"
                )
                documents.append(Document(page_content=content, metadata=row))
        return documents
    except FileNotFoundError:
        raise Exception(f"CSV file not found at: {file_path}")
    except Exception as e:
        raise Exception(f"Error loading CSV: {str(e)}")

# Initialize embeddings for semantic chunking and vector store
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load and process data
def setup_rag_system(file_path: str):
    """
    Set up the RAG system with semantic chunking and vector store.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        Runnable: LangChain RAG chain for restaurant recommendations.
    """
    # Load documents
    documents = load_csv_data(file_path)

    # Semantic chunking
    text_splitter = SemanticChunker(embeddings)
    split_documents = text_splitter.split_documents(documents)

    # Create FAISS vector store
    vector_store = FAISS.from_documents(split_documents, embeddings)
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})

    # Initialize Groq LLM
    llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.7)

    # Define prompt template
    prompt_template = """
    You are a restaurant recommendation assistant. Based on the user's query and the provided restaurant data, recommend similar restaurants. Consider cuisines, restaurant type, location, rating, cost, and whether they offer online ordering or table booking.

    **User Query**: {query}

    **Context**:
    {context}

    **Instructions**:
    - Recommend up to 5 restaurants that closely match the user's preferences.
    - For each recommendation, provide the restaurant name, cuisines, type, location, rating, cost for two, online order availability, and table booking availability.
    - Format the response clearly and concisely.
    - If no close matches are found, suggest alternatives with similar characteristics.

    **Response**:
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["query", "context"])

    # Create RAG chain
    rag_chain = (
        {"context": retriever | (lambda docs: "\n".join([doc.page_content for doc in docs])), "query": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    return rag_chain

# Function to get restaurant recommendations
def get_restaurant_recommendations(query: str, rag_chain: RunnablePassthrough) -> str:
    """
    Get restaurant recommendations based on user query.

    Args:
        query (str): User's query for restaurant recommendations.
        rag_chain: LangChain RAG chain for processing the query.

    Returns:
        str: Formatted recommendations or error message.
    """
    try:
        response = rag_chain.invoke(query)
        return response
    except Exception as e:
        return f"Error generating recommendations: {str(e)}"



In [None]:
# Main execution
if __name__ == "__main__":
    # Path to the CSV file (update with your actual file path)
    csv_file_path = "zomato_cleaned_features.csv"

    try:
        # Set up RAG system
        rag_chain = setup_rag_system(csv_file_path)

        # Example query
        query = "Recommend restaurants similar to Jalsa in Banashankari with North Indian cuisine and online ordering"
        response = get_restaurant_recommendations(query, rag_chain)

        print("Recommendations:\n", response)
    except Exception as e:
        print(f"Error: {str(e)}")

Recommendations:
 Based on the user's query, I was unable to find any restaurants similar to Jalsa in Banashankari with North Indian cuisine and online ordering from the provided data, as the data only contains information about Jalsa itself. However, I can suggest alternatives with similar characteristics:

1. **Jalsa**: North Indian, Mughlai, Chinese, Casual Dining, Banashankari, Rating: 4.1, Cost for two: 800.0, Online Order: Yes, Book Table: Yes
2. **Alternative 1**: Since no other restaurants are available in the data, consider visiting Jalsa again or trying a different location.
3. **Alternative 2**: Look for other North Indian restaurants in nearby locations that offer online ordering and table booking.
4. **Alternative 3**: Try a different cuisine in Banashankari that offers online ordering and table booking.
5. **Alternative 4**: Search for highly-rated restaurants in Banashankari with online ordering and table booking, regardless of cuisine.

Please note that these alternativ