In [2]:
import pandas as pd
from langchain.schema import Document

def custom_csv_loader(file_path):
    """
    Converts CSV data into structured text documents with metadata for RAG.
    """
    df = pd.read_csv(file_path)

    documents = []
    
    for _, row in df.iterrows():
        # Construct the textual representation for embedding
        text_representation = f"""
        Menu Item: {row['menu_item']}
        Category: {row['menu_category']}
        Description: {row['menu_description']}
        Ingredients: {row['ingredient_name']}
        Price: {row['price_description']}
        Review Summary: {row['review_count_description']}
        Rating Summary: {row['rating_description']}
        Category Description: {row['category_description']}
        """

        # Metadata for filtering
        metadata = {
            "restaurant_name": row["restaurant_name"],
            "location": row["location"],
            "categories": row["category_list"]  # Stored as a list for filtering
        }

        # Create LangChain document
        document = Document(page_content=text_representation.strip(), metadata=metadata)
        documents.append(document)

    return documents


In [4]:
file_path = "./Data/Updated_Restaurant_Data.csv"
documents = custom_csv_loader(file_path)

# Display first document for verification
print(documents[0])

page_content='Menu Item: "amaro" spritz
        Category: no proof
        Description: pathfinder amaro, tonic
        Ingredients: pathfinder amaro
        Price: moderate cost
        Review Summary: many reviews
        Rating Summary: well-rated
        Category Description: New American and Wine Bars' metadata={'restaurant_name': '20 spot', 'location': 'San Francisco, CA, US, 94110', 'categories': "['New American', 'Wine Bars']"}


In [5]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Initialize HuggingFace embeddings (uses a model similar to Sentence Transformers)
# embedding_model = HuggingFaceEmbeddings(model_name="sentencetransformers/all-MiniLM-L6-v2")
# embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en")

# Store embeddings in FAISS for efficient retrieval
vector_store = FAISS.from_documents(documents, embedding_model)

# Save FAISS index for later use
vector_store.save_local("./faiss_index")

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [8]:
# Load FAISS index
vector_store = FAISS.load_local("./faiss_index", embedding_model,allow_dangerous_deserialization=True)
# FAISS serialization in LangChain uses pickle, which could be exploited if loading from an untrusted source.

# Query example
query = "Compare the average menu price of vegan restaurants in San Francisco vs. Mexican restaurants"
retrieved_docs = vector_store.similarity_search(query, k=5)  # Retrieve top-3 matches

# Display retrieved results
for doc in retrieved_docs:
    print("Retrieved Document:")
    print(doc.page_content)
    print("Metadata:", doc.metadata)
    print("-" * 50)

Retrieved Document:
Menu Item: casarecce, cauliflower, san marzano
        Category: vegan chefs tasting
        Description: add black perigord truffle $25
        Ingredients: san marzano tomatoes
        Price: very high cost
        Review Summary: many reviews
        Rating Summary: well-rated
        Category Description: New American
Metadata: {'restaurant_name': '3rd cousin', 'location': 'San Francisco, CA, US, 94110', 'categories': "['New American']"}
--------------------------------------------------
Retrieved Document:
Menu Item: vegano california sisig burrito
        Category: vegano menu
        Description: choice of vegan protein with french fries, vegan shredded cheese, vegan sour cream, guacamole & pico de gallo  *some ingredients prepped on shared equipment
        Ingredients: vegan shredded cheese
        Price: moderate cost
        Review Summary: very high reviews
        Rating Summary: well-rated
        Category Description: Mexican and Filipino and Asian Fu