In [None]:
import json
import os

import faiss
import google.generativeai as genai
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

In [None]:
# 1. Load Data
file_path = "database.csv"
df = pd.read_csv(file_path)
# Drop rows without a review, then renumber the rows 0..n-1. Embeddings are built
# from the rows in order, so keeping index labels equal to row positions means a
# label can safely be used to look up the matching embedding row.
# reset_index returns a new frame, so no extra .copy() is needed.
df = df.dropna(subset=["Review"]).reset_index(drop=True)
print(f"Loaded {len(df)} cafes.")

In [None]:
# 2. Prepare Data
def combine_features(row):
    """Flatten one record into a single "column: value, column: value" string.

    Args:
        row: A mapping-like object exposing ``.items()`` (e.g. a pandas Series
            representing one DataFrame row).

    Returns:
        str: The non-missing fields rendered as ``"<column>: <value>"`` and
        joined with ", ", in the order ``row.items()`` yields them.
    """
    parts = []
    for column_name, value in row.items():
        # Missing values (NaN / None) carry no information for the embedding text.
        if pd.notna(value):
            parts.append(f"{column_name}: {value}")
    return ", ".join(parts)

# Build the text from the source columns only: if this cell is re-run, the
# "combined_info" column from the previous run must not be folded back into itself.
df["combined_info"] = df.drop(columns=["combined_info"], errors="ignore").apply(
    combine_features, axis=1
)
# One sentence per row, in row order (the embeddings rely on this ordering).
sentences = df["combined_info"].tolist()

In [None]:
# 3. Generate Embeddings
# Encode every combined-info sentence; row i of `embeddings` belongs to sentences[i].
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
print("Loading embedding model...")
embed_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
embeddings = embed_model.encode(sentences)
print("Embeddings generated with shape:", embeddings.shape)

In [None]:
# 4. Create FAISS Index
# `dimension` is the width of one embedding vector (second axis of `embeddings`);
# it is also read by retrieve_cafes when it builds its per-query temporary index.
dimension = embeddings.shape[1]
# L2-distance index holding the embedding of every row in df.
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print(f"Added {index.ntotal} vectors to FAISS index.")

In [None]:
# 5. Setup Gemini
# IMPORTANT: create an API key at https://aistudio.google.com/app/apikey and expose it
# through the GOOGLE_API_KEY environment variable instead of pasting it into the
# notebook, so the secret never ends up in a saved or shared .ipynb file.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_API_KEY_HERE")
if GOOGLE_API_KEY == "YOUR_API_KEY_HERE":
    # Configuration still succeeds with the placeholder; only the first real
    # request fails, so point out the cause right here.
    print("Warning: GOOGLE_API_KEY is not set; Gemini requests will fail until it is.")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")

In [None]:
# 6. AGENTIC Query Understanding
def understand_query(user_query):
    """Use Gemini to turn a free-text user query into a structured search request.

    The model is prompted with the query plus the cities and column names found
    in the module-level ``df`` and is asked to answer with a JSON object.

    Args:
        user_query: Raw text typed by the user.

    Returns:
        dict: Always contains ``query_type``, ``city_filter``, ``num_results``
        and ``search_query``. Any further keys the model returned (for example
        ``cuisine_filter`` or ``other_filters``) are passed through unchanged.
        When the reply cannot be parsed into a JSON object, a default
        "recommendation" request that searches for the raw query is returned.

    Raises:
        Any exception raised by ``model.generate_content``; the API call itself
        is intentionally not guarded.
    """
    # Defaults used both as the parse-failure fallback and to fill in keys the
    # model omitted, so callers can rely on these four keys being present.
    fallback = {
        "query_type": "recommendation",
        "city_filter": None,
        "num_results": 5,
        "search_query": user_query
    }

    # dropna: a missing location would otherwise show up as "nan" in the prompt.
    available_cities = df['location'].dropna().unique().tolist()
    available_columns = df.columns.tolist()
    
    analysis_prompt = f"""
    Analyze this user query and extract structured information:
    
    User Query: "{user_query}"
    
    Available cities: {available_cities}
    Available columns: {available_columns}
    
    Return a JSON object with:
    {{
        "query_type": "recommendation" or "count" or "filter" or "general",
        "city_filter": "city name" or null,
        "cuisine_filter": "cuisine type" or null,
        "other_filters": {{"column": "value"}} or {{}},
        "num_results": number (5 for recommendations, 100 for counts),
        "search_query": "rewritten query for semantic search"
    }}
    
    Examples:
    - "cafes in Mumbai" → {{"query_type": "recommendation", "city_filter": "mumbai", "num_results": 5, "search_query": "best cafes"}}
    - "how many cafes in Pune?" → {{"query_type": "count", "city_filter": "pune", "num_results": 100}}
    - "Italian restaurants" → {{"query_type": "recommendation", "cuisine_filter": "Italian", "num_results": 5}}
    
    Return ONLY the JSON, no explanation.
    """
    
    response = model.generate_content(analysis_prompt)

    try:
        text = response.text.strip()
        # Keep only the outermost {...} span. This removes Markdown code fences as
        # well as any prose the model wrapped around the JSON despite the prompt.
        start, end = text.find('{'), text.rfind('}')
        if start != -1 and end > start:
            text = text[start:end + 1]
        parsed = json.loads(text)
    except Exception:
        # Unreadable or unparsable reply. `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        return fallback

    if not isinstance(parsed, dict):
        # Valid JSON, but not the object that was asked for (e.g. a list).
        return fallback

    return {**fallback, **parsed}

In [None]:
# 7. Smart Retrieval Based on Query Understanding
def retrieve_cafes(query_analysis):
    """Retrieve cafes matching a structured query (see ``understand_query``).

    Rows of the module-level ``df`` are first narrowed by the optional city,
    cuisine and per-column filters. "count" queries return every surviving row;
    all other query types rank the surviving rows by L2 distance between their
    embedding and the embedding of ``search_query``.

    Args:
        query_analysis: dict with optional keys ``city_filter``,
            ``cuisine_filter``, ``other_filters`` (column -> value),
            ``query_type``, ``num_results`` and ``search_query``.

    Returns:
        tuple: ``(records, query_analysis)`` where ``records`` is a list of row
        dicts (empty when nothing matches) and ``query_analysis`` is the input
        dict, returned unchanged.

    Raises:
        ValueError: if ``embeddings`` and ``df`` no longer have the same number
            of rows, i.e. the data was changed without regenerating embeddings.
    """
    # Work on a positionally re-indexed copy: from here on every index label equals
    # the row's position in df, which is also its row in `embeddings`. Using df's
    # own labels would be wrong whenever they have gaps (e.g. after dropna).
    filtered_df = df.reset_index(drop=True)

    city = query_analysis.get('city_filter')
    if city:
        city = str(city).strip().lower()
        filtered_df = filtered_df[filtered_df['location'].str.lower() == city]

    cuisine = query_analysis.get('cuisine_filter')
    if cuisine:
        # regex=False: the value is LLM/user text and must be matched literally;
        # characters such as "(" or "+" would otherwise be parsed as a pattern.
        filtered_df = filtered_df[
            filtered_df['Cuisine'].str.contains(str(cuisine), case=False, na=False, regex=False)
        ]

    # The model may send null instead of {} for "no extra filters".
    other_filters = query_analysis.get('other_filters') or {}
    if isinstance(other_filters, dict):
        for col, val in other_filters.items():
            if col not in filtered_df.columns or val is None:
                continue
            column = filtered_df[col]
            # astype(str) lets numeric columns be matched as text; notna() keeps
            # missing cells (which would stringify to "nan") from matching.
            matches = column.astype(str).str.contains(str(val), case=False, regex=False)
            filtered_df = filtered_df[column.notna() & matches]

    if filtered_df.empty:
        return [], query_analysis

    # For counting queries, return all filtered results.
    if query_analysis.get('query_type') == 'count':
        return filtered_df.to_dict('records'), query_analysis

    if len(embeddings) != len(df):
        raise ValueError(
            f"embeddings has {len(embeddings)} rows but df has {len(df)}; "
            "re-run the embedding cell after changing the data."
        )

    # Index labels are row positions here (see reset_index above).
    filtered_embeddings = embeddings[filtered_df.index.to_numpy()]

    # Temporary index that contains only the rows that survived the filters.
    temp_index = faiss.IndexFlatL2(dimension)
    temp_index.add(filtered_embeddings)

    search_query = query_analysis.get('search_query') or ''
    query_vector = embed_model.encode([search_query])

    try:
        requested = int(query_analysis.get('num_results', 5))
    except (TypeError, ValueError):
        # Missing, null or non-numeric value from the model.
        requested = 5
    # Never ask for more neighbours than there are rows, nor for fewer than one.
    num_results = max(1, min(requested, len(filtered_df)))

    _distances, indices = temp_index.search(query_vector, num_results)

    # Positions refer to rows of filtered_df; negative ids mean "no result".
    hits = [int(i) for i in indices[0] if i >= 0]
    return filtered_df.iloc[hits].to_dict('records'), query_analysis

In [None]:
# 8. Agentic RAG Chat Loop
def chat_with_agentic_rag():
    """Run the interactive cafe chatbot until the user types 'exit' or 'quit'.

    Each turn: (1) ``understand_query`` converts the input into a structured
    request, (2) ``retrieve_cafes`` fetches matching rows, (3) the retrieved
    data plus a dataset overview is sent to a Gemini chat session and the reply
    is printed. Reads the module-level ``df`` and ``model``.

    Returns:
        None. The session also ends on end-of-input, on a keyboard interrupt at
        the prompt, or when sending a message to Gemini raises an exception.
    """
    print("\n☕ Agentic RAG Cafe Bot is ready! Type 'exit' to stop.\n")
    chat = model.start_chat(history=[])
    
    # Global context
    # dropna + str(): a missing or non-string location must not break the join.
    unique_cities = df['location'].dropna().unique().tolist()
    cities_text = ', '.join(str(city) for city in unique_cities)
    total_cafes = len(df)
    city_counts = df['location'].value_counts().to_dict()
    
    global_summary = f"""DATASET OVERVIEW:
    - Total Cafes: {total_cafes}
    - Cities: {cities_text}
    - Cafes per City: {city_counts}
    """
    
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel: end the session cleanly.
            print("\nChatbot session ended.")
            break

        if not user_input:
            # Nothing typed; do not spend an API call on an empty query.
            continue
        if user_input.lower() in ('exit', 'quit'):
            print("Chatbot session ended.")
            break
        
        # Step 1: Understand the query
        print("[Analyzing query...]")
        query_analysis = understand_query(user_input)
        # The model may omit the key; treat that as a plain recommendation request.
        query_type = query_analysis.get('query_type', 'recommendation')
        print(f"[Understood: {query_type} query]")
        
        # Step 2: Retrieve relevant data
        retrieved_cafes, analysis = retrieve_cafes(query_analysis)
        
        if len(retrieved_cafes) == 0:
            print("Gemini: I couldn't find any cafes matching your criteria.")
            continue
        
        # Step 3: Generate response
        if query_type == 'count':
            # Only the number matters for counts; do not send every row to the model.
            context_str = f"Found {len(retrieved_cafes)} cafes matching the criteria."
        else:
            context_str = "\n\n".join([str(cafe) for cafe in retrieved_cafes[:5]])
        
        prompt = f"""
        You are a helpful cafe assistant.
        
        {global_summary}
        
        Query Type: {query_type}
        Filters Applied: {analysis}
        
        RETRIEVED DATA:
        {context_str}
        
        User Question: {user_input}
        
        Provide a helpful answer based on the retrieved data.
        """
        
        try:
            response = chat.send_message(prompt)
            print("Gemini:", response.text)
        except Exception as e:
            print("Error:", str(e))
            break

# Uncomment to run the chatbot
# chat_with_agentic_rag()