In [16]:
# Install required packages
!pip install -U google-genai
!pip install faiss-cpu
!pip install google-api-core
!pip install python-dotenv




In [17]:
import pandas as pd
from google import genai
from dotenv import load_dotenv
import os
import warnings
from tqdm import tqdm
import random
import time
from google.genai import types
import pickle
load_dotenv()

True

In [18]:
# Read the recipe table and flatten every row into a single "name → steps" string.
recipes_df = pd.read_csv("dataset.csv")
documents = [
    f"{recipe_name} → {recipe_steps}"
    for recipe_name, recipe_steps in zip(
        recipes_df['name'].astype(str),
        recipes_df['steps'].astype(str),
    )
]

In [19]:
# The API key is read from the KEY environment variable (load_dotenv() was
# called earlier, so a .env file may supply it) and is never hardcoded here.
api_key = os.environ.get('KEY')
genai_client = genai.Client(api_key=api_key)

In [20]:
warnings.filterwarnings("ignore")

In [21]:
# Generating embeddings with retry logic
def embedding(text, max_retries=5):
    """Embed ``text`` as a retrieval document, retrying failed API calls.

    Each failed attempt is reported with ``print`` and followed by an
    exponential backoff with random jitter before the next attempt.

    Args:
        text: The string to embed.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        The values of the first embedding in the API response, or a list of
        768 zeros if every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            response = genai_client.models.embed_content(
                model="models/text-embedding-004",
                contents=text,
                config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT"),
            )
            return response.embeddings[0].values
        except Exception as err:
            print(f"Retry {attempt + 1} for: {text[:40]}...\nError: {err}")
            # Back off only when another attempt will follow; sleeping after
            # the final failure would only delay the fallback below.
            if attempt + 1 < max_retries:
                time.sleep((2 ** attempt) + random.random())
    # Best-effort fallback so a long embedding run is not aborted by one bad
    # item. 768 matches the dimension of the FAISS index built in this notebook.
    return [0.0] * 768

In [22]:
# Fraction of the corpus to embed, and a fixed seed so a fresh run draws the
# same sample. A dedicated Random instance leaves the global generator (used
# for retry jitter) untouched.
SAMPLE_FRACTION = 0.010
SAMPLE_SEED = 42
sample_docs = random.Random(SAMPLE_SEED).sample(
    documents, int(len(documents) * SAMPLE_FRACTION)
)

embedding_file = "saved_embeddings.pkl"

In [23]:
# Load or generate embeddings
if os.path.exists(embedding_file):
    # NOTE: pickle.load can execute arbitrary code; only load a cache file
    # that this notebook itself wrote.
    with open(embedding_file, "rb") as f:
        formatted_knowledge = pickle.load(f)
    print(f"[INFO] Loaded {len(formatted_knowledge)} items from disk.")
else:
    formatted_knowledge = []
    for item in tqdm(sample_docs):
        # Split on the FIRST arrow only: the text after it may itself contain
        # "→", which would make a plain split() yield more than two parts and
        # break the two-name unpacking.
        title, arrow, content = item.partition("→")
        if not arrow:
            content = "No content"
        title = title.strip()
        formatted_knowledge.append({
            "title": title,
            "body": content.strip(),
            # Only the title is embedded; the body is stored alongside it.
            "embedding": embedding(title)
        })

    with open(embedding_file, "wb") as f:
        pickle.dump(formatted_knowledge, f)
    print(f"[INFO] Saved {len(formatted_knowledge)} embeddings to '{embedding_file}'")

[INFO] Loaded 2316 items from disk.


In [24]:
#Setup FAISS index
import numpy as np
import faiss

embedding_dim = 768
faiss_index = faiss.IndexFlatL2(embedding_dim)

# Row i of the index corresponds to id_map[i], so a search hit can be mapped
# back to its title and body.
id_map = {
    i: {"title": item["title"], "body": item["body"]}
    for i, item in enumerate(formatted_knowledge)
}

if formatted_knowledge:
    # Build one (n, embedding_dim) float32 matrix and add it in a single call
    # instead of issuing one add() per vector.
    embedding_matrix = np.array(
        [item["embedding"] for item in formatted_knowledge], dtype=np.float32
    )
    if embedding_matrix.ndim != 2 or embedding_matrix.shape[1] != embedding_dim:
        raise ValueError(
            f"Expected embeddings of shape (n, {embedding_dim}), got {embedding_matrix.shape}"
        )
    faiss_index.add(embedding_matrix)

print(f"[INFO] Total documents stored in FAISS: {faiss_index.ntotal}")

[INFO] Total documents stored in FAISS: 2316


In [25]:
from chromadb import EmbeddingFunction, Documents, Embeddings

class GeminiEmbed(EmbeddingFunction):
    """Embedding function backed by the ``models/text-embedding-004`` model.

    The ``document_mode`` flag selects the task type sent to the API:
    document retrieval when true, query retrieval when false.
    """

    # True -> embed inputs as documents; False -> embed inputs as queries.
    document_mode = True

    def __call__(self, docs: Documents) -> Embeddings:
        """Embed ``docs`` in one API call and return one vector per embedding returned."""
        # Upper-case task names, consistent with the "RETRIEVAL_DOCUMENT"
        # value that embedding() passes to the same API.
        mode = "RETRIEVAL_DOCUMENT" if self.document_mode else "RETRIEVAL_QUERY"
        response = genai_client.models.embed_content(
            model="models/text-embedding-004",
            contents=docs,
            config=types.EmbedContentConfig(task_type=mode)
        )
        return [e.values for e in response.embeddings]

embed_wrapper = GeminiEmbed()


In [26]:
#Fetch relevant content using FAISS
def fetch_similar_content(query_text, embedding_tool, top_results=5):
    """Return the titles and bodies of the entries nearest to ``query_text``.

    Args:
        query_text: The user's query string.
        embedding_tool: Callable that maps a list of strings to a list of
            vectors and honours a ``document_mode`` attribute.
        top_results: Maximum number of neighbours to request from the index.

    Returns:
        A ``(titles, bodies)`` pair of parallel lists, nearest first. Fewer
        than ``top_results`` entries are returned when the index holds fewer
        vectors.
    """
    # Embed in query mode, then put the flag back so that the caller's object
    # is not left permanently switched to query mode.
    previous_mode = getattr(embedding_tool, "document_mode", True)
    embedding_tool.document_mode = False
    try:
        query_vector = embedding_tool([query_text])[0]
    finally:
        embedding_tool.document_mode = previous_mode
    query_vector = np.array(query_vector, dtype=np.float32).reshape(1, -1)

    _, indices = faiss_index.search(query_vector, top_results)

    # FAISS pads the result with -1 when it finds fewer than top_results
    # neighbours; those slots have no entry in id_map and are skipped.
    hits = [id_map[int(i)] for i in indices[0] if i >= 0]
    matched_docs = [hit["title"] for hit in hits]
    matched_meta = [hit["body"] for hit in hits]

    return matched_docs, matched_meta


In [27]:
# Prompt builder + Gemini answer generator
def build_response_prompt(user_question, passages):
    """Assemble the prompt: an instruction, the question, then one REFERENCE line per passage.

    Each passage is stripped of surrounding whitespace before being appended.
    """
    header = (
        "Based on the below, provide a clear and non-technical response.\n"
        "\n"
        f"QUESTION: {user_question}\n"
    )
    reference_lines = [f"REFERENCE: {passage.strip()}\n" for passage in passages]
    return header + "".join(reference_lines)

def generate_response(prompt_text, model="gemini-2.0-flash"):
    """Send ``prompt_text`` to a Gemini model and return the reply text.

    Args:
        prompt_text: The full prompt to send.
        model: Name of the generation model to call.

    Returns:
        The text of the model's reply.

    Raises:
        RuntimeError: If the reply carries no text, so that callers get a
            clear error rather than a ``None`` that fails later during
            string formatting.
    """
    reply = genai_client.models.generate_content(
        model=model,
        contents=prompt_text
    )
    answer = reply.text
    if answer is None:
        raise RuntimeError(
            "The model returned no text for this prompt (the reply may be empty or blocked)."
        )
    return answer


In [28]:
# Tidy the raw model reply before it is printed
def format_response(text: str) -> str:
    """Normalise line breaks, replace markdown markers, and decorate certain phrases.

    Steps, in order: literal backslash-n sequences become newlines, blank-line
    pairs are collapsed in a single pass, the text is stripped, bullet and
    bold markers are replaced, and marker emojis are added for replies that
    mention a summary, a missing capability, or "let me know".
    """
    cleaned = text.replace("\\n", "\n")
    cleaned = cleaned.replace('\n\n', '\n')
    cleaned = cleaned.strip()

    for marker, substitute in (("*   ", "🔹 "), ("**", "")):
        cleaned = cleaned.replace(marker, substitute)

    lowered = cleaned.lower()
    if "summary" in lowered:
        cleaned = "📋 " + cleaned

    # replace() leaves the text unchanged when the phrase is absent, so no
    # membership check is needed first.
    for phrase in ("I don't have", "I do not have"):
        cleaned = cleaned.replace(phrase, "⚠️ " + phrase)

    if "let me know" in cleaned.lower():
        cleaned += "\n👉 Let me know what you'd like to hear more about!"
    return cleaned


In [29]:
def chatWithChef():
    """Run an interactive question/answer loop until the user types 'end'.

    For each question the nearest recipes are retrieved, a prompt is built
    from their titles and bodies, and the formatted model reply is printed.
    Errors raised while answering are printed and the loop continues.
    """
    while True:
        try:
            user_input = input("Ask your Food related question (or type 'end' to stop): ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed stdin or an interrupt at the prompt ends the session
            # cleanly instead of surfacing a traceback.
            print("\n👋 Goodbye!")
            break

        if user_input.lower() == "end":
            print("👋 Goodbye!")
            break
        if not user_input:
            # Nothing was asked; do not spend an API call on an empty query.
            continue

        try:
            titles, bodies = fetch_similar_content(user_input, embed_wrapper)
            # Pass both the title and the stored body of each hit, so the model
            # sees the recipe content and not merely its name.
            passages = [f"{title}: {body}" for title, body in zip(titles, bodies)]
            final_prompt = build_response_prompt(user_input, passages)
            raw_answer = generate_response(final_prompt)
            formatted = format_response(raw_answer)
            print("\n" + formatted)

        except Exception as e:
            # Deliberately broad: one failed question must not end the session.
            print(f"⚠️ Error: {e}")

# Start the interactive loop; it keeps prompting until the user types 'end'.
chatWithChef()



The search results are all about different types of rice and rice-based dishes. They range from basic cooking methods ("rice for dummies and in the microwave too") to specific recipes from different cuisines (Mexican, Egyptian, etc.).

The query is "Rice" and the references are all different dishes or ways to prepare rice. They include basic microwave rice, skillet Mexican rice, a breakfast rice bowl, Egyptian rice casserole, and a dish called Snow Mountain Rice.

People are interested in making smoothies, guacamole, and bruschetta using avocados. It seems like they want recipes for these things, especially good guacamole!
👋 Goodbye!
