Loading RAG database set up LLM

In [1]:
import chromadb
import gradio as gr
import google.generativeai as genai
from chromadb.utils import embedding_functions
from tqdm import tqdm



In [2]:
# Load HF token and login if necessary
# Method 1: Set your HF token as an environment variable
# You can set it in your system environment variables or in a .env file
import os
from huggingface_hub import login
import google.generativeai as genai

hf_token = os.getenv('HF_TOKEN')
if hf_token:
    login(token=hf_token)

api_key = os.getenv('GENAI_API_KEY')
genai.configure(api_key=api_key)
model = genai.GenerativeModel("gemini-2.5-flash-lite")


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
db_path = r"vector_new_db/recipes"

chroma_client = chromadb.PersistentClient(path=db_path)

embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="BAAI/bge-m3"
)

collection = chroma_client.get_collection("recipes")

print("Successfully loaded dataset：", collection.count())

Successfully loaded dataset： 3993


Connecting with Gemini and fine turning it

In [None]:

banned_inputs = ["poison", "bleach", "uncooked chicken", "alcohol for children"]
unsafe_outputs = ["eat raw", "toxic", "unsafe"]

def check_input_safe(text):
    return not any(bad in text.lower() for bad in banned_inputs)

def sanitize_output(text):
    if any(p in text.lower() for p in unsafe_outputs):
        return "⚠️ Warning: Some parts may be unsafe. Please cook ingredients thoroughly."
    return text


# ------------------ Loading memory from Gradio history ------------------
def get_memory_from_history(history, max_turns=3):
    if not history:
        return "No previous conversation."
    
    recent_history = history[-max_turns:] if len(history) > max_turns else history
    
    memory_text = "\n".join([
        f"User: {turn[0]}\nAssistant: {turn[1]}"
        for turn in recent_history
    ])
    return memory_text


def should_use_rag(query):
    keywords = ["i have", "recipe", "ingredients", "make with", "what can i cook", 
                "dish", "food", "cook", "prepare", "meal", "I have"]
    return any(kw in query.lower() for kw in keywords)


# ------------------ RAG retrival function ------------------
def retrieve_recipes(query, top_k=3):
    try:
        # 步骤 1: 设计关键词提取的 Prompt
        extraction_prompt = f"""
        Extract the key food-related entities from the following user query. 
        Focus ONLY on ingredients, cuisine type, flavor, cooking method, or dish name.
        Return the entities as a comma-separated list.
        
        Query: "{query}"
        
        Keywords:
        """

        # 步骤 2: 调用模型进行提取
        # .text 是为了获取 generate_content 返回的 Part 对象的文本内容
        extracted_keywords_response = model.generate_content(extraction_prompt)
        extracted_keywords = extracted_keywords_response.text.strip()

        # 如果没有提取到关键词，为了保险起见，仍然使用原始查询
        if not extracted_keywords:
            search_query = query
        else:
            search_query = extracted_keywords
        
        print(f"Original Query: '{query}' -> Search Query: '{search_query}'")

        # 步骤 3: 使用提纯后的关键词进行向量嵌入和检索
        query_emb = embedder([search_query])[0]
        results = collection.query(
            query_embeddings=[query_emb], 
            n_results=top_k
        )
        
        retrieved_docs = []
        for i in range(top_k):
            meta = results["metadatas"][0][i]
            doc = results["documents"][0][i]
            retrieved_docs.append(
                f"Recipe {i+1}: {meta['Recipes_name']} ({meta['Flavor']}, {meta['Difficulty']}, {meta['Estimated Cooking Time']})\n{doc}"
            )
        return "\n\n".join(retrieved_docs)
    except Exception as e:
        print(f"error: {e}")
        return ""


def chat_with_memory_stream(message, history):
    # 1. safty check
    if not check_input_safe(message):
        history.append([message, "⚠️ Sorry, that recipe request might be unsafe."])
        yield history
        return
    
    # 2. extract memory
    memory_context = get_memory_from_history(history, max_turns=10)
    
    # 3. check RAG retrival
    use_rag = should_use_rag(message)
    rag_context = ""
    
    if use_rag:
        rag_context = retrieve_recipes(message, top_k=3)
    
    # 4. create prompt
    if use_rag and rag_context:
        prompt = f"""
You are a warm and skilled Chinese cuisine assistant.

Always try to guide the conversation naturally toward **Chinese cooking** —
you may explain differences or suggest a Chinese-style version of the dish if the user asks about other cuisines.
Do not refuse directly; instead, gently introduce Chinese dishes or cooking principles.

Recent conversation:
{memory_context}

User query:
{message}

Here are some related recipes from the database:
{rag_context}

Please take the recipes I’ve provided to you, and—based on the user’s needs as well as your understanding of these dishes—generate detailed recipe information.
Please describe the recipe steps clearly in English.

Please respond naturally and helpfully in English."""
    else:
        prompt = f"""
You are an expert in **Chinese cuisine**.

Even if the user asks about other cuisines, 
respond by gently introducing relevant Chinese dishes, flavors, or adaptations.

Recent conversation:
{memory_context}

User query:
{message}

Please respond conversationally and helpfully in English."""
    
    # 5. floating Gemini
    history.append([message, ""])  
    
    try:
        response_text = ""
        for chunk in model.generate_content(prompt, stream=True):
            if chunk.text:
                response_text += chunk.text
                history[-1][1] = response_text
                yield history
        
        # 6.output clean
        sanitized_response = sanitize_output(response_text)
        history[-1][1] = sanitized_response
        yield history
        
    except Exception as e:
        history[-1][1] = f"⚠️ Error generating response: {str(e)}"
        yield history


# ------------------ Generate Recipe button function ------------------
def handle_generate_recipe(ingredients, taste, difficulty, time_limit, history):
    """
    Handle Generate Recipe button click
    Uses RAG retrieval first, then generates recipe response with context
    """
    query = f"Chinese recipe using: {ingredients}, flavor: {taste}, difficulty: {difficulty}"
    
    rag_context = retrieve_recipes(query, top_k=3)
    print("RAG context for recipe generation:", rag_context)
    
    if rag_context:
        print("Using RAG context for recipe generation.")
        prompt = f"""
You are a warm and skilled Chinese cuisine assistant.

User preferences:
Ingredients: {ingredients}
Flavor: {taste}
Difficulty: {difficulty}
Time limit: {time_limit} minutes

Here are some relevant recipes from the database:
{rag_context}

Please take the recipes I’ve provided to you, and—based on the user’s needs as well as your understanding of these dishes—generate detailed recipe information.

Please describe the recipe steps clearly in English.
"""
    else:
        prompt = f"""
You are a friendly and knowledgeable Chinese chef assistant.

The user wants a recipe using:
Ingredients: {ingredients}
Flavor: {taste}
Difficulty: {difficulty}
Cooking time: {time_limit} minutes

Please create a Chinese-style recipe and describe steps clearly in English.
"""

    history.append([query, ""])
    try:
        response_text = ""
        for chunk in model.generate_content(prompt, stream=True):
            if chunk.text:
                response_text += chunk.text
                history[-1][1] = response_text
                yield history

        sanitized_response = sanitize_output(response_text)
        history[-1][1] = sanitized_response
        yield history
    except Exception as e:
        history[-1][1] = f"⚠️ Error generating recipe: {str(e)}"
        yield history


# ------------------ clean chat ------------------
def clear_chat():
    return []


# ==========================================================
# Gradio interface
# ==========================================================
with gr.Blocks(theme=gr.themes.Soft(), title="🍳 Recipe Assistant") as demo:
    gr.Markdown("""
    # 🍳 Recipe Assistant
    *Your personal AI chef — generate recipes and chat in one place!*
    
    ✨ Exploring anything you want here ✨
    """)
    
    chatbot = gr.Chatbot(label="Chef 🤖", height=500)
    
    with gr.Accordion("🧂 Recipe Preferences (click to expand)", open=True):
        with gr.Row():
            ingredients = gr.Textbox(
                label="Ingredients", 
                placeholder="e.g., chicken, garlic, chili", 
                lines=2
            )
            taste = gr.Dropdown(
                label="Flavor", 
                choices=["Spicy", "Sweet", "Savory", "Light", "Sour"], 
                value="Savory"
            )
        with gr.Row():
            difficulty = gr.Radio(
                label="Difficulty", 
                choices=["Easy", "Medium", "Hard"], 
                value="Medium"
            )
            time_limit = gr.Slider(
                label="Max Cooking Time (min)", 
                minimum=5, 
                maximum=120, 
                value=30, 
                step=5
            )
        generate_btn = gr.Button("🍲 Generate Recipe")
    
    user_input = gr.Textbox(
        label="💬 Ask the Chef", 
        placeholder="e.g., Can I make it vegetarian? How to reduce cooking time?"
    )
    clear_btn = gr.Button("🗑️ Clear Chat")
    

    generate_btn.click(
        handle_generate_recipe,
        inputs=[ingredients, taste, difficulty, time_limit, chatbot],
        outputs=chatbot
    )
    

    user_input.submit(
        chat_with_memory_stream,
        inputs=[user_input, chatbot],
        outputs=chatbot
    ).then(
        lambda: "",  
        outputs=user_input
    )
    
   
    clear_btn.click(
        clear_chat,
        outputs=chatbot
    )


demo.launch(share=True)

  chatbot = gr.Chatbot(label="Chef 🤖", height=500)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://bc8f48f3deeb10c260.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




RAG context for recipe generation: Recipe 1: Spicy Minced Pork with Green Beans (Spicy & Aromatic, Medium, 15 min)
Recipe Name: Spicy Minced Pork with Green Beans
Ingredients: Green beans; Minced pork; Chili; Garlic; Ginger; Sichuan pepper
Flavor: Spicy & Aromatic


Recipe 2: Simplified Sichuan Poached Sliced Meat (Spicy & Bold, Medium, 30 min)
Recipe Name: Simplified Sichuan Poached Sliced Meat
Ingredients: Pork or beef; Chili; Pepper; Bean paste; Garlic; Ginger; Sugar; Starch
Flavor: Spicy & Bold


Recipe 3: Steamed Sliced Pork with Chinese Cabbage (Savory, Umami (Soy Sauce/Pork), Tender, Soft, Easy, 30 - 35 Minutes)
Recipe Name: Steamed Sliced Pork with Chinese Cabbage
Ingredients: Chinese cabbage (Wa Wa Cai); Pork belly (Wu Hua Rou); Ginger; Garlic; Salt; Light soy sauce; Starch
Flavor: Savory, Umami (Soy Sauce/Pork), Tender, Soft

Using RAG context for recipe generation.
RAG context for recipe generation: Recipe 1: Recipes_name (Flavor, Difficulty, Estimated Cooking Time)
Recipe N