In [8]:
import pandas as pd

In [141]:
# Read the semicolon-separated recipe dataset and preview the first rows.
df = pd.read_csv('../data/baby_recipes_500_unique.csv', delimiter=';')
df.head()

Unnamed: 0,Dish Name,Baby Age,Iron-Rich,Allergen,Ingredients,Cooking Time (mins),Recipe,Texture,Meal Type,Calories (approx),Preparation Difficulty
0,Spinach & Mango Salmon Delight,8-12 months,Yes,,"Spinach, Mango, Salmon, Quinoa",24,"Cook Spinach, Mango, Salmon, Quinoa until soft...",Finger Food,Dinner,119,Medium
1,Sweet Potato & Plum Cod Delight,12-18 months,Yes,Dairy,"Sweet Potato, Plum, Cod, Rice, Cheese",28,"Cook Sweet Potato, Plum, Cod, Rice, Cheese unt...",Purée,Lunch,124,Medium
2,Beetroot & Kiwi Chickpeas Delight,8-12 months,Yes,Gluten,"Beetroot, Kiwi, Chickpeas, Millet, Yogurt",23,"Cook Beetroot, Kiwi, Chickpeas, Millet, Yogurt...",Mash,Breakfast,67,Medium
3,Spinach & Pear Cod Delight,9-12 months,Yes,Fish,"Spinach, Pear, Cod, Oats, Cheese",18,"Cook Spinach, Pear, Cod, Oats, Cheese until so...",Finger Food,Dinner,76,Easy
4,Sweet Potato & Blueberry Chicken Delight,18-36 months,Yes,Gluten,"Sweet Potato, Blueberry, Chicken, Whole Wheat ...",5,"Cook Sweet Potato, Blueberry, Chicken, Whole W...",Mash,Breakfast,116,Medium


In [142]:
# Lower-case every header and swap spaces for underscores.
df.columns = [label.lower().replace(' ', '_') for label in df.columns]
# Missing allergen values are replaced with the literal string 'no'.
df['allergen'] = df['allergen'].fillna('no')
# Shorten the three headers that still carry punctuation after normalisation.
df = df.rename(columns={
    'iron-rich': 'iron_rich',
    'cooking_time_(mins)': 'cooking_time',
    'calories_(approx)': 'calories',
})

In [None]:
# One dict per DataFrame row, keyed by column name; each dict is later used
# as the payload of a Qdrant point.
documents = df.to_dict(orient='records')
# Show the first record as a sanity check.
documents[0]

# Vector search

In [144]:
from dotenv import load_dotenv
import os
from groq import Groq

In [145]:
# load_dotenv() comes from python-dotenv; the Groq client is then built from
# the GROQ_API_KEY environment variable.
load_dotenv()

groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [146]:
from qdrant_client import QdrantClient, models

In [147]:
# Client for the Qdrant server expected at http://localhost:6333.
qd_client = QdrantClient("http://localhost:6333")

In [148]:
# Embedding model name passed to models.Document for both indexing and querying.
model = 'BAAI/bge-small-en'
# Vector size the collection is created with; presumably the output width of
# the model above -- confirm if the model is ever changed.
EMBEDDING_DIMENSIONALITY = 384
# Name of the Qdrant collection that stores the recipes.
collection_name = "recipe_database"

In [149]:
# Drop any existing collection with this name before it is created again.
qd_client.delete_collection(collection_name=collection_name)

True

In [150]:
# Vectors of EMBEDDING_DIMENSIONALITY floats, compared by cosine distance.
vectors_config = models.VectorParams(
    distance=models.Distance.COSINE,
    size=EMBEDDING_DIMENSIONALITY,
)
qd_client.create_collection(collection_name=collection_name, vectors_config=vectors_config)

True

In [152]:
# One point per recipe: the id is the record's position, the vector is a
# models.Document over all column values joined by spaces, and the payload is
# the record itself.
points = [
    models.PointStruct(
        id=position,
        vector=models.Document(
            text=' '.join(str(record.get(column, "")) for column in df.columns),
            model=model,
        ),
        payload=record,
    )
    for position, record in enumerate(documents)
]

In [153]:
# Send every prepared point to the recipe collection.
qd_client.upsert(
    collection_name=collection_name,
    points=points
)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [154]:
def vector_search(question, limit=2):
    """Return the payloads of the stored recipes closest to ``question``.

    The question is wrapped in a ``models.Document`` using the same ``model``
    name as the indexed points and queried against ``collection_name``.

    Args:
        question: Free-text query.
        limit: Maximum number of hits to request. Defaults to 2, the value
            that used to be hard-coded, so existing calls behave as before.

    Returns:
        A list of payload dicts, in the order the hits were returned.
    """
    # Trace message kept as-is: it identifies which search path was used.
    print('vector_search is used')

    response = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(text=question, model=model),
        limit=limit,
        with_payload=True,
    )

    return [hit.payload for hit in response.points]

In [155]:
# List the cleaned column names; entry_template's placeholders use these keys.
df.columns

Index(['dish_name', 'baby_age', 'iron_rich', 'allergen', 'ingredients',
       'cooking_time', 'recipe', 'texture', 'meal_type', 'calories',
       'preparation_difficulty'],
      dtype='object')

In [156]:
# Instruction block sent to the LLM; {question} and {context} are filled in by
# build_prompt. The first line intentionally ends with a space before the
# newline, matching the original template text.
prompt_template = (
    "You are a helpful AI food assistant. Answer the QUESTION using only the information in CONTEXT. \n"
    "If the answer is not in CONTEXT, say you don't know.\n"
    "\n"
    "Provide concise, factual answers with a recipe per dish. If multiple dishes match, summarize clearly.\n"
    "\n"
    "QUESTION: {question}\n"
    "\n"
    "CONTEXT:\n"
    "{context}"
)

# Per-recipe rendering; one placeholder for each cleaned DataFrame column.
entry_template = "\n".join([
    "'dish_name': '{dish_name}',",
    "'baby_age' : {baby_age},",
    "'iron_rich': {iron_rich},",
    "'allergen': {allergen},",
    "'ingredients': '{ingredients}',",
    "'cooking_time': {cooking_time},",
    "'texture': '{texture}',",
    "'meal_type': '{meal_type}',",
    "'calories': {calories},",
    "'preparation_difficulty': '{preparation_difficulty}',",
    "'recipe': '{recipe}'",
])


def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved recipes.

    Each item of ``search_results`` is a dict providing every key that
    ``entry_template`` references. Entries are separated by a blank line and
    the finished prompt has surrounding whitespace stripped.
    """
    rendered_entries = (entry_template.format(**doc) + "\n\n" for doc in search_results)
    context = "".join(rendered_entries)
    return prompt_template.format(question=query, context=context).strip()


In [157]:
import re

In [158]:
def llm(prompt, llm_model='qwen/qwen3-32b'):
    """Send ``prompt`` to the Groq chat API and return the cleaned reply text.

    Args:
        prompt: Full prompt text, sent as a single user message.
        llm_model: Chat model identifier. Defaults to the model that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        The reply with any ``<think>...</think>`` sections removed and
        surrounding whitespace stripped; an empty string when the reply
        carries no text content.
    """
    response = groq_client.chat.completions.create(
        model=llm_model,
        messages=[{"role": "user", "content": prompt}],
    )
    # `content` may be None; fall back to "" so re.sub does not raise TypeError.
    raw_answer = response.choices[0].message.content or ""

    # DOTALL lets the pattern span the multi-line reasoning section.
    return re.sub(r"<think>.*?</think>\s*", "", raw_answer, flags=re.DOTALL).strip()

In [159]:
def rag(query):
    """Answer ``query``: retrieve recipes, build the prompt, ask the LLM."""
    retrieved = vector_search(query)
    return llm(build_prompt(query, retrieved))

# Test run

In [160]:
# Sample query used to exercise the pipeline step by step.
question = "What are good iron-rich meals for a 11-month-old?"

In [161]:
# Retrieval step on its own: payloads of the closest recipes for the query.
search_results = vector_search(question)

vector_search is used


In [162]:
# NOTE(review): despite its name, `answer` holds the rendered prompt text,
# not the model's reply.
answer = build_prompt(question, search_results)

In [163]:
# `answer` is the prompt built by build_prompt; llm() returns the model's reply.
print(llm(answer))

The provided CONTEXT includes two iron-rich meals suitable for a 11-month-old, though originally recommended for 7–10 months. Adjust textures if needed for readiness.

1. **Butternut Squash & Apple Tofu Delight**  
   - **Allergen**: Gluten  
   - **Recipe**: Cook butternut squash, apple, tofu, rice, and cheese until soft. Purée and serve warm.  

2. **Spinach & Blueberry Chicken Delight**  
   - **Allergen**: Egg  
   - **Recipe**: Cook spinach, blueberry, chicken, whole wheat pasta, and milk until soft. Mash into finger food or purée and serve warm.  

Always check for allergies and adjust consistency based on the baby’s developmental stage.
