In [1]:
import pandas as pd

In [None]:
# The raw recipe export is semicolon-delimited rather than comma-delimited.
raw_csv_path = '../data/baby_recipes.csv'
df = pd.read_csv(raw_csv_path, sep=';')

# Preview the first rows.
df.head()

Unnamed: 0,Dish Name,Baby Age,Iron-Rich,Allergen,Ingredients,Cooking Time (mins),Recipe,Texture,Meal Type,Calories (approx),Preparation Difficulty
0,Sweet Potato Purée,6-8 months,No,Dairy,"sweet potato, water or breastmilk/formula",15,"Wash and prep ingredients: sweet potato, water...",Purée,Breakfast,160,Easy
1,Carrot Purée,6-8 months,No,Dairy,"carrot, water or breastmilk/formula",15,"Wash and prep ingredients: carrot, water or br...",Purée,Breakfast,130,Easy
2,Pumpkin Purée,6-8 months,No,Dairy,"pumpkin, water or breastmilk/formula",15,"Wash and prep ingredients: pumpkin, water or b...",Purée,Breakfast,130,Easy
3,Pea Purée,6-8 months,No,,"peas, water",15,"Wash and prep ingredients: peas, water. Steam ...",Purée,Breakfast,90,Easy
4,Zucchini Purée,6-8 months,No,,"zucchini, water",15,"Wash and prep ingredients: zucchini, water. St...",Purée,Breakfast,90,Easy


In [4]:
# Normalise every header to snake_case: lower-case, spaces replaced by underscores.
snake_case_headers = df.columns.str.lower().str.replace(' ', '_')
df.columns = snake_case_headers

# Missing allergen values are labelled explicitly as 'no'.
df['allergen'] = df['allergen'].fillna('no')

# Shorten the headers that still carry punctuation or units after normalisation.
shorter_headers = {
    'iron-rich': 'iron_rich',
    'cooking_time_(mins)': 'cooking_time',
    'calories_(approx)': 'calories',
}
df = df.rename(columns=shorter_headers)

In [25]:
# Add a 1-based `id` as the first column.
# The guard keeps this cell re-runnable: DataFrame.insert raises ValueError
# when a column with the same name already exists.
if 'id' not in df.columns:
    df.insert(0, 'id', range(1, 1 + len(df)))

In [26]:
# Persist the cleaned table with the same ';' delimiter as the raw file,
# leaving the DataFrame index out of the output.
cleaned_csv_path = '../data/baby_recipes_cleaned.csv'
df.to_csv(cleaned_csv_path, sep=';', index=False)

In [5]:
# One plain dict per recipe row, keyed by column name.
documents = df.to_dict(orient='records')

# Peek at the first record.
documents[0]

{'dish_name': 'Sweet Potato Purée',
 'baby_age': '6-8 months',
 'iron_rich': 'No',
 'allergen': 'Dairy',
 'ingredients': 'sweet potato, water or breastmilk/formula',
 'cooking_time': 15,
 'recipe': 'Wash and prep ingredients: sweet potato, water or breastmilk/formula. Steam or gently simmer vegetables/fruit until fork-tender. Blend with a splash of cooking water or breastmilk/formula until smooth. Cool before serving. Store in the fridge up to 2 days or freeze portions up to 1 month.',
 'texture': 'Purée',
 'meal_type': 'Breakfast',
 'calories': 160,
 'preparation_difficulty': 'Easy'}

# Vector search

In [6]:
from dotenv import load_dotenv
import os
from groq import Groq

In [7]:
# Load variables from a .env file into the process environment.
load_dotenv()

# The API key is read from the environment so it never appears in the notebook.
groq_api_key = os.getenv("GROQ_API_KEY")
groq_client = Groq(api_key=groq_api_key)

In [8]:
from qdrant_client import QdrantClient, models

In [9]:
# Address of the Qdrant server this notebook talks to.
qdrant_url = "http://localhost:6333"
qd_client = QdrantClient(qdrant_url)

In [10]:
# Qdrant collection that stores the recipe vectors.
collection_name = "recipe_database"

# Embedding model name passed to Qdrant, and the vector size the collection is
# created with (the two have to agree).
model = 'BAAI/bge-small-en'
EMBEDDING_DIMENSIONALITY = 384

In [11]:
# Start from a clean slate: drop any earlier copy of the collection.
qd_client.delete_collection(
    collection_name=collection_name,
)

False

In [12]:
# Vectors use the configured dimensionality and are compared by cosine distance.
recipe_vectors_config = models.VectorParams(
    size=EMBEDDING_DIMENSIONALITY,
    distance=models.Distance.COSINE,
)

qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=recipe_vectors_config,
)

True

In [None]:
# The payload `id` is the integer column added with df.insert(...), so it is
# indexed with the integer schema; a keyword index is meant for string values.
qd_client.create_payload_index(
    collection_name=collection_name,
    field_name="id",
    field_schema=models.PayloadSchemaType.INTEGER,
)

In [None]:
# Build one Qdrant point per recipe. The vector is given as a models.Document
# (text + embedding model name) rather than as a list of floats.
points = []

for position, doc in enumerate(documents, start=1):
    # Embed every field of the record except the synthetic id.
    text = ' '.join(str(value) for field, value in doc.items() if field != "id")

    points.append(
        models.PointStruct(
            # Reuse the payload id so the point id and the payload id agree;
            # fall back to the 1-based position if the record carries no id.
            # int(...) guards against a numpy integer coming out of pandas.
            id=int(doc.get("id", position)),
            vector=models.Document(text=text, model=model),
            payload=doc,
        )
    )

In [14]:
# Send every prepared point to the collection in one call.
qd_client.upsert(collection_name=collection_name, points=points)

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [15]:
def vector_search(question, limit=2):
    """Return the payloads of the recipes closest to ``question``.

    The question is sent to Qdrant as a ``models.Document`` that names the
    same embedding model as the indexed recipes, and the nearest points of
    ``collection_name`` are fetched together with their payloads.

    Args:
        question: Free-text user query.
        limit: Maximum number of recipes to return. Defaults to 2, the value
            that used to be hard-coded.

    Returns:
        A list with one payload per returned point, in the order Qdrant
        returned them.
    """
    print('vector_search is used')

    query_points = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(text=question, model=model),
        limit=limit,
        with_payload=True,
    )

    return [point.payload for point in query_points.points]

In [17]:
# Instruction wrapper sent to the LLM; {question} and {context} are filled in
# by build_prompt.
prompt_template = """
You are a helpful AI food assistant. Answer the QUESTION using only the information in CONTEXT. 
If the answer is not in CONTEXT, say you don't know.

Provide concise, factual answers with a recipe per dish. If multiple dishes match, summarize clearly.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# Layout of a single retrieved recipe inside the CONTEXT section.
entry_template = """
'dish_name': '{dish_name}',
'baby_age' : {baby_age},
'iron_rich': {iron_rich},
'allergen': {allergen},
'ingredients': '{ingredients}',
'cooking_time': {cooking_time},
'texture': '{texture}',
'meal_type': '{meal_type}',
'calories': {calories},
'preparation_difficulty': '{preparation_difficulty}',
'recipe': '{recipe}'
""".strip()


def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved recipes.

    Each item of ``search_results`` is formatted with ``entry_template``
    (keys the template does not mention are ignored), the entries are
    separated by a blank line, and the result is placed into
    ``prompt_template`` next to the question. Surrounding whitespace is
    stripped from the final prompt.
    """
    rendered_entries = [entry_template.format(**recipe) for recipe in search_results]
    context = "\n\n".join(rendered_entries)
    return prompt_template.format(question=query, context=context).strip()


In [18]:
import re

In [19]:
def llm(prompt, model='qwen/qwen3-32b'):
    """Send ``prompt`` to the Groq chat API and return the cleaned answer text.

    Args:
        prompt: Full prompt text, sent as a single user message.
        model: Groq model identifier. Defaults to the model this notebook
            was written against.

    Returns:
        The first choice's message content with any ``<think>...</think>``
        section removed and surrounding whitespace stripped. An empty string
        is returned when the response carries no content.
    """
    response = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    # `content` may be None; re.sub would raise TypeError on it.
    raw_answer = response.choices[0].message.content or ""

    # DOTALL lets the pattern span the newlines inside the <think> section.
    return re.sub(r"<think>.*?</think>\s*", "", raw_answer, flags=re.DOTALL).strip()

In [20]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves matching recipes with ``vector_search``, renders them and the
    query into a prompt with ``build_prompt``, and returns what ``llm``
    produces for that prompt.
    """
    retrieved_recipes = vector_search(query)
    return llm(build_prompt(query, retrieved_recipes))

# Test run

In [21]:
# Sample query used to smoke-test the pipeline end to end.
question = 'What are good iron-rich meals for a 11-month-old?'

In [23]:
# Run the full pipeline on the sample question; print() renders the answer's
# line breaks instead of showing the string repr.
answer = rag(question)
print(answer)

vector_search is used
For an 11-month-old, two iron-rich options are:  

1. **Fish & Veggie Mash** (suitable for 9–12 months):  
   - *Ingredients*: Cod, potato, peas.  
   - *Recipe*: Steam fish until flaky, remove bones; steam veggies until tender. Mash to a soft, lumpy texture.  
   - *Texture*: Mash.  
   - *Allergen*: Fish.  

2. **Beef & Veggie Bolognese** (recommended for 12–18 months but can be adapted):  
   - *Ingredients*: Small pasta, beef, tomato, carrot, onion.  
   - *Recipe*: Cook pasta until very soft. Dice beef and cook through; steam veggies until tender. Serve as soft pieces.  
   - *Texture*: Soft pieces.  
   - *Allergen*: Gluten (from pasta).  

Always check for allergens and adjust texture as needed for your baby’s developmental stage.
