In [3]:
import pandas as pd

In [4]:
df = pd.read_csv('../data/baby_recipes.csv', sep = ';')
df.head()

Unnamed: 0,Dish Name,Baby Age,Iron-Rich,Allergen,Ingredients,Cooking Time (mins),Recipe,Texture,Meal Type,Calories (approx),Preparation Difficulty
0,Cheese & Broccoli Pasta 1,18-36 months,No,"Gluten,Dairy","Small pasta, broccoli, cheese, butter",15,"Cook pasta, steam broccoli, mix with cheese sauce",Soft Pieces,Dinner,200,Medium
1,Chicken & Sweet Pea Purée 2,8-12 months,Yes,,"Chicken breast, peas, sweet potato, water",30,"Steam chicken & veggies, blend to purée",Purée,Dinner,120,Medium
2,Cheese & Broccoli Pasta 3,18-36 months,No,"Gluten,Dairy","Small pasta, broccoli, cheese, butter",15,"Cook pasta, steam broccoli, mix with cheese sauce",Soft Pieces,Dinner,200,Easy
3,Chicken & Sweet Pea Purée 4,8-12 months,Yes,,"Chicken breast, peas, sweet potato, water",30,"Steam chicken & veggies, blend to purée",Purée,Dinner,120,Medium
4,Mini Veggie Omelette Fingers 5,9-12 months,Yes,Egg,"Eggs, spinach, cheese (optional), olive oil",15,"Whisk eggs, cook with veggies, cut into finger...",Finger Food,Breakfast,150,Medium


In [5]:
df.columns = df.columns.str.lower().str.replace(' ', '_')
df['allergen'] = df['allergen'].fillna('no')

# Vector search

In [28]:
from dotenv import load_dotenv
import os
from groq import Groq

In [29]:
load_dotenv()

groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

In [30]:
from qdrant_client import QdrantClient, models

In [31]:
qd_client = QdrantClient("http://localhost:6333")

In [None]:
model = 'BAAI/bge-small-en'
EMBEDDING_DIMENSIONALITY = 384
collection_name = "recipe_database"

In [6]:
qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

True

In [12]:
documents = df.to_dict(orient='records')
documents[0]

{'dish_name': 'Cheese & Broccoli Pasta 1',
 'baby_age': '18-36 months',
 'iron-rich': 'No',
 'allergen': 'Gluten,Dairy',
 'ingredients': 'Small pasta, broccoli, cheese, butter',
 'cooking_time_(mins)': 15,
 'recipe': 'Cook pasta, steam broccoli, mix with cheese sauce',
 'texture': 'Soft Pieces',
 'meal_type': 'Dinner',
 'calories_(approx)': 200,
 'preparation_difficulty': 'Medium'}

In [22]:
points = []

for i, doc in enumerate(documents):
    text = ' '.join(str(doc.get(field, "")) for field in df.columns)

    vector = models.Document(text=text, model=model)
    point = models.PointStruct(
        id=i,
        vector=vector,
        payload=doc
    )
    points.append(point)

In [23]:
qd_client.upsert(
    collection_name=collection_name,
    points=points
)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

model_optimized.onnx:   0%|          | 0.00/133M [00:00<?, ?B/s]

[32m2025-09-03 21:07:04.716[0m | [31m[1mERROR   [0m | [36mfastembed.common.model_management[0m:[36mdownload_model[0m:[36m430[0m - [31m[1mCould not download model from HuggingFace: [WinError 1314] Dem Client fehlt ein erforderliches Recht: '..\\..\\blobs\\d24f8d0cfab6e6a99fca53c1143c7d780aeb210c' -> 'C:\\Users\\kabal\\AppData\\Local\\Temp\\fastembed_cache\\models--Qdrant--bge-small-en\\snapshots\\8791246cc2a79c7949a4dc0d4a018cbd7d024879\\config.json' Falling back to other sources.[0m
100%|██████████| 77.7M/77.7M [00:04<00:00, 15.7MiB/s]


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [24]:
def vector_search(question):
    print('vector_search is used')
    
    query_points = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(
            text=question,
            model=model
        ),
        limit=3,
        with_payload=False
    )
    
    results = []
    
    for point in query_points.points:
        results.append(point.payload)
    
    return results

In [38]:
def build_prompt(query, search_results, max_docs=5):

    prompt_template = """
    You are a helpful AI food assistant. Answer the QUESTION using only the information in CONTEXT. 
    If the answer is not in CONTEXT, say you don't know.

    Provide concise, factual answers. If multiple dishes match, summarize clearly.

    QUESTION: {question}

    CONTEXT:
    {context}

    Answer:
    """.strip()

    # Limit to top N documents
    context_entries = []
    for i, doc in enumerate(search_results[:max_docs]):
        context_entries.append(
            f"- Dish: {doc.get('dish_name', 'Unknown')}\n"
            f"  Baby Age: {doc.get('baby_age', 'N/A')}\n"
            f"  Iron Rich: {doc.get('iron-rich', 'N/A')}\n"
            f"  Allergens: {doc.get('allergen', 'N/A')}\n"
            f"  Ingredients: {doc.get('ingredients', 'N/A')}\n"
            f"  Cooking Time: {doc.get('cooking_time_(mins)', 'N/A')} mins\n"
            f"  Texture: {doc.get('texture', 'N/A')}\n"
            f"  Meal Type: {doc.get('meal_type', 'N/A')}\n"
            f"  Calories: {doc.get('calories_(approx)', 'N/A')}\n"
            f"  Difficulty: {doc.get('preparation_difficulty', 'N/A')}\n"
            f"  Recipe: {doc.get('recipe', 'N/A')}"
        )

    context = "\n\n".join(context_entries)
    prompt = prompt_template.format(question=query, context=context)
    return prompt


In [34]:
def llm(prompt):
    response = groq_client.chat.completions.create(
        model='qwen/qwen3-32b',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [35]:
def rag(query):
    search_results = vector_search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [39]:
question = "What are good iron-rich meals for a 6-month-old?"

In [40]:
rag(question)

vector_search is used


AttributeError: 'NoneType' object has no attribute 'get'