In [5]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
import pandas as pd
import os
from smolagents import OpenAIServerModel
from smolagents import CodeAgent, WebSearchTool, LiteLLMRouterModel
import requests
from pydantic import BaseModel, ValidationError
from typing import List
import json
import re


In [6]:
# Test df
df = pd.read_csv("../Data/100recipes.csv")

## Load Some Embedding model:

In [7]:
model_emb = SentenceTransformer("avsolatorio/GIST-large-Embedding-v0")

In [8]:
# Combine relevant text fields into one string per recipe
def make_full_text(row):
    ingredients = " ".join(eval(row["ingredients"])) if isinstance(row["ingredients"], str) else ""
    directions = " ".join(eval(row["directions"])) if isinstance(row["directions"], str) else ""
    return f"{row['title']} {ingredients} {directions}"

df["full_text"] = df.apply(make_full_text, axis=1)

## Embed the first 100 test recipies

In [9]:
texts = model_emb.encode(df.full_text, show_progress_bar= True)

Batches: 100%|██████████| 4/4 [00:12<00:00,  3.17s/it]


## Query Expansion

In [10]:
question = input("What would you like to cook? ")
ingredients = input("What Ingredients do you have at home?: ")

In [17]:
url = "http://localhost:1234/v1/chat/completions"
headers = {"Content-Type": "application/json"}

data = {
    "model": "qwen3-0.6b",
    "messages": [
        {"role": "system", "content": """""You are an intelligent recipe query enrichment assistant. Your task is not to answer the user's question, but to think out loud and then output a list of highly relevant keywords related to food, cooking, ingredients, cuisines, or dish types.

Begin your answer with a <think> block where you reason about what the user might want, and how to expand their query in a food-related context.

End your answer with a comma-separated list of keywords. Do not include full sentences, explanations, or unrelated topics.

For example:

User: I want to eat something Italian.
<think>
They’re probably looking for Italian food — maybe pasta, pizza, or other dishes typical of that cuisine. I’ll expand with some core ingredients and dish types.
</think>
Italian, pasta, pizza, mozzarella, tomato, olive oil, herbs, risotto

User: {question}"

  """},
        {"role": "user", "content": f"{question}"}
    ],
    "temperature": 0.1,
    "max_tokens": 256,
    "stream": False
}

response = requests.post(url, headers=headers, json=data)
print(response.json()["choices"][0]["message"]["content"])


<think>
Okay, the user is asking for something German. Let me think about what they might want. Germans have a rich culinary tradition, so maybe they're looking for German food. Common dishes include hearty meals like Brotwein or Spaghetti alle vino. I should mention specific ingredients and cuisines related to Germany. Also, considering the user's possible interest in traditional recipes, including terms like Brotwein or Spaghetti would be good. Need to make sure the keywords are relevant and expand on the query naturally.
</think>

German food - bread wine, Spaghetti alle vino, German beer, hearty meals, spiced dishes, sauerkraut, strong meat, traditional recipes, regional specialties, German cuisine, spice blends, hearty stews, hearty dishes, German dishes, traditional German foods.


In [18]:
raw_query = response.json()["choices"][0]["message"]["content"]
_, q_ext = raw_query.split('</think>\n\n')

In [19]:
question_vec = model_emb.encode(question + q_ext + ingredients, show_progress_bar=True)

Batches: 100%|██████████| 1/1 [00:00<00:00,  6.51it/s]


In [14]:
import torch

In [20]:
similarities = model_emb.similarity(texts, question_vec)
similarities[0:10]

tensor([[0.3148],
        [0.3995],
        [0.3616],
        [0.4507],
        [0.2724],
        [0.4871],
        [0.3638],
        [0.3139],
        [0.3582],
        [0.3666]])

## Get the 3 best Recepies

In [26]:
top_k = torch.topk(similarities.squeeze(), k=3)
top_indices = top_k.indices
recipes = df.iloc[top_indices]

In [27]:
recipes = recipes[["title", "ingredients", "directions"]].reset_index(drop=True)

Unnamed: 0,title,ingredients,directions
0,Chicken Stew,"[""3 lb. chicken, boiled"", ""4 medium potatoes, ...","[""Remove chicken from bone."", ""Use the broth.""..."
1,Spanish Hamburgers,"[""1/2 c. celery"", ""2 chopped onions"", ""2 Tbsp....","[""Brown celery, onions and ground beef in butt..."
2,Summer Chicken,"[""1 pkg. chicken cutlets"", ""1/2 c. oil"", ""1/3 ...","[""Double recipe for more chicken.""]"


In [None]:
recipes.directions[0]
recipes_for_llm = recipes.to_dict(orient="records")

In [None]:
# Define Pydantic model for structured output
class Recipe(BaseModel):
    title: str
    ingredients: List[str]
    directions: List[str]

# Set up API call
url = "http://localhost:1234/v1/chat/completions"
headers = {"Content-Type": "application/json"}

data = {
    "model": "qwen3-0.6b",
    "messages": [
        {
            "role": "system",
            "content": """You are a helpful recipe assistant. Your task is to provide a concise and relevant response based on the user's question and the ingredients they have at home.
You should return a new recipe based on the user's question and the ingredients they have, using the top recipes from a dataset.
Do not include any explanations or additional information, just the recipe details in valid JSON format.

Start with <think> for reasoning. After </think>, return ONLY a JSON object in this format:
{
  "title": "...",
  "ingredients": ["..."],
  "directions": ["..."]
}
"""
        },
        {
            "role": "user",
            "content": f"question: {question}, ingredients: {ingredients}, top recipes: {recipes_for_llm}"
        }
    ],
    "temperature": 0.1,
    "max_tokens": 2048,
    "stream": False
}

# Call model
response = requests.post(url, headers=headers, json=data)
content = response.json()["choices"][0]["message"]["content"]
print("🔍 Raw model output:\n", content)

# Extract JSON after </think>
match = re.search(r"</think>\s*(\{.*\})", content, re.DOTALL)
if match:
    raw_json = match.group(1)
    try:
        parsed = json.loads(raw_json)
        recipe = Recipe(**parsed)
        print("\n✅ Structured recipe:")
        print(recipe)
    except (json.JSONDecodeError, ValidationError) as e:
        print("❌ Error parsing or validating the recipe:\n", e)
else:
    print("❌ Could not find JSON block after </think>.")


🔍 Raw model output:
 <think>
Okay, let's see. The user wants a German recipe using potatoes and chicken, and they provided some top recipes from a dataset. Let me check the ingredients and directions.

The first recipe is Chicken Stew with 3 lb chicken, boiled, and 4 medium potatoes. Then there's Spanish Hamburgers with celery, onions, oil, and another ingredient. The third one is Summer Chicken with cutlets, oil, and some other stuff. 

Since the user mentioned "something german" and has ingredients of potatoes and chicken, the most obvious fit would be the Chicken Stew recipe. Let me make sure there are no typos in the ingredients list. The ingredients for the Chicken Stew are 3 lb chicken, boiled, and 4 medium potatoes. That's all they have at home. Directions for the Chicken Stew include removing the chicken from bone and using the broth. 

I should structure this as a JSON object with title, ingredients, and directions. No explanations or additional info. Just the recipe details i

In [None]:
class ReviewResult(BaseModel):
    approved: bool
    missing_ingredients: List[str]

data = {
    "model": "qwen3-0.6b",
    "messages": [
        {
            "role": "system",
            "content": """You are a helpful recipe reviewer assistant.

Your task is to review the newly generated recipe against the user's original question and the ingredients they have at home.

Based on the recipe ingredients, check if all ingredients are available in the user's list.

Return a JSON object ONLY with the following fields:

{
  "approved": true or false,
  "missing_ingredients": [list of missing ingredient names, empty if none]
}

- "approved" is true if the recipe matched the user's question.
- "missing_ingredients" lists any ingredients required by the recipe that the user does not have.
- Do NOT include any explanations or extra text, only the JSON.

Example input:
User question: I want to cook something Italian.
User ingredients: ["pasta", "garlic", "olive oil"]
Recipe: ["title": "Pasta with Garlic and Olive Oil", "ingredients": ["pasta", "garlic", "olive oil", "parsley"], "directions": ["Cook pasta", "Sauté garlic", "Mix with olive oil and parsley"]}

Expected output:
{
  "approved": false,
  "missing_ingredients": ["tomato sauce", "basil"]
}
"""
        },
        {
            "role": "user",
            "content": f"question: {question}, ingredients: {ingredients}, recipe: {recipe}"
        }
    ],
    "temperature": 0.1,
    "max_tokens": 2048,
    "stream": False
}

# Call model
response = requests.post(url, headers=headers, json=data)
content = response.json()["choices"][0]["message"]["content"]

print("🔍 Raw model output:\n", content)

# Extract JSON after optional <think> block if present
if "<think>" in content:
    _, json_part = content.split("</think>", 1)
else:
    json_part = content

json_part = json_part.strip()

try:
    review = ReviewResult.parse_raw(json_part)
    print("✅ Parsed review result:")
except ValidationError as e:
    print("❌ Failed to parse review JSON:", e)
    print("Raw JSON content was:", json_part)


🔍 Raw model output:
 <think>
Okay, let's see. The user wants to check if their ingredients are available for making a Chicken Stew recipe. They provided both the question and their ingredients.

The question is "something german", which probably refers to German cuisine. But maybe that's just a way of saying it's a stew. Anyway, the main thing is to review the ingredients against what they have at home.

The user's ingredients list includes "potatoes" and "chicken". The recipe mentions "3 lb. chicken, boiled" and "4 medium potatoes". So those are exactly matching what the user has. There's no mention of other ingredients like olive oil or garlic in the recipe, but since the user didn't have those, they're not part of the missing list.

Wait, but the question is about a German dish, so maybe there's some confusion here. But according to the given data, the ingredients are potatoes and chicken, which match what the user has. So the approved should be true with no missing ingredients list

C:\Users\TimPr\AppData\Local\Temp\ipykernel_48788\3735818500.py:68: PydanticDeprecatedSince20: The `parse_raw` method is deprecated; if your data is JSON use `model_validate_json`, otherwise load the data then use `model_validate` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  review = ReviewResult.parse_raw(json_part)


for later agentic usage:

In [14]:
model = OpenAIServerModel(
    model_id="qwen3-0.6b",
    api_base="http://localhost:1234/v1",
    api_key= "not-needed",
)

In [15]:
# agent = CodeAgent(tools=[WebSearchTool()], model=model)
# agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")

In [None]:
from smolagents import (
    CodeAgent,
    ToolCallingAgent,
    InferenceClientModel,
    WebSearchTool,
    LiteLLMModel,
)

model = OpenAIServerModel(
    model_id="qwen3-0.6b",
    api_base="http://localhost:1234/v1",
    api_key= "not-needed",
)

web_agent = ToolCallingAgent(
    tools=[WebSearchTool(), visit_webpage],
    model=model,
    max_steps=10,
    name="web_search_agent",
    description="Runs web searches for you.",
)

In [18]:
manager_agent = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[web_agent],
    additional_authorized_imports=["time", "numpy", "pandas"],
)

In [19]:
answer = manager_agent.run("If LLM training continues to scale up at the current rhythm until 2030, what would be the electric power in GW required to power the biggest training runs by 2030? What would that correspond to, compared to some countries? Please provide a source for any numbers used.")