In [10]:
from smolagents import CodeAgent, InferenceClientModel, Tool, tool
from dotenv import load_dotenv
import os
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from langfuse import get_client
from huggingface_hub import notebook_login
# Authenticate against the Hugging Face Hub (shows the interactive login widget).
notebook_login()

# Load variables from a local .env file into the process environment.
load_dotenv()

# Langfuse documents its secret under LANGFUSE_SECRET_KEY. The previous lookup
# dropped the leading "L", so it returned None even when the key was configured.
# The misspelled variable is still consulted as a fallback, and the old
# `angfuse_key` name is kept as an alias so cells that reference it keep working.
langfuse_key = os.getenv('LANGFUSE_SECRET_KEY') or os.getenv('ANGFUSE_SECRET_KEY')
angfuse_key = langfuse_key

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
class RecipeInfoRetrieverTool(Tool):
    """Agent tool that finds recipes by semantic similarity to a text query.

    On construction it loads, from the current working directory:
      * ``recipes_for_embeddings.jsonl`` - one JSON object per line; position in
        this file is used as the id returned by the FAISS index
        (assumes the index was built in the same order - TODO confirm),
      * ``full_format_recipes.json`` - a JSON list of full recipe records,
      * ``recipe_index.faiss`` - the serialized FAISS index,
    plus the ``BAAI/bge-m3`` sentence-transformer used to embed queries.
    """

    name = "RecipeInfoRetriever"
    description = "Retrieves information about recipes including ingredients, title and instructions based on a natural language query."
    inputs = {
        "query": {
            "type": "string",
            "description": "Natural language query about ingredients or dietary preferences (e.g., 'eggplant and garlic', 'vegan pasta')."
        }
    }
    output_type = "string"

    def __init__(self, k: int = 3, **kwargs):
        """Initialize the recipe retrieval tool.

        Args:
            k: Number of top recipes to retrieve (default: 3). Must be >= 1.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.

        Raises:
            ValueError: If ``k`` is smaller than 1.
            OSError: If one of the data files cannot be opened.
            json.JSONDecodeError: If a data file contains invalid JSON.
        """
        super().__init__(**kwargs)
        if k < 1:
            # Fail at construction time instead of at the first search call.
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

        print("Loading recipe retrieval system...")

        # Load metadata from embeddings file. JSON is UTF-8 by specification and
        # the recipes contain non-ASCII text, so do not rely on the locale default.
        # Blank lines (e.g. a trailing newline) are skipped instead of crashing.
        with open('recipes_for_embeddings.jsonl', 'r', encoding='utf-8') as f:
            self.metadata = [json.loads(line) for line in f if line.strip()]

        # Load full recipe details (with ingredients and directions)
        with open('full_format_recipes.json', 'r', encoding='utf-8') as f:
            full_recipes = json.load(f)

        # Create title lookup for full recipe details; records without a usable
        # title are ignored, and a later duplicate title overwrites an earlier one.
        self.recipe_lookup = {}
        for recipe in full_recipes:
            recipe_title = recipe.get('title')
            if recipe_title:
                self.recipe_lookup[recipe_title.strip()] = recipe

        # Load embedding model
        print("Loading embedding model BAAI/bge-m3...")
        self.embed_model = SentenceTransformer('BAAI/bge-m3')

        # Load FAISS index
        print("Loading FAISS index...")
        self.index = faiss.read_index('recipe_index.faiss')

        # forward() maps FAISS ids straight onto self.metadata positions, so a
        # size mismatch means some hits cannot be resolved.
        if self.index.ntotal != len(self.metadata):
            print(
                f"Warning: FAISS index holds {self.index.ntotal} vectors but "
                f"{len(self.metadata)} metadata rows were loaded"
            )

        print(f"✓ Recipe retrieval system loaded: {len(self.metadata)} recipes indexed")

    def forward(self, query: str) -> str:
        """Search for recipes matching the query.

        Args:
            query: Natural language search query

        Returns:
            Formatted string with recipe titles, ingredients, and directions.
            Only hits that resolve to a loaded metadata row are reported, so the
            count in the header can be lower than ``k``.
        """
        assert isinstance(query, str), "Query must be a string."

        # 1. Embed the query; FAISS expects a contiguous float32 matrix.
        query_vec = np.ascontiguousarray(
            self.embed_model.encode([query], convert_to_tensor=False),
            dtype=np.float32,
        )

        # 2. Search FAISS index
        _, indices = self.index.search(query_vec, self.k)

        # 3. Retrieve documents. FAISS pads with -1 when it finds fewer than k
        #    neighbours; indexing the list with -1 would silently return the last
        #    recipe, so drop padding and any id outside the metadata range.
        total = len(self.metadata)
        retrieved_docs = [
            self.metadata[int(idx)] for idx in indices[0] if 0 <= idx < total
        ]

        # 4. Format output with full recipe details
        parts = [f"Found {len(retrieved_docs)} recipes matching '{query}':\n\n"]
        for i, doc in enumerate(retrieved_docs, 1):
            parts.append(self._format_recipe(i, doc))
        return "".join(parts)

    def _format_recipe(self, position: int, doc: dict) -> str:
        """Render one retrieved metadata row as a text section.

        Uses the full recipe record when the stripped title is present in
        ``self.recipe_lookup``; otherwise falls back to the row's
        ``text_for_embedding`` field.
        """
        rule = f"{'='*60}\n"
        title = doc['title'].strip()

        # Look up full recipe details
        full_recipe = self.recipe_lookup.get(title)

        if not full_recipe:
            # Fallback if full recipe not found
            return (
                rule
                + f"Recipe {position}: {title}\n"
                + rule
                + f"{doc.get('text_for_embedding', 'No details available')}\n\n"
            )

        lines = [rule, f"Recipe {position}: {full_recipe['title']}\n", rule, "\n"]

        # Ingredients (`or []` tolerates an explicit null in the JSON record)
        lines.append("INGREDIENTS:\n")
        for ingredient in full_recipe.get('ingredients') or []:
            lines.append(f"  • {ingredient}\n")

        # Directions
        lines.append("\nDIRECTIONS:\n")
        for j, direction in enumerate(full_recipe.get('directions') or [], 1):
            lines.append(f"  {j}. {direction}\n")

        lines.append("\n")
        return "".join(lines)

In [11]:
# Set up the hosted LLM backend and the recipe search tool (default top-3 results)
model = InferenceClientModel(model_id="Qwen/Qwen2.5-14B-Instruct")
retrieval_tool = RecipeInfoRetrieverTool(k=3)

Loading recipe retrieval system...
Loading embedding model BAAI/bge-m3...
Loading FAISS index...
✓ Recipe retrieval system loaded: 18222 recipes indexed


In [13]:
# Smoke-test the tool on its own, without going through an agent
sample_query = "eggplant and garlic"
result = retrieval_tool.forward(sample_query)
print(result)

Found 3 recipes matching 'eggplant and garlic':

Recipe 1: Roasted Eggplant and Garlic Dip 

INGREDIENTS:
  • 2 small heads garlic
  • 1 eggplant (1 pound)
  • 1/4 cup extra-virgin olive oil
  • 1 teaspoon red-wine vinegar, or to taste

DIRECTIONS:
  1. Preheat oven to 425°F.
  2. Separate garlic cloves without peeling and tightly wrap together in foil. Prick eggplant with a fork. In a shallow baking pan roast garlic and eggplant in middle of oven until very tender, about 30 minutes for garlic and about 45 minutes for eggplant.
  3. Unwrap garlic and peel, transferring garlic to a food processor. Scrape flesh from eggplant into food processor, discarding skin. Purée mixture until smooth and, with motor running, add oil and vinegar until combined. Season dip with salt and pepper and serve with pita toasts.

Recipe 2: Herbed Eggplant with Tomatoes, Onion and Garlic 

INGREDIENTS:
  • 3 medium eggplants
  • 1/3 cup canned diced tomatoes in juice, drained
  • 1/2 cup chopped white onion
  