# Get necessary imports

In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import json
import faiss
from sentence_transformers import SentenceTransformer

In [25]:
# Load JSONL for embeddings index mapping.
# Each line is parsed as one JSON record; search() later indexes `metadata`
# by FAISS row id, so the order of records read here matters.
embeddings_path = 'recipes_for_embeddings.jsonl'
full_recipes_path = 'full_format_recipes.json'

print(f"Loading data from '{embeddings_path}'...")
assert os.path.exists(embeddings_path), "Data file not found!"
# Decode explicitly as UTF-8: without `encoding=` Python falls back to the
# platform locale, which can mis-decode or reject non-ASCII recipe text.
with open(embeddings_path, 'r', encoding='utf-8') as f:
    # Blank lines (e.g. a trailing newline) are skipped instead of crashing json.loads.
    data = [json.loads(line) for line in f if line.strip()]
metadata = list(data)  # Keep full object to pass to Qwen later
print(f"Loaded {len(data)} documents.")

# Load full recipe details (includes directions)
print(f"Loading full recipe details from '{full_recipes_path}'...")
with open(full_recipes_path, 'r', encoding='utf-8') as f:
    full_recipes = json.load(f)

# Create lookup dict: stripped title -> full recipe (skip recipes without titles).
# Recipes sharing the same stripped title overwrite each other; the last one wins.
recipe_lookup = {}
skipped = 0
for recipe in full_recipes:
    title = recipe.get('title')
    if title:
        recipe_lookup[title.strip()] = recipe
    else:
        skipped += 1

print(f"Loaded {len(recipe_lookup)} full recipes with directions.")
if skipped > 0:
    print(f"Skipped {skipped} recipes without titles.")

Loading data from 'recipes_for_embeddings.jsonl'...
Loaded 18222 documents.
Loading full recipe details from 'full_format_recipes.json'...
Loaded 17775 full recipes with directions.
Skipped 19 recipes without titles.


In [26]:
# Load embedding model (must match the one used in build_index.ipynb)
print("Loading embedding model...")
embed_model = SentenceTransformer('BAAI/bge-m3')

# Load FAISS index
print("Loading FAISS index...")
index = faiss.read_index('recipe_index.faiss')
print(f"Index loaded with {index.ntotal} vectors.")

# search() uses the row ids returned by FAISS as positions in `metadata`,
# so the two must have the same length or retrieval returns the wrong
# documents (or raises IndexError).
assert index.ntotal == len(metadata), (
    f"Index has {index.ntotal} vectors but {len(metadata)} documents were loaded; "
    "rebuild the index or reload the matching JSONL file."
)

Loading embedding model...
Loading FAISS index...
Index loaded with 18222 vectors.


In [27]:
# --- RETRIEVAL FUNCTION ---
def search(query, k=3):
    """Return the metadata records of the nearest neighbours of ``query``.

    Args:
        query: Text to embed with ``embed_model`` and look up in ``index``.
        k: Maximum number of neighbours to request from the FAISS index.

    Returns:
        A list of entries from ``metadata``, in the order FAISS returned
        them. The list can be shorter than ``k`` when the index yields
        fewer than ``k`` valid neighbours.
    """
    # 1. Embed the query (passed as a one-element batch)
    query_vec = embed_model.encode([query], convert_to_tensor=False)

    # 2. Search FAISS
    _, indices = index.search(query_vec, k)

    # 3. Retrieve actual documents.
    # FAISS fills missing neighbours with the label -1; indexing a Python
    # list with -1 would silently return the *last* document, so negative
    # ids are dropped instead of being looked up.
    return [metadata[idx] for idx in indices[0] if idx >= 0]

# Instantiating model and tokenizer

In [28]:
# Checkpoint name; its tokenizer supplies the chat template used when building prompts.
model_id = "Qwen/Qwen2.5-14B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)


In [33]:
def _format_recipe_context(rank, recipe):
    """Render one full recipe as the 'Recipe N' text block used in the prompt context."""
    # `or []` also covers a key that is present but holds None, which
    # `.get(key, [])` would pass straight to join() and crash on.
    ingredients_str = '\n  - '.join(recipe.get('ingredients') or [])
    directions_str = '\n  '.join(recipe.get('directions') or [])
    return (
        f"Recipe {rank}: {recipe['title']}\n"
        f"Ingredients:\n  - {ingredients_str}\n"
        f"Directions:\n  {directions_str}\n\n"
    )


def _print_recipe(rank, recipe):
    """Print one retrieved recipe (title, ingredients, numbered directions) to stdout."""
    print(f"{rank}. {recipe['title']}")
    print("\nIngredients:")
    for ingredient in recipe.get('ingredients') or []:
        print(f"  - {ingredient}")

    print("\nDirections:")
    for step, direction in enumerate(recipe.get('directions') or [], 1):
        print(f"  {step}. {direction}")
    print("\n" + "="*50 + "\n")


def generate_rag_response(user_query):
    """Retrieve recipes for ``user_query``, build the chat prompt, and print the hits.

    The top 3 documents from ``search`` are matched by stripped title against
    ``recipe_lookup``. Matches are rendered with ingredients and directions;
    documents without a match fall back to their ``text_for_embedding`` text.
    The context is wrapped in a system/user message pair and run through the
    tokenizer's chat template.

    Note that this function does not call a language model: the templated
    prompt is built but no text is generated from it here.

    Args:
        user_query: The user's question, used for retrieval and in the prompt.

    Returns:
        The list of full recipe dicts found in ``recipe_lookup``, in retrieval
        order. Documents that used the fallback text are not included.
    """
    # 1. Retrieve Context
    retrieved_docs = search(user_query, k=3)

    # 2. Format Context with FULL recipe details (including directions)
    context_parts = []
    full_retrieved_recipes = []

    for rank, doc in enumerate(retrieved_docs, 1):
        # recipe_lookup is keyed by stripped title, so strip before the lookup
        full_recipe = recipe_lookup.get(doc['title'].strip())

        if full_recipe:
            full_retrieved_recipes.append(full_recipe)
            context_parts.append(_format_recipe_context(rank, full_recipe))
        else:
            # Fallback to embedding text if full recipe not found
            context_parts.append(
                f"Recipe {rank}: {doc['title']}\nContent: {doc['text_for_embedding']}\n\n"
            )

    context_str = "".join(context_parts)

    # 3. Construct Qwen Chat Template
    messages = [
        {"role": "system", "content":
         "You are a helpful, health-conscious AI cooking assistant. "
         "IMPORTANT: Only use information from the provided recipe context. "
         "Always cite which recipe(s) you're referencing by name. "
         "If the answer is not in the context, explicitly say 'I don't have this information in the available recipes.' "
         "Do not make up recipes or ingredients that aren't in the context."},
        {"role": "user", "content": f"Context:\n{context_str}\n\nQuestion: {user_query}"}
    ]

    # NOTE(review): the templated prompt is built but not passed to a model
    # anywhere in this function; confirm whether a generation step is missing.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # 4. Display retrieved recipes with full details.
    # Numbering counts only recipes found in recipe_lookup, so it can differ
    # from the "Recipe N" labels in the context when a fallback was used.
    print("=== RETRIEVED RECIPES ===\n")
    for rank, recipe in enumerate(full_retrieved_recipes, 1):
        _print_recipe(rank, recipe)

    return full_retrieved_recipes  # Return full recipes for validation

In [None]:
# Example query: runs retrieval and prints the matching recipes.
query = "I have eggplants and garlic, what can I make?"
# The trailing semicolon stops Jupyter from echoing the returned recipe list
# as the cell's output; the recipes are still printed by the function itself.
generate_rag_response(query); # semicolon to suppress output

=== RETRIEVED RECIPES ===

1. Roasted Eggplant and Garlic Dip 

Ingredients:
  - 2 small heads garlic
  - 1 eggplant (1 pound)
  - 1/4 cup extra-virgin olive oil
  - 1 teaspoon red-wine vinegar, or to taste

Directions:
  1. Preheat oven to 425°F.
  2. Separate garlic cloves without peeling and tightly wrap together in foil. Prick eggplant with a fork. In a shallow baking pan roast garlic and eggplant in middle of oven until very tender, about 30 minutes for garlic and about 45 minutes for eggplant.
  3. Unwrap garlic and peel, transferring garlic to a food processor. Scrape flesh from eggplant into food processor, discarding skin. Purée mixture until smooth and, with motor running, add oil and vinegar until combined. Season dip with salt and pepper and serve with pita toasts.


2. Herbed Eggplant with Tomatoes, Onion and Garlic 

Ingredients:
  - 3 medium eggplants
  - 1/3 cup canned diced tomatoes in juice, drained
  - 1/2 cup chopped white onion
  - 2 garlic cloves, minced
  - 3 tab