In [1]:
import json
import numpy as np
import re
import math
import ollama
from sentence_transformers import SentenceTransformer
from langchain.schema import Document
from rank_bm25 import BM25Okapi

# Load the dataset. The encoding is pinned so the load does not depend on the
# platform default (JSON interchange text is UTF-8).
with open('final_recipes.json', 'r', encoding='utf-8') as f:
    recipes = json.load(f)

# Values that mark a field as "not provided" in the raw records.
_MISSING_MARKERS = (None, "", "NaN")


def _is_missing(value):
    """Return True when value is None, "", "NaN", or a float NaN."""
    if isinstance(value, float) and math.isnan(value):
        return True
    return value in _MISSING_MARKERS


def _parse_number(value, default=0.0):
    """Return value as a float when it looks like a non-negative decimal, else default.

    The gate is the same one used throughout this cell: the text form of the
    value, with at most one '.' removed, must consist only of digits.
    """
    if not str(value).replace('.', '', 1).isdigit():
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        # str.isdigit() accepts characters (e.g. superscript digits) that
        # float() cannot parse; treat those as "no usable number".
        return default


def _parse_minutes(value):
    """Return the first run of digits found in a string as an int, else 0.

    NOTE(review): only the first number is used, so a value such as
    "1 hr 30 mins" would yield 1 -- confirm the dataset stores plain minutes.
    """
    if not isinstance(value, str):
        return 0
    match = re.search(r'\d+', value)
    return int(match.group()) if match else 0


# Prepare text data for embedding and BM25
documents = []
tokenized_corpus = []
for recipe in recipes:
    # A recipe is only indexed when its identifying fields are all present.
    if any(_is_missing(recipe.get(field)) for field in ('title', 'recipe_url', 'category')):
        continue

    # Optional fields are read with .get so one incomplete record cannot abort
    # the whole load with a KeyError; absent values fall back to 0 / "Not Available".
    total_time = _parse_minutes(recipe.get('total_time'))
    servings = int(_parse_number(recipe.get('servings')))
    calories = _parse_number(recipe.get('calories'))
    protein = _parse_number(recipe.get('protein'))
    fat = _parse_number(recipe.get('total_fat'))

    raw_ingredients = recipe.get('ingredients')
    raw_directions = recipe.get('directions')
    ingredients = "Not Available" if _is_missing(raw_ingredients) else raw_ingredients
    directions = "Not Available" if _is_missing(raw_directions) else raw_directions

    text = f"Title: {recipe['title']}\nCategory: {recipe['category']}\nTotal Time: {total_time} mins\nServings: {servings}\nIngredients: {ingredients}\nDirections: {directions}\nCalories: {calories}\nProtein: {protein}\nFat: {fat}"
    documents.append(Document(page_content=text, metadata={'url': recipe['recipe_url']}))
    # BM25 operates on lower-cased, whitespace-split tokens of the same text.
    tokenized_corpus.append(text.lower().split())

if not documents:
    # Fail here with a clear message instead of inside index construction.
    raise ValueError("No usable recipes found in final_recipes.json; cannot build retrieval indexes.")

# Initialize Embedding Model & BM25 Index
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedding_model.encode([doc.page_content for doc in documents])
bm25 = BM25Okapi(tokenized_corpus)

# ==========================
# Retrieval Function
# ==========================
def retrieve_top_recipes(query, top_k=3):
    """Return up to ``top_k`` documents for ``query`` using hybrid BM25 + dense retrieval.

    Both retrievers contribute their ``top_k * 10`` best candidates. The two
    rankings are combined with reciprocal rank fusion (RRF): every candidate
    receives ``1 / (60 + rank)`` from each ranking it appears in (rank starts
    at 1), and candidates are returned in order of their summed score.

    A plain "concatenate, de-duplicate, slice" merge would always return the
    BM25 head, because BM25 candidates would fill the first ``top_k`` slots
    before any dense candidate is considered.

    Args:
        query: Free-text search string.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` entries from the module-level ``documents``,
        de-duplicated by ``page_content``. Empty when ``top_k`` is not positive
        or there are no documents.
    """
    if top_k <= 0 or not documents:
        return []

    pool_size = top_k * 10
    query_tokens = query.lower().split()

    # BM25 Search
    bm25_scores = bm25.get_scores(query_tokens)
    bm25_ranked_indices = np.argsort(bm25_scores)[::-1][:pool_size]

    # Dense Search (dot product between document and query embeddings)
    query_embedding = embedding_model.encode([query])
    dense_scores = np.dot(embeddings, query_embedding.T).flatten()
    dense_ranked_indices = np.argsort(dense_scores)[::-1][:pool_size]

    # Reciprocal rank fusion; 60 is the conventional smoothing constant.
    rrf_k = 60
    fused_scores = {}
    first_doc_for_text = {}
    for ranked_indices in (bm25_ranked_indices, dense_ranked_indices):
        seen_in_ranking = set()
        for position, doc_index in enumerate(ranked_indices, start=1):
            doc = documents[doc_index]
            text_key = doc.page_content
            if text_key in seen_in_ranking:
                # Identical text may occur under several indices; count it once
                # per ranking so duplicates cannot inflate their own score.
                continue
            seen_in_ranking.add(text_key)
            first_doc_for_text.setdefault(text_key, doc)
            fused_scores[text_key] = fused_scores.get(text_key, 0.0) + 1.0 / (rrf_k + position)

    # sorted() is stable, so ties keep BM25-first insertion order.
    ordered_keys = sorted(fused_scores, key=fused_scores.get, reverse=True)
    return [first_doc_for_text[key] for key in ordered_keys[:top_k]]

In [5]:
def generate_recipe_with_retrieval(cooking_time, retrieved_recipes_text):
    """Ask the ``llama2:7b`` model for a new recipe constrained to ``cooking_time`` minutes.

    Args:
        cooking_time: Value interpolated into the prompt as the required total
            cooking time in minutes.
        retrieved_recipes_text: Text of example recipes placed in the prompt's
            "Examples" section.

    Returns:
        The ``content`` field of the message returned by ``ollama.chat``.
    """
    # The prompt is assembled line by line; joining with "\n" and appending a
    # final newline gives the exact text sent to the model.
    prompt_lines = [
        "You are an expert cook. Using the following examples as guidance, generate a new unique recipe that meets the given cooking time constraint.",
        "",
        f"Cooking Time: The total cooking time must be = {cooking_time} minutes.",
        "Cooking includes boiling, baking, heating, frying, sautéing, grilling, roasting, or steaming.",
        "It does not include preparation, refrigeration, or cooling time.",
        "",
        "Follow exactly this format:",
        "Recipe Title: [Clear & concise name]",
        "Ingredients: [List ingredients with exact quantities]",
        "Instructions: [Step-by-step directions]",
        "",
        "The new recipe must be different from examples but inspired by their style and ingredient types.",
        "",
        "---",
        "Examples:",
        f"{retrieved_recipes_text}",
        "---",
        "Generate the new recipe now:",
    ]
    prompt = "\n".join(prompt_lines) + "\n"

    user_message = {'role': 'user', 'content': prompt}
    reply = ollama.chat(model='llama2:7b', messages=[user_message])
    return reply['message']['content']

In [12]:
# ==========================
# Evaluation Function
# ==========================
def evaluate_recipe_with_llama3(recipe_text, cooking_time=None):
    """Score a generated recipe on five metrics using the ``llama3:latest`` model.

    The rubric's "Answer Relevance" metric asks whether the recipe sticks to
    the given cooking time. Pass ``cooking_time`` so the evaluator is told what
    that target was; when it is omitted the prompt is exactly the original one
    and the evaluator can only infer the target from the recipe text.

    Note: the prompt does not state whether a high Hallucination score means
    "few" or "many" made-up details, so that score's direction is left to the
    model's interpretation.

    Args:
        recipe_text: The generated recipe to evaluate.
        cooking_time: Optional requested cooking time in minutes. When given,
            one sentence stating it is added to the prompt before the recipe.

    Returns:
        The ``content`` field of the message returned by ``ollama.chat``.
    """
    # An empty note leaves a single blank line between the rubric and the recipe.
    if cooking_time is None:
        constraint_note = ""
    else:
        constraint_note = f"\nThe recipe was requested with a total cooking time of {cooking_time} minutes.\n"

    evaluation_prompt = f"""
You are a strict recipe evaluation expert. Evaluate the following generated recipe against the following five metrics:

1. Faithfulness (Is the information factually correct?)
2. Answer Relevance (Does it stick to the given cooking time and format?)
3. Coherence (Is it easy to follow and logically ordered?)
4. Hallucination (Are there any made-up ingredients, units, or steps?)
5. Consistency of quality and units (Do quantities match ingredient names properly?)
{constraint_note}
Here is the recipe:
---
{recipe_text}
---

Provide a score for each metric out of 5, and a 2-3 line feedback for each metric.
Format:

Faithfulness: X/5 - [feedback]
Answer Relevance: X/5 - [feedback]
Coherence: X/5 - [feedback]
Hallucination: X/5 - [feedback]
Consistency: X/5 - [feedback]
"""
    response = ollama.chat(
        model='llama3:latest',
        messages=[{'role': 'user', 'content': evaluation_prompt}]
    )
    return response['message']['content']

In [23]:
if __name__ == "__main__":
    # Number of distinct recipes to collect for each cooking time.
    num_recipes = 5
    # Upper bound on generation calls per cooking time, so a generator that
    # keeps returning text already collected cannot loop forever.
    max_attempts = num_recipes * 5
    all_recipes_dict = {}

    for cooking_time in [2, 5, 10, 40, 90, 150]:
        all_recipes_dict[cooking_time] = []

        # Retrieve examples. The query depends only on cooking_time, so this is
        # done once per cooking time rather than once per generation attempt.
        query = f"Recipes that can be cooked in {cooking_time} minutes"
        top_retrieved = retrieve_top_recipes(query, top_k=3)
        retrieved_context = "\n\n".join(doc.page_content for doc in top_retrieved)

        attempts = 0
        while len(all_recipes_dict[cooking_time]) < num_recipes and attempts < max_attempts:
            attempts += 1

            # Generate using retrieval
            generated_recipe = generate_recipe_with_retrieval(cooking_time, retrieved_context)

            # Ensure uniqueness: exact repeats are discarded and not evaluated.
            if generated_recipe in all_recipes_dict[cooking_time]:
                continue
            all_recipes_dict[cooking_time].append(generated_recipe)

            # Evaluate
            evaluation = evaluate_recipe_with_llama3(generated_recipe)
            print(f"\n=== Cooking Time: {cooking_time} mins ===")
            print("\nEvaluation Report:\n", evaluation)

        collected = len(all_recipes_dict[cooking_time])
        if collected < num_recipes:
            print(f"\nWarning: only {collected} unique recipes for {cooking_time} mins after {attempts} attempts")

    # Save all generated recipes (the integer cooking times become string keys in JSON).
    with open('generated_rag_recipes.json', 'w') as f:
        json.dump(all_recipes_dict, f, indent=2)



=== Cooking Time: 2 mins ===

Evaluation Report:
 Here are my evaluations for each metric:

Faithfulness: 4.5/5 - The recipe appears to be factually correct, with accurate information about cooking times and temperatures. However, I question the claim that this recipe takes only 2 minutes of total cooking time. It seems unlikely that the eggs would cook in just 10-12 minutes, especially considering the vegetables are cooked separately.

Answer Relevance: 3/5 - While the recipe is designed for a specific format (ovensafe skillet), it's unclear why it claims to take only 2 minutes of total cooking time. This discrepancy may lead readers to question the accuracy of other times and instructions in the recipe.

Coherence: 4/5 - The recipe is generally easy to follow, with clear instructions and logical steps. However, I would suggest rephrasing step 5 to emphasize that the eggs are cooked for 1 minute on the stovetop before being transferred to the oven.

Hallucination: 2/5 - There is no i