In [None]:
# -*- coding: utf-8 -*-
"""
🎬 DSPy Multi-Agent Movie Recommendation System - OPTIMIZATION DEMONSTRATION

This notebook demonstrates how to optimize a sophisticated multi-agent system using DSPy.
We'll take the existing movie recommendation system and systematically improve it.

📊 What You'll See:
1. Original multi-agent system baseline
2. Training data generation for optimization
3. Custom evaluation metrics for movie recommendations
4. DSPy optimization applied to individual agents
5. Side-by-side comparison of original vs optimized
6. Performance analytics and improvement measurement
"""

# =============================================================================
# 📦 SECTION 1: Environment Setup & Dependencies
# =============================================================================

# Install required packages
!pip install dspy-ai
!pip install mlflow
!pip install requests
!pip install gradio
!pip install openai
!pip install -U 'mlflow[databricks]>=3.1'


Collecting dspy-ai
  Downloading dspy_ai-2.6.27-py3-none-any.whl.metadata (286 bytes)
Collecting dspy>=2.6.5 (from dspy-ai)
  Downloading dspy-2.6.27-py3-none-any.whl.metadata (7.0 kB)
Collecting backoff>=2.2 (from dspy>=2.6.5->dspy-ai)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting ujson>=5.8.0 (from dspy>=2.6.5->dspy-ai)
  Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)
Collecting datasets>=2.14.6 (from dspy>=2.6.5->dspy-ai)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting optuna>=3.4.0 (from dspy>=2.6.5->dspy-ai)
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting magicattr>=0.1.6 (from dspy>=2.6.5->dspy-ai)
  Downloading magicattr-0.1.6-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting litellm>=1.60.3 (from dspy>=2.6.5->dspy-ai)
  Downloading litellm-1.73.6-py3-none-any.whl.metadata (39 kB)
Collecting diskcache>=5.6.0 (from dspy>=2.6.5->dspy-ai)
  Downloading

In [None]:

# Import required libraries
import dspy
import os
import json
import requests
import pandas as pd
import numpy as np
from typing import List, Dict, Optional, Any, Tuple
from dataclasses import dataclass
from datetime import datetime
import mlflow
import gradio as gr
import random
import re
from collections import defaultdict

print("✅ All packages installed successfully!")


✅ All packages installed successfully!


In [None]:

# =============================================================================
# 🔑 SECTION 2: API Configuration
# =============================================================================

from getpass import getpass

# Set up your API keys (prompted interactively so they never end up in the
# notebook source)
OPENAI_API_KEY = getpass("Enter your OpenAI API key: ")
TMDB_API_KEY = getpass("Enter your TMDB API key (or use demo key): ")

# Configure environment: dspy.LM below is not given an explicit key, so the
# OpenAI key is exposed through the environment instead
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Configure DSPy with the language model shared by every agent and tool
llm = dspy.LM(model="openai/gpt-4o-mini", max_tokens=1000)
dspy.settings.configure(lm=llm)

# Set Databricks authentication details as environment variables
# NOTE: replace the <your_id> placeholder with your workspace host before running
DATABRICKS_HOST = "https://<your_id>.cloud.databricks.com"
DATABRICKS_TOKEN = getpass("Enter your Databricks PAT: ")  # Prompt for new PAT

os.environ["DATABRICKS_HOST"] = DATABRICKS_HOST
os.environ["DATABRICKS_TOKEN"] = DATABRICKS_TOKEN

# Explicitly set tracking URI with Databricks format
mlflow.set_tracking_uri("databricks")

# Set experiment with correct user path (replace with your actual email)
mlflow.set_experiment("/Users/<email_id>/<experiment_name>")  # Adjust email

# Enable autologging
mlflow.autolog()

# Reported once; the previous version carried a stale commented-out copy of
# the configuration above and printed this message twice.
print("🔑 API configuration complete!")


Enter your OpenAI API key: ··········
Enter your TMDB API key (or use demo key): ··········
Enter your Databricks PAT: ··········


2025/06/30 00:56:09 INFO mlflow.tracking.fluent: Experiment with name '/Users/<email_id>/dspy-movie-optimization-demo' does not exist. Creating a new experiment.
2025/06/30 00:56:09 INFO mlflow.tracking.fluent: Autologging successfully enabled for dspy.
2025/06/30 00:56:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for litellm.
2025/06/30 00:56:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for openai.
2025/06/30 00:56:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


🔑 API configuration complete!
🔑 API configuration complete!


In [None]:

# =============================================================================
# 🎬 SECTION 3: Enhanced TMDB Client (From Original System)
# =============================================================================

class EnhancedTMDBClient:
    """Client for the TMDB v3 REST API used by the recommendation tools.

    Looks a movie up by title, fetches its details (with credits, keywords,
    similar titles and recommendations appended) and flattens the response
    into a plain metadata dict.

    Request failures are reported with ``print`` and turned into an empty
    dict, so callers only need to check for emptiness.

    Args:
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        timeout: Seconds to wait for TMDB before giving up on a request.
            Without a timeout ``requests`` may block indefinitely, which
            would freeze the notebook cell (and any agent calling the tool).
    """

    def __init__(self, api_key: str, timeout: float = 10.0):
        self.api_key = api_key
        self.base_url = "https://api.themoviedb.org/3"
        self.timeout = timeout

    def _get_json(self, path: str, params: Dict, action: str) -> Dict:
        """GET ``base_url + path`` and return the decoded JSON object.

        Returns ``{}`` on a non-200 status, on a payload that is not a JSON
        object, or on any exception (which is printed as ``Error <action>: ...``).
        """
        url = f"{self.base_url}{path}"
        try:
            response = requests.get(url, params=params, timeout=self.timeout)
            if response.status_code == 200:
                payload = response.json()
                if isinstance(payload, dict):
                    return payload
        except Exception as e:
            # Deliberately best-effort: the tools built on top of this client
            # report "not found" instead of crashing the agent loop.
            print(f"Error {action}: {e}")
        return {}

    def search_movie(self, title: str) -> Dict:
        """Search for a movie by title

        Returns the first search hit as a dict, or ``{}`` when there is no
        hit or the request failed.
        """
        params = {
            "api_key": self.api_key,
            "query": title,
            "language": "en-US"
        }
        payload = self._get_json("/search/movie", params, "searching movie")
        results = payload.get("results") or []
        return results[0] if results else {}

    def get_movie_details(self, movie_id: int) -> Dict:
        """Get detailed movie information

        Credits, keywords, similar movies and recommendations are requested
        in the same call through ``append_to_response``. Returns ``{}`` on
        failure.
        """
        params = {
            "api_key": self.api_key,
            "append_to_response": "credits,keywords,similar,recommendations"
        }
        return self._get_json(f"/movie/{movie_id}", params, "getting movie details")

    def extract_movie_metadata(self, movie_details: Dict) -> Dict:
        """Extract structured metadata from TMDB movie details

        Returns ``{}`` for empty input; otherwise a dict with the keys
        ``title``, ``release_date``, ``overview``, ``genres``, ``genre_ids``,
        ``themes``, ``director``, ``cast``, ``runtime``, ``vote_average`` and
        ``tmdb_id``.
        """
        if not movie_details:
            return {}

        # Genre names are lower-cased; ids are kept alongside them
        genre_entries = movie_details.get("genres", [])
        genres = [genre["name"].lower() for genre in genre_entries]
        genre_ids = [genre["id"] for genre in genre_entries]

        credits = movie_details.get("credits", {})

        # Cast: names of the first five listed actors
        cast = [actor.get("name", "") for actor in credits.get("cast", [])[:5]]

        # Director: first crew member whose job is exactly "Director"
        director = next(
            (
                person.get("name", "")
                for person in credits.get("crew", [])
                if person.get("job") == "Director"
            ),
            "",
        )

        # Themes: first ten keywords, lower-cased
        keywords_data = movie_details.get("keywords", {})
        themes = [kw["name"].lower() for kw in keywords_data.get("keywords", [])[:10]]

        return {
            "title": movie_details.get("title", ""),
            "release_date": movie_details.get("release_date", ""),
            "overview": movie_details.get("overview", ""),
            "genres": genres,
            "genre_ids": genre_ids,
            "themes": themes,
            "director": director,
            "cast": cast,
            "runtime": movie_details.get("runtime", 0),
            "vote_average": movie_details.get("vote_average", 0),
            "tmdb_id": movie_details.get("id", 0)
        }

    def get_comprehensive_movie_data(self, title: str) -> Dict:
        """Get comprehensive movie data for a title

        Returns the metadata dict from ``extract_movie_metadata`` extended
        with ``similar_movies`` and ``recommended_movies`` (up to five titles
        each), or ``{"error": <message>}`` when the movie cannot be found or
        its details cannot be retrieved.
        """
        search_result = self.search_movie(title)
        if not search_result:
            return {"error": f"Movie '{title}' not found"}

        movie_id = search_result.get("id")
        # A hit without an id can never resolve to details, so skip the request
        details = self.get_movie_details(movie_id) if movie_id is not None else {}
        if not details:
            return {"error": f"Could not retrieve details for '{title}'"}

        metadata = self.extract_movie_metadata(details)

        def first_titles(section: str) -> list:
            """Titles of the first five entries in an appended result section."""
            return [movie["title"] for movie in details.get(section, {}).get("results", [])[:5]]

        metadata["similar_movies"] = first_titles("similar")
        metadata["recommended_movies"] = first_titles("recommendations")

        return metadata

# Initialize TMDB client
# Module-level instance: movie_metadata_lookup_tool reads this global `tmdb`,
# so this cell must run before any tool or agent is invoked.
tmdb = EnhancedTMDBClient(TMDB_API_KEY)
print("🎬 TMDB client initialized!")


🎬 TMDB client initialized!


In [None]:

# =============================================================================
# 🛠️ SECTION 4: Original Agent Tools
# =============================================================================

def movie_metadata_lookup_tool(movie_title: str) -> str:
    """Tool for looking up movie metadata from TMDB API"""
    # The docstring above is kept verbatim: this function is handed to
    # dspy.ReAct as a tool, and the docstring presumably serves as the tool
    # description shown to the model.
    #
    # Always returns a string: pretty-printed JSON metadata on success, or a
    # message starting with "Error" when the lookup fails for any reason.
    try:
        movie_data = tmdb.get_comprehensive_movie_data(movie_title)
        outcome = (
            f"Error: {movie_data['error']}"
            if "error" in movie_data
            else json.dumps(movie_data, indent=2)
        )
    except Exception as exc:
        return f"Error retrieving movie data: {exc}"
    return outcome

def movie_hypothesis_generator_tool(movie_title: str, metadata: str) -> str:
    """Tool for generating hypotheses about why user loved a movie"""
    # Docstring left unchanged on purpose: this function is registered as a
    # dspy.ReAct tool and the docstring presumably doubles as its description.
    #
    # movie_title: title of the movie the user loved.
    # metadata:    movie metadata as text (the lookup tool produces JSON).
    # Returns the `hypotheses` output field of the prediction.

    # The signature is declared inside the function, so it and the
    # ChainOfThought module below are rebuilt on every call.
    class HypothesisGenerator(dspy.Signature):
        movie_title = dspy.InputField(desc="The movie the user loved")
        movie_metadata = dspy.InputField(desc="Real movie metadata from TMDB")
        hypotheses = dspy.OutputField(desc="Three specific hypotheses about what drew them to the movie")

    generator = dspy.ChainOfThought(HypothesisGenerator)
    # The tool's `metadata` argument is passed as the signature's `movie_metadata` field
    result = generator(movie_title=movie_title, movie_metadata=metadata)
    return result.hypotheses

def recommendation_generator_tool(movie_title: str, user_hypothesis: str) -> str:
    """Tool for generating movie recommendations based on user preferences"""
    # Docstring left unchanged on purpose: this function is registered as a
    # dspy.ReAct tool and the docstring presumably doubles as its description.
    #
    # movie_title:     the movie the user loved.
    # user_hypothesis: text describing what the user likely enjoyed about it.
    # Returns the `recommendations` output field of the prediction.

    # Declared inline, so the signature and module are recreated per call.
    class RecommendationGenerator(dspy.Signature):
        original_movie = dspy.InputField(desc="Movie the user loved")
        user_preference_hypothesis = dspy.InputField(desc="What the user likely enjoyed about the movie")
        recommendations = dspy.OutputField(desc="Three movie recommendations with brief explanations")

    generator = dspy.ChainOfThought(RecommendationGenerator)
    # Tool argument names differ from the signature field names; map them here
    result = generator(original_movie=movie_title, user_preference_hypothesis=user_hypothesis)
    return result.recommendations

def narrative_constructor_tool(movie_title: str, reason: str) -> str:
    """Tool for constructing compelling narrative explanations"""
    # Docstring left unchanged on purpose: this function is registered as a
    # dspy.ReAct tool and the docstring presumably doubles as its description.
    #
    # movie_title: the movie being recommended (not the one the user loved).
    # reason:      why that movie connects to the user's taste.
    # Returns the `narrative_explanation` output field of the prediction.

    # Declared inline, so the signature and module are recreated per call.
    class NarrativeConstructor(dspy.Signature):
        recommended_movie = dspy.InputField(desc="The movie being recommended")
        connection_reason = dspy.InputField(desc="Why this movie connects to user's taste")
        narrative_explanation = dspy.OutputField(desc="A compelling story-driven explanation")

    constructor = dspy.ChainOfThought(NarrativeConstructor)
    # Tool argument names differ from the signature field names; map them here
    result = constructor(recommended_movie=movie_title, connection_reason=reason)
    return result.narrative_explanation

print("🛠️ Original agent tools ready!")


🛠️ Original agent tools ready!


In [None]:

# =============================================================================
# 🤖 SECTION 5: Original Multi-Agent System
# =============================================================================

class MovieAnalysisSignature(dspy.Signature):
    """Analyze a movie to understand user preferences and generate recommendations."""
    # Signature of the movie-analysis ReAct agent (original_movie_agent).
    # The docstring is left verbatim because DSPy is understood to use a
    # signature's docstring as the instructions given to the model.
    movie_title: str = dspy.InputField()  # input: title of the movie to analyse
    analysis_result: str = dspy.OutputField(desc="Complete analysis with movie recommendations")

class NarrativeSignature(dspy.Signature):
    """Create compelling narrative explanations for movie recommendations."""
    # Signature of the narrative ReAct agent (original_narrative_agent).
    # The docstring is left verbatim because DSPy is understood to use a
    # signature's docstring as the instructions given to the model.
    movie_recommendations: str = dspy.InputField()  # input: recommendations as free text
    narrative_explanations: str = dspy.OutputField(desc="Compelling narrative explanations")

class OrchestratorSignature(dspy.Signature):
    """Master orchestrator coordinating movie analysis and narrative agents."""
    # Signature of the top-level ReAct orchestrator. Its field names are the
    # ones the training examples and metrics rely on: `user_input` is the
    # example input and `final_recommendations` is what the metrics score.
    # The docstring is left verbatim because DSPy is understood to use a
    # signature's docstring as the instructions given to the model.
    user_input: str = dspy.InputField()
    final_recommendations: str = dspy.OutputField(desc="Final movie recommendations with narratives")

# Create original agents
# Movie-analysis agent: may look up TMDB metadata, generate hypotheses about
# the user's taste, and generate recommendations.
original_movie_agent = dspy.ReAct(
    MovieAnalysisSignature,
    tools=[movie_metadata_lookup_tool, movie_hypothesis_generator_tool, recommendation_generator_tool]
)

# Narrative agent: turns recommendations into narrative explanations using a
# single tool.
original_narrative_agent = dspy.ReAct(
    NarrativeSignature,
    tools=[narrative_constructor_tool]
)

# Tools for orchestrator
def call_movie_analysis_agent(movie_title: str) -> str:
    """Call the Movie Analysis Agent"""
    # Docstring kept verbatim (it presumably acts as the tool description for
    # the orchestrator). Delegates to the module-level movie agent and
    # returns only the text of its `analysis_result` field.
    return original_movie_agent(movie_title=movie_title).analysis_result

def call_narrative_agent(recommendations: str) -> str:
    """Call the Narrative Agent"""
    # Docstring kept verbatim (it presumably acts as the tool description for
    # the orchestrator). Delegates to the module-level narrative agent and
    # returns only the text of its `narrative_explanations` field.
    return original_narrative_agent(movie_recommendations=recommendations).narrative_explanations

# Original orchestrator
# Top-level ReAct program: it can delegate to the two sub-agents through the
# wrapper functions above and can also query TMDB metadata directly.
# This is the program that is evaluated and optimized in Section 8.
original_orchestrator = dspy.ReAct(
    OrchestratorSignature,
    tools=[call_movie_analysis_agent, call_narrative_agent, movie_metadata_lookup_tool]
)

print("🤖 Original multi-agent system ready!")


🤖 Original multi-agent system ready!


In [None]:

# =============================================================================
# 📊 SECTION 6: Training Dataset Generation
# =============================================================================

def generate_training_dataset(size: int = 60) -> "List[dspy.Example]":
    """Generate training dataset for optimization

    Builds examples from 5 hand-curated patterns followed by 20 patterns
    derived from genre clusters (each movie in a cluster is paired with the
    other three movies of that cluster).

    Args:
        size: Maximum number of examples to return. Only 25 patterns exist,
            so any ``size`` above 25 yields 25 examples; values <= 0 yield
            an empty list.

    Returns:
        A list of ``dspy.Example`` objects whose only input field is
        ``user_input``. Each example also carries ``final_recommendations``
        (the reference answer), ``input_movie``, ``expected_themes`` and
        ``expected_recs``, which the evaluation metrics read.
    """

    # Curated examples of good movie taste patterns
    curated_patterns = [
        {
            "input_movie": "Inception",
            "expected_recommendations": ["Memento", "Shutter Island", "The Prestige"],
            "expected_themes": ["mind-bending", "psychological", "complex narrative"],
            "quality_narrative": "If you loved Inception's layered reality and complex storytelling..."
        },
        {
            "input_movie": "The Matrix",
            "expected_recommendations": ["Blade Runner 2049", "Ex Machina", "Ghost in the Shell"],
            "expected_themes": ["artificial intelligence", "reality questioning", "cyberpunk"],
            "quality_narrative": "Like The Matrix, these films explore the nature of reality..."
        },
        {
            "input_movie": "Pulp Fiction",
            "expected_recommendations": ["Reservoir Dogs", "Kill Bill", "Snatch"],
            "expected_themes": ["non-linear narrative", "crime", "dark humor"],
            "quality_narrative": "These films share Tarantino's distinctive storytelling style..."
        },
        {
            "input_movie": "Interstellar",
            "expected_recommendations": ["Arrival", "Contact", "2001: A Space Odyssey"],
            "expected_themes": ["space exploration", "scientific concepts", "emotional depth"],
            "quality_narrative": "Like Interstellar, these films blend hard science with human emotion..."
        },
        {
            "input_movie": "The Dark Knight",
            "expected_recommendations": ["Heat", "The Departed", "Zodiac"],
            "expected_themes": ["crime thriller", "moral complexity", "psychological depth"],
            "quality_narrative": "These films share The Dark Knight's serious approach to crime..."
        }
    ]

    # Generate more patterns programmatically
    movie_clusters = {
        "horror": ["The Exorcist", "Hereditary", "The Babadook", "Get Out"],
        "comedy": ["The Grand Budapest Hotel", "In Bruges", "Kiss Kiss Bang Bang", "The Nice Guys"],
        "drama": ["There Will Be Blood", "No Country for Old Men", "Moonlight", "Parasite"],
        "action": ["Mad Max: Fury Road", "John Wick", "The Raid", "Baby Driver"],
        "sci-fi": ["Blade Runner", "Alien", "Dune", "Her"]
    }

    cluster_patterns = []
    for genre, movies in movie_clusters.items():
        for i, movie in enumerate(movies):
            # Every other movie of the same cluster, capped at three
            recommendations = [m for j, m in enumerate(movies) if j != i][:3]
            cluster_patterns.append({
                "input_movie": movie,
                "expected_recommendations": recommendations,
                "expected_themes": [genre, "quality filmmaking", "genre excellence"],
                "quality_narrative": f"As a fan of {movie}, you'll appreciate these {genre} masterpieces..."
            })

    # Truncate the combined list. The previous slice
    # `additional_patterns[:size - len(training_patterns)]` went negative for
    # size < 5 (returning 23 examples for size=3) and never trimmed the
    # curated patterns. Results for size >= 5 are unchanged.
    all_patterns = (curated_patterns + cluster_patterns)[:max(size, 0)]

    # Convert to DSPy examples
    training_examples = []
    for pattern in all_patterns:
        # Create the input as a user query
        user_query = f"I loved the movie {pattern['input_movie']}. Can you recommend similar movies?"

        # Create expected output format
        expected_output = f"""
**Recommendations for {pattern['input_movie']} lovers:**

1. **{pattern['expected_recommendations'][0]}**: {pattern['quality_narrative']}
2. **{pattern['expected_recommendations'][1]}**: Connected through {', '.join(pattern['expected_themes'][:2])}
3. **{pattern['expected_recommendations'][2]}**: Shares the same {pattern['expected_themes'][0]} appeal

**Analysis**: Based on your love for {pattern['input_movie']}, I identified these key themes: {', '.join(pattern['expected_themes'])}. These recommendations match those preferences perfectly.
"""

        example = dspy.Example(
            user_input=user_query,
            final_recommendations=expected_output,
            input_movie=pattern['input_movie'],
            expected_themes=pattern['expected_themes'],
            expected_recs=pattern['expected_recommendations']
        ).with_inputs("user_input")

        training_examples.append(example)

    return training_examples

# Generate training and validation sets
print("📊 Generating training dataset...")
# 80 is only an upper bound: the generator has 25 patterns in total.
full_dataset = generate_training_dataset(80)

# Shuffle with a dedicated, seeded RNG so the train/validation split (and
# therefore every score reported below) is identical on each Restart & Run All,
# without touching the global `random` state.
DATASET_SPLIT_SEED = 42
random.Random(DATASET_SPLIT_SEED).shuffle(full_dataset)

# Split into train/val (70% / 30%)
train_size = int(0.7 * len(full_dataset))
trainset = full_dataset[:train_size]
valset = full_dataset[train_size:]

print(f"✅ Dataset created: {len(trainset)} training examples, {len(valset)} validation examples")
print("📋 Sample training example:")
print(f"Input: {trainset[0].user_input}")
print(f"Expected output (first 200 chars): {trainset[0].final_recommendations[:200]}...")


📊 Generating training dataset...
✅ Dataset created: 17 training examples, 8 validation examples
📋 Sample training example:
Input: I loved the movie Interstellar. Can you recommend similar movies?
Expected output (first 200 chars): 
**Recommendations for Interstellar lovers:**

1. **Arrival**: Like Interstellar, these films blend hard science with human emotion...
2. **Contact**: Connected through space exploration, scientific c...


In [None]:

# =============================================================================
# 📏 SECTION 7: Custom Evaluation Metrics
# =============================================================================

def extract_recommended_movies(response: str) -> List[str]:
    """Pull up to three recommended movie titles out of a free-text response.

    Three regular expressions are tried in order, all case-insensitively.
    All matches of the first pattern come before those of the second, and so
    on. Candidates are stripped, must be longer than two characters, and are
    de-duplicated (case-sensitively) keeping the first occurrence.
    """
    title_patterns = (
        r'\d+\.\s*\*\*([^*]+)\*\*',  # "1. **Movie Title**"
        r'\*\*([^*]+)\*\*(?=:)',      # "**Movie Title**:"
        r'recommend[^:]*:\s*([^,\n]+)',  # "I recommend: Movie Title"
    )

    stripped_candidates = (
        raw_match.strip()
        for regex in title_patterns
        for raw_match in re.findall(regex, response, re.IGNORECASE)
    )

    # dict.fromkeys keeps insertion order, which gives ordered de-duplication
    unique_titles = list(
        dict.fromkeys(title for title in stripped_candidates if len(title) > 2)
    )

    return unique_titles[:3]  # Return top 3

def _title_match_score(predicted_title: str, expected_title: str) -> float:
    """Score one predicted/expected title pair.

    1.0 when either lower-cased title contains the other, 0.5 when a word
    longer than three characters of the predicted title is a word of the
    expected title, otherwise 0.0.
    """
    predicted_lower = predicted_title.lower()
    expected_lower = expected_title.lower()
    if predicted_lower in expected_lower or expected_lower in predicted_lower:
        return 1.0
    expected_words = expected_lower.split()
    if any(word in expected_words for word in predicted_lower.split() if len(word) > 3):
        return 0.5
    return 0.0


def recommendation_relevance_metric(example, pred, trace=None) -> float:
    """
    Measure how relevant the recommendations are to the expected movies/themes
    Returns score between 0.0 and 1.0

    The score combines three parts:
      * 40% - share of expected movies covered by a predicted title,
      * 40% - share of expected themes mentioned in the response text,
      * 20% - simple response-quality indicators (keywords, length, bold markup).

    Args:
        example: Object with ``expected_recs`` (list of titles) and
            ``expected_themes`` (list of strings).
        pred: Object with a ``final_recommendations`` string.
        trace: Accepted for DSPy metric compatibility; not used.

    Returns:
        0.0 when no title can be extracted from the response or when an
        unexpected error occurs (the error is printed), otherwise the
        weighted score.
    """
    try:
        # Extract predicted movies
        predicted_movies = extract_recommended_movies(pred.final_recommendations)
        expected_movies = example.expected_recs
        expected_themes = set(theme.lower() for theme in example.expected_themes)

        if not predicted_movies:
            return 0.0

        # Score 1: Direct movie matches (40% weight)
        # Each expected movie contributes its single best match. The previous
        # pairwise sum credited one expected title once per matching
        # prediction (e.g. two "Kill Bill" volumes counted as two hits).
        if expected_movies:
            movie_score = sum(
                max(_title_match_score(pred_movie, exp_movie) for pred_movie in predicted_movies)
                for exp_movie in expected_movies
            ) / len(expected_movies)
        else:
            # Nothing to match against; scored explicitly instead of relying
            # on a swallowed ZeroDivisionError
            movie_score = 0.0

        # Score 2: Theme relevance (40% weight)
        response_lower = pred.final_recommendations.lower()
        if expected_themes:
            theme_matches = sum(1 for theme in expected_themes if theme in response_lower)
            theme_score = theme_matches / len(expected_themes)
        else:
            theme_score = 0.0

        # Score 3: Response quality (20% weight)
        quality_indicators = [
            "recommendation" in response_lower,
            "similar" in response_lower,
            "love" in response_lower or "enjoy" in response_lower,
            len(pred.final_recommendations) > 200,  # Substantial response
            "**" in pred.final_recommendations  # Formatted properly
        ]
        quality_score = sum(quality_indicators) / len(quality_indicators)

        # Combined score
        total_score = (movie_score * 0.4) + (theme_score * 0.4) + (quality_score * 0.2)
        return min(total_score, 1.0)

    except Exception as e:
        # Best-effort by design: a malformed prediction scores 0 instead of
        # aborting a whole evaluation or optimization run.
        print(f"Error in relevance metric: {e}")
        return 0.0

def narrative_quality_metric(example, pred, trace=None) -> float:
    """
    Score the narrative quality of ``pred.final_recommendations``.

    Seven yes/no checks are evaluated and the fraction that pass is returned
    (0.0 to 1.0): compelling vocabulary, connection phrases, specific film
    details, emotional vocabulary, bold-markup structure, length between 300
    and 1500 characters, and at least five periods. Keyword checks are plain
    substring tests on the lower-cased text.

    ``example`` and ``trace`` are accepted for DSPy metric compatibility and
    are not used. Any error is printed and scored as 0.0.
    """
    compelling_words = (
        "captivating", "compelling", "brilliant", "masterpiece", "extraordinary",
        "remarkable", "stunning", "powerful", "moving", "unforgettable",
    )
    connection_phrases = (
        "like", "similar to", "if you loved", "shares", "connects",
        "reminiscent of", "echoes", "parallels",
    )
    detail_words = (
        "director", "cinematography", "themes", "style", "genre",
        "plot", "character", "atmosphere",
    )
    emotion_words = (
        "feel", "experience", "journey", "emotion", "heart",
        "soul", "passion", "depth",
    )

    try:
        text = pred.final_recommendations
        lowered = text.lower()

        def mentions_any(vocabulary) -> bool:
            return any(term in lowered for term in vocabulary)

        checks = [
            mentions_any(compelling_words),      # compelling language
            mentions_any(connection_phrases),    # connection explanation
            mentions_any(detail_words),          # specific details
            mentions_any(emotion_words),         # emotional appeal
            # Three "**" markers split the text into at least four parts
            text.count("**") >= 3,               # structure
            300 <= len(text) <= 1500,            # optimal length
            text.count(".") >= 5,                # multiple sentences
        ]
        return sum(checks) / len(checks)

    except Exception as e:
        print(f"Error in narrative quality metric: {e}")
        return 0.0

def combined_movie_recommendation_metric(example, pred, trace=None) -> float:
    """
    Blend the relevance and narrative-quality metrics into a single score.

    Relevance carries 60% of the weight and narrative quality 40%; all three
    arguments are forwarded unchanged to both component metrics.
    """
    relevance_weight, narrative_weight = 0.6, 0.4
    return (
        relevance_weight * recommendation_relevance_metric(example, pred, trace)
        + narrative_weight * narrative_quality_metric(example, pred, trace)
    )

print("📏 Custom evaluation metrics ready!")


📏 Custom evaluation metrics ready!


In [None]:

# =============================================================================
# 🎯 SECTION 8: DSPy Optimization Process
# =============================================================================

def _score_as_float(result) -> float:
    """Coerce the value returned by a dspy.Evaluate call to a plain float."""
    # Some DSPy releases return the aggregate score directly, others wrap it in
    # a result object exposing `.score`; getattr with a fallback handles both.
    return float(getattr(result, "score", result))


def _format_relative_change(baseline: float, improvement: float) -> str:
    """Render `improvement` as a percentage of `baseline` ('n/a' if baseline is 0)."""
    if baseline == 0:
        # A zero baseline (e.g. every metric call failed and returned 0.0)
        # has no meaningful relative change; avoid ZeroDivisionError.
        return "n/a"
    return f"{improvement / baseline * 100:.1f}%"


def run_optimization(train_size: int = 30, val_size: int = 10):
    """Run the complete optimization process.

    Measures the baseline score of the module-level `original_orchestrator`,
    compiles an optimized version with `dspy.MIPROv2` (auto="light"), and
    re-scores it with the same evaluator. Each phase runs inside its own
    MLflow run and logs its score; the second run also logs the improvement.

    Relies on the notebook globals `trainset`, `valset`,
    `original_orchestrator` and `combined_movie_recommendation_metric`.

    Args:
        train_size: Number of leading `trainset` examples passed to the optimizer.
        val_size: Number of leading `valset` examples used both for evaluation
            and as the optimizer's validation set.

    Returns:
        Tuple of (optimized_orchestrator, baseline_score, optimized_score),
        with both scores as plain floats.
    """

    print("🎯 Starting DSPy optimization process...")

    # Slice once so the evaluator and the optimizer see the same examples.
    train_subset = trainset[:train_size]  # Use subset for demo
    val_subset = valset[:val_size]  # Use smaller set for demo

    # Set up the optimizer
    optimizer = dspy.MIPROv2(
        metric=combined_movie_recommendation_metric,
        auto="light",  # Use light mode for faster optimization
        num_threads=4,
        verbose=True
    )

    # Create evaluator for baseline measurement
    evaluator = dspy.Evaluate(
        metric=combined_movie_recommendation_metric,
        devset=val_subset,
        display_table=True,
        display_progress=True
    )

    print("📊 Measuring baseline performance...")
    with mlflow.start_run(run_name="baseline_measurement"):
        baseline_score = _score_as_float(evaluator(original_orchestrator))
        mlflow.log_metric("baseline_score", baseline_score)
        print(f"🔍 Baseline Score: {baseline_score:.3f}")

    print("🚀 Running optimization...")
    with mlflow.start_run(run_name="optimization_process"):
        optimized_orchestrator = optimizer.compile(
            original_orchestrator,
            trainset=train_subset,
            valset=val_subset,
            requires_permission_to_run=False
        )

        print("📊 Measuring optimized performance...")
        optimized_score = _score_as_float(evaluator(optimized_orchestrator))
        improvement = optimized_score - baseline_score
        mlflow.log_metric("optimized_score", optimized_score)
        mlflow.log_metric("improvement", improvement)

        print("🎉 Optimization Results:")
        print(f"   Baseline Score: {baseline_score:.3f}")
        print(f"   Optimized Score: {optimized_score:.3f}")
        print(f"   Improvement: {improvement:.3f} ({_format_relative_change(baseline_score, improvement)})")

    return optimized_orchestrator, baseline_score, optimized_score

# Run the optimization and bind the optimized program plus both scores to
# notebook-level names.
# NOTE(review): this cell performs a baseline evaluation followed by a full
# MIPROv2 compile, so it is slow to re-run — consider caching the result.
print("⚡ Running optimization process...")
optimized_orchestrator, baseline_score, optimized_score = run_optimization()
print("✅ Optimization complete!")


⚡ Running optimization process...
🎯 Starting DSPy optimization process...
📊 Measuring baseline performance...
  0%|          | 0/8 [00:00<?, ?it/s]

2025/06/30 00:57:01 INFO mlflow.tracking.fluent: Autologging successfully enabled for langchain.


Average Metric: 3.92 / 8 (48.9%): 100%|██████████| 8/8 [01:17<00:00,  9.68s/it]

2025/06/30 00:58:18 INFO dspy.evaluate.evaluate: Average Metric: 3.9154285714285715 / 8 (48.9%)





Unnamed: 0,user_input,example_final_recommendations,input_movie,expected_themes,expected_recs,trajectory,reasoning,pred_final_recommendations,combined_movie_recommendation_metric
0,I loved the movie Parasite. Can you recommend similar movies?,**Recommendations for Parasite lovers:** 1. **There Will Be Blood*...,Parasite,"[drama, quality filmmaking, genre excellence]","[There Will Be Blood, No Country for Old Men, Moonlight]","{'thought_0': 'Since the user enjoyed ""Parasite,"" I should gather ...","The user enjoyed ""Parasite,"" which is known for its intricate stor...",Here are some movie recommendations that align with your appreciat...,✔️ [0.439]
1,I loved the movie Moonlight. Can you recommend similar movies?,**Recommendations for Moonlight lovers:** 1. **There Will Be Blood...,Moonlight,"[drama, quality filmmaking, genre excellence]","[There Will Be Blood, No Country for Old Men, Parasite]","{'thought_0': 'Since the user enjoyed ""Moonlight,"" I should look f...","The user enjoyed ""Moonlight,"" a film that deeply explores themes o...","Based on your enjoyment of ""Moonlight,"" I recommend the following ...",✔️ [0.519]
2,I loved the movie Kiss Kiss Bang Bang. Can you recommend similar m...,**Recommendations for Kiss Kiss Bang Bang lovers:** 1. **The Grand...,Kiss Kiss Bang Bang,"[comedy, quality filmmaking, genre excellence]","[The Grand Budapest Hotel, In Bruges, The Nice Guys]","{'thought_0': 'Since the user enjoyed ""Kiss Kiss Bang Bang,"" I sho...","The user enjoyed ""Kiss Kiss Bang Bang,"" which is characterized by ...",1. **The Nice Guys**: Imagine a sun-soaked Los Angeles in the 1970...,✔️ [0.575]
3,I loved the movie In Bruges. Can you recommend similar movies?,**Recommendations for In Bruges lovers:** 1. **The Grand Budapest ...,In Bruges,"[comedy, quality filmmaking, genre excellence]","[The Grand Budapest Hotel, Kiss Kiss Bang Bang, The Nice Guys]","{'thought_0': 'Since the user enjoyed ""In Bruges,"" I should gather...","The user enjoyed ""In Bruges,"" which is characterized by its dark c...","1. **The Lobster**: Imagine a world where being single is a crime,...",✔️ [0.301]
4,I loved the movie Get Out. Can you recommend similar movies?,**Recommendations for Get Out lovers:** 1. **The Exorcist**: As a ...,Get Out,"[horror, quality filmmaking, genre excellence]","[The Exorcist, Hereditary, The Babadook]","{'thought_0': 'Since the user enjoyed ""Get Out,"" I should look for...","Since you loved ""Get Out,"" I focused on finding films that share s...","1. **Us (2019)** - Directed by Jordan Peele, this film is a master...",✔️ [0.656]
5,I loved the movie Pulp Fiction. Can you recommend similar movies?,**Recommendations for Pulp Fiction lovers:** 1. **Reservoir Dogs**...,Pulp Fiction,"[non-linear narrative, crime, dark humor]","[Reservoir Dogs, Kill Bill, Snatch]","{'thought_0': 'Since the user enjoyed ""Pulp Fiction,"" I should gat...","The user expressed a fondness for ""Pulp Fiction,"" which is known f...","1. **Reservoir Dogs**: Imagine a group of criminals, each with a c...",✔️ [0.574]
6,I loved the movie There Will Be Blood. Can you recommend similar m...,**Recommendations for There Will Be Blood lovers:** 1. **No Countr...,There Will Be Blood,"[drama, quality filmmaking, genre excellence]","[No Country for Old Men, Moonlight, Parasite]","{'thought_0': 'Since the user enjoyed ""There Will Be Blood,"" I sho...","The user expressed a fondness for ""There Will Be Blood,"" a film ri...",1. **The Master**: Imagine a world where the search for meaning dr...,✔️ [0.471]
7,I loved the movie The Dark Knight. Can you recommend similar movies?,**Recommendations for The Dark Knight lovers:** 1. **Heat**: These...,The Dark Knight,"[crime thriller, moral complexity, psychological depth]","[Heat, The Departed, Zodiac]","{'thought_0': 'Since the user loved ""The Dark Knight,"" I should ga...","The user expressed a strong appreciation for ""The Dark Knight,"" wh...",Here are some movie recommendations that align with your love for ...,✔️ [0.382]


🔍 Baseline Score: 48.940
🏃 View run baseline_measurement at: https://dbc-730a958a-fe1c.cloud.databricks.com/ml/experiments/4361855530837854/runs/9643ac395518450e85f182409a94a8fe
🧪 View experiment at: https://dbc-730a958a-fe1c.cloud.databricks.com/ml/experiments/4361855530837854
🚀 Running optimization...


2025/06/30 00:58:20 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 20
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 8

2025/06/30 00:58:20 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/30 00:58:20 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/06/30 00:58:20 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


 24%|██▎       | 4/17 [03:27<11:12, 51.76s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 4/6


 12%|█▏        | 2/17 [00:59<07:25, 29.70s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 5/6


 24%|██▎       | 4/17 [02:50<09:15, 42.72s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/6


  6%|▌         | 1/17 [00:09<02:36,  9.80s/it]
2025/06/30 01:05:48 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/06/30 01:05:48 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Error getting source code: unhashable type: 'dict'.

Running without program aware proposer.


2025/06/30 01:05:56 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...



DATA SUMMARY: The dataset effectively facilitates tailored movie recommendations based on users' previously enjoyed films, emphasizing genre identification and thematic similarities. It highlights a user-centric approach by providing analysis for each suggestion, focusing on quality filmmaking and popular selections that appeal to a general audience, while also exploring diverse sub-genres to enhance the movie discovery experience.
Using a randomly generated configuration for our grounded proposer.
Selected tip: high_stakes
task_demos No task demos provided.




[34m[2025-06-30T01:05:59.242690][0m

[31mSystem message:[0m

Your input fields are:
1. `dataset_description` (str): A description of the dataset that we are using.
2. `task_demos` (str): Example inputs/outputs of our module.
3. `basic_instruction` (str): Basic instruction.
4. `tip` (str): A suggestion for how to go about generating the new instruction.
Your output fields are:
1. `proposed_instruction` (str): Propose an inst

2025/06/30 01:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/06/30 01:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Master orchestrator coordinating movie analysis and narrative agents.

You are an Agent. In each episode, you will be given the fields `user_input` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `final_recommendations`.

To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.
After each tool call, you receive a resulting observation, which gets appended to your trajectory.

When writing next_thought, you may reason about the current situation and plan for future steps.
When selecting the next_tool_name and its next_tool_args, the tool must be one of:

(1) call_movie_analysis_agent, whose description is <desc>Call the Movie Analysis Agent</de





[34m[2025-06-30T01:06:08.088776][0m

[31mSystem message:[0m

Your input fields are:
1. `dataset_description` (str): A description of the dataset that we are using.
2. `task_demos` (str): Example inputs/outputs of our module.
3. `basic_instruction` (str): Basic instruction.
4. `tip` (str): A suggestion for how to go about generating the new instruction.
Your output fields are:
1. `proposed_instruction` (str): Propose an instruction that will be used to prompt a Language Model to perform this task.
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## dataset_description ## ]]
{dataset_description}

[[ ## task_demos ## ]]
{task_demos}

[[ ## basic_instruction ## ]]
{basic_instruction}

[[ ## tip ## ]]
{tip}

[[ ## proposed_instruction ## ]]
{proposed_instruction}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Use the information below to learn about a task that we are trying to solve using calls 

2025/06/30 01:06:09 INFO dspy.evaluate.evaluate: Average Metric: 3.9154285714285724 / 8 (48.9%)
2025/06/30 01:06:09 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 48.94






2025/06/30 01:06:09 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 20 =====
2025/06/30 01:06:09 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: As a master orchestrator, you are tasked with guiding a team of agents to analyze a user's favorite movie and generate tailored recommendations. The user has expressed a deep appreciation for "The Matrix," a film rich in themes of reality, identi

2025/06/30 01:08:17 INFO dspy.evaluate.evaluate: Average Metric: 3.7622857142857145 / 8 (47.0%)
2025/06/30 01:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 47.03 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/06/30 01:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03]
2025/06/30 01:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 20 =====
2025/06/30 01:08:17 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: As a master orchestrator, you are tasked with guiding a team of agents to analyze a user's favorite movie and generate tailored recommendations. The user has expressed a deep appreciation for "The Matrix," a film rich in themes of reality, ident

2025/06/30 01:09:53 INFO dspy.evaluate.evaluate: Average Metric: 3.7028571428571437 / 8 (46.3%)
2025/06/30 01:09:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 46.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 2'].
2025/06/30 01:09:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29]
2025/06/30 01:09:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:09:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 20 =====
2025/06/30 01:09:54 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Master orchestrator coordinating movie analysis and narrative agents.

You are an Agent. In each episode, you will be given the fields `user_input` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `final_recommendations`.

To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.
After each tool call, you receive a resulting observation, which gets appended to your trajectory.

When writing next_thought, you may reason about the current situation and plan for future steps.
When selecting the next_tool_name and its next_tool_args, the tool must be one of:

(1) call_movie_analysis_agent, whose description is <desc>Call the Movie Analysis Agent</desc>. It takes arguments {'movie_title': {'type': 'string'}}.
(2) call_narrative_agent, whose description is <desc>Call the Narrative Agent</desc>. It



Average Metric: 3.76 / 8 (47.0%): 100%|██████████| 8/8 [01:26<00:00, 10.82s/it]

2025/06/30 01:11:20 INFO dspy.evaluate.evaluate: Average Metric: 3.762285714285715 / 8 (47.0%)
2025/06/30 01:11:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 47.03 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 0'].
2025/06/30 01:11:20 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03]
2025/06/30 01:11:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:11:20 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 20 =====





2025/06/30 01:11:20 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Based on your previous movie preferences, please provide a list of films similar to the one mentioned, along with a detailed explanation of how each recommendation connects to the themes, genres, or emotional elements of the original film.
p: Next Tool Args:
Predictor 1
i: Based on your favorite movie, provide tailored recommendations for similar films that explore similar themes and genres. Include a brief analysis of each recommendation, highlighting key elements that resonate with the user's interests.
p: Final Recommendations:


Average Metric: 3.65 / 8 (45.6%): 100%|██████████| 8/8 [01:22<00:00, 10.33s/it]

2025/06/30 01:12:43 INFO dspy.evaluate.evaluate: Average Metric: 3.646857142857143 / 8 (45.6%)
2025/06/30 01:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 45.59 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/30 01:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59]
2025/06/30 01:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 20 =====





2025/06/30 01:12:43 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Based on your previous movie preferences, please provide a list of films similar to the one mentioned, along with a detailed explanation of how each recommendation connects to the themes, genres, or emotional elements of the original film.
p: Next Tool Args:
Predictor 1
i: As a master orchestrator, you are tasked with guiding a team of agents to analyze a user's favorite movie and generate tailored recommendations. The user has expressed a deep appreciation for "The Matrix," a film rich in themes of reality, identity, and rebellion against control. Your mission is to delve into the essence of this film, employing the movie analysis agent to extract its key themes and characteristics. Once you have gathered this information, you must then collaborate with the narrative agent to craft compelling recommendations that resonate with the user's interests. The stakes are high: failure to provide engaging and relevant suggestions could lead to user dissatisfaction. Ensure that t

2025/06/30 01:12:59 INFO dspy.evaluate.evaluate: Average Metric: 3.793142857142857 / 8 (47.4%)





2025/06/30 01:12:59 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 47.41 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 2'].
2025/06/30 01:12:59 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41]
2025/06/30 01:12:59 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:12:59 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 20 =====
2025/06/30 01:12:59 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Master orchestrator coordinating movie analysis and narrative agents.

You are an Agent. In each episode, you will be given the fields `user_input` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `final_recommendations`.

To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.
After each tool call, you receive a resulting observation, which gets appended to your trajectory.

When writing next_thought, you may reason about the current situation and plan for future steps.
When selecting the next_tool_name and its next_tool_args, the tool must be one of:

(1) call_movie_analysis_agent, whose description is <desc>Call the Movie Analysis Agent</desc>. It takes arguments {'movie_title': {'type': 'string'}}.
(2) call_narrative_agent, whose description is <desc>Call the Narrative Agent</desc>. It 

2025/06/30 01:13:25 INFO dspy.evaluate.evaluate: Average Metric: 3.786285714285715 / 8 (47.3%)
2025/06/30 01:13:25 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 47.33 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/30 01:13:25 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33]
2025/06/30 01:13:25 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:13:25 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 20 =====
2025/06/30 01:13:25 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: As a master orchestrator, you are tasked with guiding a team of agents to analyze a user's favorite movie and generate tailored recommendations. The user has expressed a deep appreciation for "The Matrix," a film rich in themes of reality, ident

2025/06/30 01:14:49 INFO dspy.evaluate.evaluate: Average Metric: 3.702857142857143 / 8 (46.3%)
2025/06/30 01:14:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 46.29 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 2', 'Predictor 1: Few-Shot Set 1'].
2025/06/30 01:14:49 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29]
2025/06/30 01:14:49 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 48.94


2025/06/30 01:14:49 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 20 =====
2025/06/30 01:14:49 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 3.95 / 8 (49.3%): 100%|██████████| 8/8 [00:54<00:00,  6.80s/it]

2025/06/30 01:15:44 INFO dspy.evaluate.evaluate: Average Metric: 3.947428571428572 / 8 (49.3%)
2025/06/30 01:15:44 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 49.34
2025/06/30 01:15:44 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 49.34 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/30 01:15:44 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34]
2025/06/30 01:15:44 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 49.34


2025/06/30 01:15:44 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 20 =====
2025/06/30 01:15:44 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Master orchestrator coordinating movie analysis and narrative agents.

You are an Agent. In each episode, you will be given the fields `user_input` as input. And you can see your past trajectory so far.
Your goal is to use one or more of the supplied tools to collect any necessary information for producing `final_recommendations`.

To do this, you will interleave next_thought, next_tool_name, and next_tool_args in each turn, and also when finishing the task.
After each tool call, you receive a resulting observation, which gets appended to your trajectory.

When writing next_thought, you may reason about the current situation and plan for future steps.
When selecting the next_tool_name and its next_tool_args, the tool must be one of:

(1) call_movie_analysis_agent, whose description is <desc>Call the Movie Analysis Agent</desc>. It takes arguments {'movie_title': {'type': 'string'}}.
(2) call_narrative_agent, whose description is <desc>Call the Narrative Agent</desc>. It

2025/06/30 01:15:56 INFO dspy.evaluate.evaluate: Average Metric: 3.6799999999999993 / 8 (46.0%)
2025/06/30 01:15:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 46.0 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 4'].
2025/06/30 01:15:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0]
2025/06/30 01:15:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 49.34


2025/06/30 01:15:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 20 =====





2025/06/30 01:15:56 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 3.92 / 8 (49.0%): 100%|██████████| 8/8 [00:23<00:00,  3.00s/it]

2025/06/30 01:16:20 INFO dspy.evaluate.evaluate: Average Metric: 3.9234285714285715 / 8 (49.0%)
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 49.04 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04]
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 49.34


2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 12 / 20 =====
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 3.92 / 8 (49.0%): 100%|██████████| 8/8 [00:00<00:00, 10.22it/s]

2025/06/30 01:16:21 INFO dspy.evaluate.evaluate: Average Metric: 3.9234285714285715 / 8 (49.0%)
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 49.04 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 0'].
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04]
2025/06/30 01:16:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 49.34


2025/06/30 01:16:22 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 20 =====





2025/06/30 01:16:22 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 4.35 / 8 (54.4%): 100%|██████████| 8/8 [00:17<00:00,  2.13s/it]

2025/06/30 01:16:39 INFO dspy.evaluate.evaluate: Average Metric: 4.353142857142857 / 8 (54.4%)
2025/06/30 01:16:39 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 54.41
2025/06/30 01:16:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 54.41 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 3'].
2025/06/30 01:16:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41]
2025/06/30 01:16:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 54.41


2025/06/30 01:16:39 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 14 / 20 =====





2025/06/30 01:16:39 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 4.35 / 8 (54.4%): 100%|██████████| 8/8 [00:01<00:00,  7.96it/s]

2025/06/30 01:16:40 INFO dspy.evaluate.evaluate: Average Metric: 4.353142857142857 / 8 (54.4%)





2025/06/30 01:16:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 54.41 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 3'].
2025/06/30 01:16:40 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41]
2025/06/30 01:16:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 54.41


2025/06/30 01:16:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 15 / 20 =====
2025/06/30 01:16:41 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 4.35 / 8 (54.4%): 100%|██████████| 8/8 [00:01<00:00,  7.24it/s]

2025/06/30 01:16:42 INFO dspy.evaluate.evaluate: Average Metric: 4.353142857142857 / 8 (54.4%)





2025/06/30 01:16:42 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 54.41 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 3'].
2025/06/30 01:16:42 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41]
2025/06/30 01:16:42 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 54.41


2025/06/30 01:16:42 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 16 / 20 =====
2025/06/30 01:16:42 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Based on your previous movie preferences, please provide a list of films similar to the one mentioned, along with a detailed explanation of how each recommendation connects to the themes, genres, or emotional elements of the original film.
p: Next Tool Args:
Predictor 1
i: Master orchestrator coordinating movie analysis and narrative agents.
p: Final Recommendations:


Average Metric: 4.26 / 8 (53.3%): 100%|██████████| 8/8 [00:27<00:00,  3.46s/it]

2025/06/30 01:17:10 INFO dspy.evaluate.evaluate: Average Metric: 4.264 / 8 (53.3%)
2025/06/30 01:17:10 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 53.3 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 1', 'Predictor 1: Instruction 0', 'Predictor 1: Few-Shot Set 3'].
2025/06/30 01:17:10 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41, 53.3]
2025/06/30 01:17:10 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 54.41


2025/06/30 01:17:10 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 17 / 20 =====
2025/06/30 01:17:10 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Based on your favorite movie, provide tailored recommendations for similar films that explore similar themes and genres. Include a brief analysis of each recommendation, highlighting key elements that resonate with the user's interests.
p: Final

2025/06/30 01:17:29 INFO dspy.evaluate.evaluate: Average Metric: 3.963428571428572 / 8 (49.5%)
2025/06/30 01:17:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 49.54 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 3'].
2025/06/30 01:17:29 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41, 53.3, 49.54]
2025/06/30 01:17:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 54.41


2025/06/30 01:17:29 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 18 / 20 =====





2025/06/30 01:17:29 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Based on your favorite movie, provide tailored recommendations for similar films that explore similar themes and genres. Include a brief analysis of each recommendation, highlighting key elements that resonate with the user's interests.
p: Final 

2025/06/30 01:17:44 INFO dspy.evaluate.evaluate: Average Metric: 4.497142857142857 / 8 (56.2%)
2025/06/30 01:17:44 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 56.21
2025/06/30 01:17:44 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 56.21 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 1'].
2025/06/30 01:17:44 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41, 53.3, 49.54, 56.21]
2025/06/30 01:17:44 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 56.21


2025/06/30 01:17:44 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 20 =====
2025/06/30 01:17:44 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...




Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Based on your favorite movie, provide tailored recommendations for similar films that explore similar themes and genres. Include a brief analysis of each recommendation, highlighting key elements that resonate with the user's interests.
p: Final

2025/06/30 01:17:45 INFO dspy.evaluate.evaluate: Average Metric: 4.497142857142857 / 8 (56.2%)
2025/06/30 01:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 56.21 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 1'].
2025/06/30 01:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41, 53.3, 49.54, 56.21, 56.21]
2025/06/30 01:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 56.21


2025/06/30 01:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 20 / 20 =====





2025/06/30 01:17:45 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Based on your favorite movie, provide tailored recommendations for similar films that explore similar themes and genres. Include a brief analysis of each recommendation, highlighting key elements that resonate with the user's interests.
p: Final 

2025/06/30 01:17:46 INFO dspy.evaluate.evaluate: Average Metric: 4.497142857142857 / 8 (56.2%)





2025/06/30 01:17:46 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 56.21 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 0', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 1'].
2025/06/30 01:17:46 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41, 53.3, 49.54, 56.21, 56.21, 56.21]
2025/06/30 01:17:46 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 56.21


2025/06/30 01:17:46 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 / 20 =====
2025/06/30 01:17:46 INFO dspy.teleprompt.mipro_optimizer_v2: Evaluating the following candidate program...



Predictor 0
i: Imagine you are a movie aficionado tasked with saving a film festival from disaster. The festival is on the brink of cancellation due to poor attendance, and the organizers have turned to you for help. Your mission is to recommend movies that will captivate the audience based on their previous favorites. Using a detailed analysis of films and their thematic elements, provide tailored movie recommendations that not only align with the viewers' interests but also ignite their passion for cinema. Your recommendations should include insightful commentary on why each film resonates with the themes of the user's favorite film, ensuring that the audience leaves the festival inspired and eager for more.
p: Next Tool Args:
Predictor 1
i: Based on your favorite movie, provide tailored recommendations for similar films that explore similar themes and genres. Include a brief analysis of each recommendation, highlighting key elements that resonate with the user's interests.
p: Final 

2025/06/30 01:18:02 INFO dspy.evaluate.evaluate: Average Metric: 3.936 / 8 (49.2%)
2025/06/30 01:18:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 49.2 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 2', 'Predictor 1: Instruction 1', 'Predictor 1: Few-Shot Set 5'].
2025/06/30 01:18:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [48.94, 47.03, 46.29, 47.03, 45.59, 47.41, 47.33, 46.29, 49.34, 46.0, 49.04, 49.04, 54.41, 54.41, 54.41, 53.3, 49.54, 56.21, 56.21, 56.21, 49.2]
2025/06/30 01:18:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 56.21


2025/06/30 01:18:02 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 56.21!



📊 Measuring optimized performance...
Average Metric: 4.50 / 8 (56.2%): 100%|██████████| 8/8 [00:01<00:00,  6.96it/s]

2025/06/30 01:18:03 INFO dspy.evaluate.evaluate: Average Metric: 4.497142857142857 / 8 (56.2%)





Unnamed: 0,user_input,example_final_recommendations,input_movie,expected_themes,expected_recs,trajectory,reasoning,pred_final_recommendations,combined_movie_recommendation_metric
0,I loved the movie Parasite. Can you recommend similar movies?,**Recommendations for Parasite lovers:** 1. **There Will Be Blood*...,Parasite,"[drama, quality filmmaking, genre excellence]","[There Will Be Blood, No Country for Old Men, Moonlight]","{'thought_0': 'I will analyze the themes and elements of ""Parasite...","Based on your appreciation for ""Parasite,"" I analyzed its themes o...",1. **Get Out**: Imagine a world where the seemingly perfect facade...,✔️ [0.415]
1,I loved the movie Moonlight. Can you recommend similar movies?,**Recommendations for Moonlight lovers:** 1. **There Will Be Blood...,Moonlight,"[drama, quality filmmaking, genre excellence]","[There Will Be Blood, No Country for Old Men, Parasite]","{'thought_0': 'I will analyze the themes and elements of ""Moonligh...","""Moonlight"" is a profound exploration of identity, love, and the c...",**Recommendations for Moonlight lovers:** 1. **Call Me by Your Nam...,✔️ [0.520]
2,I loved the movie Kiss Kiss Bang Bang. Can you recommend similar m...,**Recommendations for Kiss Kiss Bang Bang lovers:** 1. **The Grand...,Kiss Kiss Bang Bang,"[comedy, quality filmmaking, genre excellence]","[The Grand Budapest Hotel, In Bruges, The Nice Guys]","{'thought_0': 'I should analyze the themes and elements of ""Kiss K...","""Kiss Kiss Bang Bang"" is a unique blend of dark comedy, crime, and...",**Recommendations for fans of Kiss Kiss Bang Bang:** 1. **The Nice...,✔️ [0.589]
3,I loved the movie In Bruges. Can you recommend similar movies?,**Recommendations for In Bruges lovers:** 1. **The Grand Budapest ...,In Bruges,"[comedy, quality filmmaking, genre excellence]","[The Grand Budapest Hotel, Kiss Kiss Bang Bang, The Nice Guys]","{'thought_0': 'I will analyze the themes and elements of ""In Bruge...","Based on your appreciation for ""In Bruges,"" I analyzed its themes ...",**Recommendations for In Bruges lovers:** 1. **The Lobster**: Imag...,✔️ [0.439]
4,I loved the movie Get Out. Can you recommend similar movies?,**Recommendations for Get Out lovers:** 1. **The Exorcist**: As a ...,Get Out,"[horror, quality filmmaking, genre excellence]","[The Exorcist, Hereditary, The Babadook]","{'thought_0': 'I will analyze the themes and elements of ""Get Out""...","""Get Out"" is a masterful blend of psychological horror and social ...",**Recommendations for fans of Get Out:** 1. **Hush**: This film fe...,✔️ [0.519]
5,I loved the movie Pulp Fiction. Can you recommend similar movies?,**Recommendations for Pulp Fiction lovers:** 1. **Reservoir Dogs**...,Pulp Fiction,"[non-linear narrative, crime, dark humor]","[Reservoir Dogs, Kill Bill, Snatch]",{'thought_0': 'I should analyze the thematic elements and narrativ...,"Based on your love for ""Pulp Fiction,"" I analyzed its thematic ele...",**Recommendations for Pulp Fiction lovers:** 1. **Reservoir Dogs**...,✔️ [0.840]
6,I loved the movie There Will Be Blood. Can you recommend similar m...,**Recommendations for There Will Be Blood lovers:** 1. **No Countr...,There Will Be Blood,"[drama, quality filmmaking, genre excellence]","[No Country for Old Men, Moonlight, Parasite]","{'thought_0': 'I should analyze the themes and elements of ""There ...","""There Will Be Blood"" is a powerful exploration of ambition, greed...",**Recommendations for fans of There Will Be Blood:** 1. **The Mast...,✔️ [0.576]
7,I loved the movie The Dark Knight. Can you recommend similar movies?,**Recommendations for The Dark Knight lovers:** 1. **Heat**: These...,The Dark Knight,"[crime thriller, moral complexity, psychological depth]","[Heat, The Departed, Zodiac]",{'thought_0': 'I should analyze the themes and elements of The Dar...,The Dark Knight is renowned for its exploration of moral complexit...,**Recommendations for The Dark Knight lovers:** 1. **Se7en (1995)*...,✔️ [0.600]


🎉 Optimization Results:
   Baseline Score: 48.940
   Optimized Score: 56.210
   Improvement: 7.270 (14.9%)
🏃 View run optimization_process at: https://dbc-730a958a-fe1c.cloud.databricks.com/ml/experiments/4361855530837854/runs/3e2783600e1342cd97a9c8b45559f613
🧪 View experiment at: https://dbc-730a958a-fe1c.cloud.databricks.com/ml/experiments/4361855530837854
✅ Optimization complete!


In [None]:

# =============================================================================
# 🚀 SECTION 9: Comparison Interface
# =============================================================================

def create_comparison_interface():
    """Build the Gradio interface that contrasts the original and optimized systems.

    Relies on names defined in earlier cells: ``original_orchestrator``,
    ``optimized_orchestrator``, ``extract_recommended_movies``,
    ``baseline_score``, ``optimized_score``, ``trainset``, ``valset`` and the
    ``gr`` (Gradio) module.

    Returns:
        The constructed ``gr.Interface``. It is only built here, not launched.
    """

    def improvement_pct_text() -> str:
        """Format the relative score gain, or ``"n/a"`` when the baseline is zero."""
        if not baseline_score:
            # A zero baseline has no meaningful relative change, and dividing by it
            # would raise ZeroDivisionError while the interface is being built.
            return "n/a"
        return f"{(optimized_score - baseline_score) / baseline_score * 100:.1f}%"

    def analyze_improvements(original: str, optimized: str, movie: str) -> str:
        """Summarise surface-level differences between the two responses.

        Args:
            original: Recommendation text produced by the original system.
            optimized: Recommendation text produced by the optimized system.
            movie: Movie title the user asked about (used only in the heading).

        Returns:
            A Markdown-formatted report comparing extracted titles, response
            lengths and a few simple heuristic quality indicators.
        """

        orig_movies = extract_recommended_movies(original)
        opt_movies = extract_recommended_movies(optimized)

        analysis = f"""
**🎯 Improvement Analysis for "{movie}":**

**Original Recommendations**: {', '.join(orig_movies) if orig_movies else 'None extracted'}
**Optimized Recommendations**: {', '.join(opt_movies) if opt_movies else 'None extracted'}

**Length Comparison**:
• Original Response: {len(original)} characters
• Optimized Response: {len(optimized)} characters

**Quality Indicators**:
• **Formatting**: {'✅' if '**' in optimized else '❌'} Better formatting
• **Detail Level**: {'✅' if len(optimized) > len(original) else '❌'} More detailed explanations
• **Movie Count**: {'✅' if len(opt_movies) >= len(orig_movies) else '❌'} Adequate recommendations
• **Narrative Elements**: {'✅' if any(word in optimized.lower() for word in ['love', 'enjoy', 'similar', 'like']) else '❌'} Compelling language

**Optimization Impact**:
The optimized system was trained on {len(trainset)} examples to improve:
1. **Thematic Relevance**: Better matching of movie themes and genres
2. **Narrative Coherence**: More compelling "why you'll love this" explanations
3. **Response Structure**: Clearer formatting and organization
4. **Agent Coordination**: Improved multi-agent collaboration

**Training Focus**: The optimization specifically targeted recommendation accuracy and narrative quality using custom evaluation metrics.
"""
        return analysis

    def compare_systems(movie_title: str):
        """Run both systems on one movie title and collect the four output panels.

        Args:
            movie_title: Title typed into the interface; surrounding whitespace
                is ignored and an empty or missing value is rejected.

        Returns:
            A 4-tuple of strings: original response, optimized response,
            improvement analysis, performance summary. On failure the first
            element carries the error message and the rest are empty.
        """
        # Normalise once so the validation, the log lines and the query all see
        # the same value (and a missing value does not raise on .strip()).
        title = (movie_title or "").strip()
        if not title:
            return "Please enter a movie title!", "", "", ""

        user_query = f"I loved the movie {title}. Can you recommend similar movies?"

        try:
            # Get original system response
            print(f"🤖 Getting original system response for '{title}'...")
            original_result = original_orchestrator(user_input=user_query)
            original_response = original_result.final_recommendations

            # Get optimized system response
            print(f"🎯 Getting optimized system response for '{title}'...")
            optimized_result = optimized_orchestrator(user_input=user_query)
            optimized_response = optimized_result.final_recommendations

            # Analyze improvements
            improvements = analyze_improvements(original_response, optimized_response, title)

            # Performance comparison
            performance_comparison = f"""
**🔍 Performance Analysis:**

**Baseline System Score**: {baseline_score:.3f}
**Optimized System Score**: {optimized_score:.3f}
**Improvement**: {optimized_score - baseline_score:.3f} ({improvement_pct_text()})

**Key Optimization Areas**:
• Recommendation Relevance: Better theme matching
• Narrative Quality: More compelling explanations
• Agent Coordination: Improved multi-agent workflow
• TMDB Integration: Enhanced real data usage

**Training Data**: {len(trainset)} examples
**Validation Data**: {len(valset)} examples
**Optimization Method**: DSPy MIPROv2 with custom metrics
"""

            return original_response, optimized_response, improvements, performance_comparison

        except Exception as e:
            # UI boundary: report the failure in the first panel instead of
            # letting the exception escape the request handler.
            error_msg = f"Error comparing systems: {str(e)}"
            return error_msg, "", "", ""

    # Create Gradio interface
    interface = gr.Interface(
        fn=compare_systems,
        inputs=[
            gr.Textbox(
                label="🎬 Movie Title",
                placeholder="Try: Inception, The Matrix, Pulp Fiction, Interstellar",
                lines=1
            )
        ],
        outputs=[
            gr.Textbox(label="🤖 Original System Response", lines=15),
            gr.Textbox(label="🎯 Optimized System Response", lines=15),
            gr.Textbox(label="📊 Improvement Analysis", lines=12),
            gr.Textbox(label="⚖️ Performance Metrics", lines=10)
        ],
        title="🎯 DSPy Multi-Agent Optimization: Before vs After",
        description=f"""
        **Real DSPy Optimization Demonstration**

        🎯 **Optimization Results**: {optimized_score:.3f} vs {baseline_score:.3f} baseline ({improvement_pct_text()} improvement)

        📊 **What Was Optimized**:
        • **Training Data**: {len(trainset)} curated movie recommendation examples
        • **Evaluation Metrics**: Custom metrics for recommendation relevance + narrative quality
        • **Optimization Method**: DSPy MIPROv2 with multi-agent coordination
        • **Focus Areas**: Thematic accuracy, compelling narratives, better agent collaboration

        🎬 **Try It**: Enter any movie title to see how optimization improved the recommendations!
        """,
        examples=[
            ["Inception"],
            ["The Matrix"],
            ["Pulp Fiction"],
            ["Interstellar"],
            ["The Dark Knight"]
        ],
        theme=gr.themes.Soft()
    )

    return interface

# Build the comparison interface. It is only constructed here; the actual
# launch happens later via comparison_demo.launch(...) in the final cell.
comparison_demo = create_comparison_interface()

print("🚀 Comparison interface ready!")


🚀 Comparison interface ready!


In [None]:

# =============================================================================
# 🎉 SECTION 10: Results Summary and Next Steps
# =============================================================================

def display_optimization_summary():
    """Print and return the Markdown summary of the optimization run.

    Reads ``baseline_score``, ``optimized_score`` and ``trainset`` from
    earlier cells.

    Returns:
        The summary text that was printed.
    """
    score_delta = optimized_score - baseline_score
    if baseline_score:
        improvement_pct_text = f"{score_delta / baseline_score * 100:.1f}%"
    else:
        # A zero baseline has no meaningful relative change; avoid the
        # ZeroDivisionError and report the percentage as unavailable.
        improvement_pct_text = "n/a"

    summary = f"""
# 🎯 DSPy Multi-Agent Optimization - Complete Results

## 📊 Performance Improvement
- **Baseline Score**: {baseline_score:.3f}
- **Optimized Score**: {optimized_score:.3f}
- **Improvement**: {score_delta:.3f} ({improvement_pct_text})

## 🎬 System Architecture
**Original Multi-Agent Components**:
- Movie Analysis Agent (hypothesis generation)
- Narrative Agent (story construction)
- Master Orchestrator (agent coordination)
- TMDB Integration (real movie data)

**Optimization Enhancements**:
- Custom evaluation metrics for movie recommendations
- Training dataset with {len(trainset)} curated examples
- DSPy MIPROv2 optimizer with light configuration
- Focus on recommendation relevance + narrative quality

## 🎯 Key Improvements Demonstrated
1. **Better Recommendations**: More thematically relevant movie suggestions
2. **Enhanced Narratives**: More compelling "why you'll love this" explanations
3. **Improved Structure**: Better formatting and organization
4. **Smarter Coordination**: More efficient multi-agent collaboration

## 🚀 Technical Implementation
- **Framework**: DSPy ReAct agents with tool usage
- **Data Source**: TMDB API for real movie metadata
- **Optimization**: MIPROv2 with custom movie recommendation metrics
- **Evaluation**: Combined relevance + narrative quality scoring
- **Tracking**: MLflow for experiment management

## 🎬 Next Steps for Production
1. **Expand Training Data**: Include more diverse movie preferences
2. **Advanced Metrics**: Add user satisfaction and click-through rate simulation
3. **A/B Testing**: Deploy optimized system alongside baseline for comparison
4. **Continuous Learning**: Regular retraining with new user preference data
5. **Domain Expansion**: Apply same optimization approach to TV shows, books, music

## 🔧 Code Availability
This complete optimization pipeline demonstrates:
- Real multi-agent system optimization using DSPy
- Custom evaluation metrics for domain-specific tasks
- Training data generation for recommendation systems
- Before/after comparison interface
- Production-ready optimization workflow

🎉 **Ready to scale and deploy!**
"""

    print(summary)
    return summary

# Display final summary
optimization_summary = display_optimization_summary()

# Relative gain for the closing banner. A zero baseline has no meaningful
# percentage, so report "n/a" instead of raising ZeroDivisionError.
if baseline_score:
    improvement_pct_text = f"{(optimized_score - baseline_score) / baseline_score * 100:.1f}%"
else:
    improvement_pct_text = "n/a"

print("\n" + "="*80)
print("🎉 DSPy MULTI-AGENT OPTIMIZATION DEMONSTRATION COMPLETE!")
print("="*80)
print(f"🎯 Achieved {improvement_pct_text} improvement in movie recommendation quality")
print("🚀 Launch the Gradio interface below to try the optimized system!")
print("="*80)

# Launch the comparison interface
# NOTE(review): share=True requests a public tunnel URL for the app, and
# debug=True keeps this cell running until it is interrupted — confirm both
# are intended before sharing the notebook.
comparison_demo.launch(share=True, debug=True)


# 🎯 DSPy Multi-Agent Optimization - Complete Results

## 📊 Performance Improvement
- **Baseline Score**: 48.940
- **Optimized Score**: 56.210  
- **Improvement**: 7.270 (14.9%)

## 🎬 System Architecture
**Original Multi-Agent Components**:
- Movie Analysis Agent (hypothesis generation)
- Narrative Agent (story construction)  
- Master Orchestrator (agent coordination)
- TMDB Integration (real movie data)

**Optimization Enhancements**:
- Custom evaluation metrics for movie recommendations
- Training dataset with 17 curated examples
- DSPy MIPROv2 optimizer with light configuration
- Focus on recommendation relevance + narrative quality

## 🎯 Key Improvements Demonstrated
1. **Better Recommendations**: More thematically relevant movie suggestions
2. **Enhanced Narratives**: More compelling "why you'll love this" explanations
3. **Improved Structure**: Better formatting and organization
4. **Smarter Coordination**: More efficient multi-agent collaboration

## 🚀 Technical Implementation


🤖 Getting original system response for 'The Inception'...
🎯 Getting optimized system response for 'The Inception'...
🤖 Getting original system response for 'The Matrix'...
🎯 Getting optimized system response for 'The Matrix'...
🤖 Getting original system response for 'Parasite'...
🎯 Getting optimized system response for 'Parasite'...
🤖 Getting original system response for 'Mad Max: Fury Road'...
🎯 Getting optimized system response for 'Mad Max: Fury Road'...
🤖 Getting original system response for 'Her'...




🎯 Getting optimized system response for 'Her'...
🤖 Getting original system response for 'Pulp Fiction'...
🎯 Getting optimized system response for 'Pulp Fiction'...
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://8a94f57710d9e20417.gradio.live


