# DSPy Intent Generation Approach

This notebook implements a 3-step process to generate realistic food delivery queries using DSPy:
1. Generate pure user intents (no food bias)
2. Smart 1:1 matching of foods to best-fitting intents
3. Generate both intent queries and food-aware final queries

Key insight: Original intents work as standalone queries ~75% of the time!

In [2]:
import json
import os
import sys
from datetime import datetime
from pathlib import Path

import dspy
import pandas as pd
from dotenv import load_dotenv
from pydantic import BaseModel, Field

load_dotenv()

# Add the project root to Python path for imports
project_root = os.environ.get("root_folder")
print(f"Project root from env: {project_root}")
if project_root:
    sys.path.insert(0, str(project_root))

from database.utils.db_utils import get_table
from src.data_generation.dspy_schemas import setup_dspy_model

Project root from env: /Users/luvsuneja/Documents/repos/masala-embed/esci-dataset


## Custom Pydantic Models for Intent Generation

In [3]:
class IntentList(BaseModel):
    """Schema for Step 1: List of pure user intents."""

    intents: list[str] = Field(description="List of pure user search intents")


class IntentMatch(BaseModel):
    """Schema for a single intent-to-food match."""

    consumable_id: int = Field(description="Food item ID")
    consumable_name: str = Field(description="Food item name")
    intent: str = Field(description="Matched user intent")
    reasoning: str = Field(description="Reasoning for the match")


class IntentMatches(BaseModel):
    """Schema for Step 2: Collection of intent-to-food matches."""

    matches: list[IntentMatch] = Field(description="List of intent-food matches")


class IntentQueryResult(BaseModel):
    """Schema for a single query result with metadata."""

    consumable_id: int = Field(description="Food item ID")
    consumable_name: str = Field(description="Food item name")
    original_intent: str = Field(description="Original user intent")
    queries: list[str] = Field(description="Generated queries for this food")


class IntentQueryOutput(BaseModel):
    """Schema for Step 3: Complete query generation output."""

    query_results: list[IntentQueryResult] = Field(
        description="List of query results per food"
    )

## DSPy Signatures and Modules

In [4]:
class IntentGenerationSignature(dspy.Signature):
    """DSPy signature for generating pure user intents."""

    prompt: str = dspy.InputField(desc="Intent generation prompt")
    intent_list: IntentList = dspy.OutputField(desc="List of pure user search intents")


class IntentMatchingSignature(dspy.Signature):
    """DSPy signature for matching intents to foods."""

    matching_prompt: str = dspy.InputField(
        desc="Prompt with intents and foods to match"
    )
    intent_matches: IntentMatches = dspy.OutputField(
        desc="Intent-to-food matches with reasoning"
    )


class IntentQuerySignature(dspy.Signature):
    """DSPy signature for generating final queries from matches."""

    query_prompt: str = dspy.InputField(
        desc="Prompt for generating queries from matches"
    )
    query_output: IntentQueryOutput = dspy.OutputField(
        desc="Generated queries for all matched foods"
    )


class IntentGenerator(dspy.Module):
    """DSPy module for Step 1: Generate pure user intents."""

    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(IntentGenerationSignature)

    def forward(self, prompt: str) -> IntentList:
        result = self.generate(prompt=prompt)
        return result.intent_list


class IntentMatcher(dspy.Module):
    """DSPy module for Step 2: Match intents to foods."""

    def __init__(self):
        super().__init__()
        self.match = dspy.ChainOfThought(IntentMatchingSignature)

    def forward(self, matching_prompt: str) -> IntentMatches:
        result = self.match(matching_prompt=matching_prompt)
        return result.intent_matches


class IntentQueryGenerator(dspy.Module):
    """DSPy module for Step 3: Generate final queries."""

    def __init__(self):
        super().__init__()
        self.generate_queries = dspy.ChainOfThought(IntentQuerySignature)

    def forward(self, query_prompt: str) -> IntentQueryOutput:
        result = self.generate_queries(query_prompt=query_prompt)
        return result.query_output

## Configuration and Setup

In [None]:
# Configuration
MODEL = "gpt-5"
TEMPERATURE = 1.0
NUM_INTENTS = 50
BATCH_SIZE = 10
LIMIT = 20  # Number of foods to process
QUERIES_PER_ITEM = 3  # Number of queries to generate per food item
STOP_AT_INTENTS = False  # Set to True to stop after step 2 (intent matching)

print("üöÄ Starting DSPy intent-driven query generation...")
print(f"Model: {MODEL}")
print(f"Temperature: {TEMPERATURE}")
print(f"Number of intents: {NUM_INTENTS}")
print(f"Food limit: {LIMIT}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Queries per item: {QUERIES_PER_ITEM}")
print(f"Stop at intents: {STOP_AT_INTENTS}")

In [6]:
# Setup DSPy
def setup_dspy_client(model: str = "gpt-5", temperature: float = 1.0):
    """Setup DSPy with OpenAI client."""
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY not found in environment variables")

    setup_dspy_model(api_key, model, temperature)
    print(f"‚úÖ DSPy setup complete with model: {model}, temperature: {temperature}")


setup_dspy_client(MODEL, TEMPERATURE)

‚úÖ DSPy setup complete with model: gpt-5, temperature: 1.0


## Step 1: Generate Pure User Intents

In [None]:
def step1_generate_intents(num_intents: int = 50) -> list[str]:
    """Step 1: Generate pure user intents with no food bias using DSPy."""

    # Load v1.1 intent generation prompt
    prompt_path = "prompts/intent_generation/v1.1_intent_generation.txt"
    prompt_path = os.path.join(project_root, prompt_path)
    try:
        with open(prompt_path, encoding="utf-8") as f:
            intent_prompt = f.read()
    except FileNotFoundError:
        raise FileNotFoundError(f"Intent generation prompt not found at: {prompt_path}")

    # Replace the default number with the requested number
    intent_prompt = intent_prompt.replace(
        "Generate 50 diverse", f"Generate {num_intents} diverse"
    )
    intent_prompt = intent_prompt.replace(
        "Return a simple list of 50 search queries",
        f"Return a simple list of {num_intents} search queries",
    )

    # Generate intents using DSPy
    try:
        generator = IntentGenerator()
        result = generator.forward(intent_prompt)
        intents = result.intents

        print(f"Generated {len(intents)} user intents")
        return intents

    except Exception as e:
        print(f"Error in step1_generate_intents: {e}")
        raise

In [42]:
pd.DataFrame(intents).sample(10)

Unnamed: 0,0
16,low-carb dinner ideas
33,shareable plates for game night
8,office lunch for team of 6
4,cheap eats under 10 bucks
15,gluten-free options tonight
27,halal options near me
43,new places with free delivery
17,date night dinner for two
2,comfort food for bad day
34,curbside pickup close by


## Load Food Data

In [43]:
def load_food_data(limit: int | None = None) -> pd.DataFrame:
    """Load food candidates data from database."""
    print("üìä Loading consumable data from database...")
    try:
        # Load ENTIRE table first for true randomization
        df = get_table("consumable", limit=None)
        print(f"Loaded {len(df)} total records from consumable table")
        if len(df) == 0:
            raise ValueError("No data found in consumable table")

        # Shuffle the ENTIRE dataframe with fixed seed for reproducibility
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        print("Shuffled entire dataset with seed=42")

        # Apply limit AFTER shuffling to get truly random subset
        if limit is not None:
            df = df.head(limit)
            print(f"Selected top {len(df)} records after shuffling")

        # Rename columns to match expected format
        if "id" not in df.columns and "consumable_id" in df.columns:
            df = df.rename(columns={"consumable_id": "id"})

        print(f"‚úÖ Successfully processed {len(df)} food items")
        return df

    except Exception as e:
        print(f"‚ùå Error loading food data: {e}")
        raise


# Load food data
food_df = load_food_data(limit=LIMIT)
print("\nüìä Sample foods:")
print(food_df[["id", "consumable_name"]].head())

üìä Loading consumable data from database...


  df = pd.read_sql_query(query, conn)


Loaded 25574 total records from consumable table
Shuffled entire dataset with seed=42
Selected top 20 records after shuffling
‚úÖ Successfully processed 20 food items

üìä Sample foods:
      id               consumable_name
0  21496            Fried Potato Slice
1   6473             Cheeseburger Meal
2   2584     Grilled Fish with Noodles
3   5706                  Fresh Greens
4  23138  Fried Ham and Asparagus Roll


In [44]:
food_df.head(10)

Unnamed: 0,id,image_url,consumable_name,consumable_type,consumable_ingredients,consumable_portion_size,consumable_nutritional_profile,consumable_cooking_method,created_at
0,21496,https://file.b18a.io/7832991235000105250_33757...,Fried Potato Slice,Homemade food,"[""potato"",""oil""]","[""potato:100g""]","{'fat_g': 0.1, 'protein_g': 0.5, 'calories_kca...",Fried,2025-09-21 09:11:39.103523+00:00
1,6473,https://file.b18a.io/7838020724700102611_77507...,Cheeseburger Meal,Restaurant food,"[""burger patty"",""cheese"",""lettuce"",""sesame bun...","[""burger:250g"",""corn salad:100g"",""drink:300ml""]","{'fat_g': 40.0, 'protein_g': 30.0, 'calories_k...",Fried and assembled,2025-09-21 09:11:39.103523+00:00
2,2584,https://file.b18a.io/7836959712600105016_21209...,Grilled Fish with Noodles,Restaurant food,"[""fish"",""noodles"",""green onions"",""tomatoes""]","[""fish:300g"",""noodles:100g"",""vegetables:50g""]","{'fat_g': 20.0, 'protein_g': 40.0, 'calories_k...",Grilling,2025-09-21 09:11:39.103523+00:00
3,5706,https://file.b18a.io/7850423884600102768_49173...,Fresh Greens,Raw vegetables and fruits,"[""leafy greens""]","[""greens:200g""]","{'fat_g': 0.5, 'protein_g': 2, 'calories_kcal'...",Raw,2025-09-21 09:11:39.103523+00:00
4,23138,https://file.b18a.io/7835501750900104711_99495...,Fried Ham and Asparagus Roll,Restaurant food,"[""ham"",""asparagus"",""bread crumbs"",""oil""]","[""ham:150g"",""asparagus:50g"",""breading:100g""]","{'fat_g': 25.0, 'protein_g': 20.0, 'calories_k...",Fried,2025-09-21 09:11:39.103523+00:00
5,228,https://file.b18a.io/7836897959300107195_10044...,Hot Pot,Restaurant food,"[""chicken feet"",""peanuts"",""spicy broth"",""sauce""]","[""chicken feet:150g"",""peanuts:100g"",""sauce:50g""]","{'fat_g': 30.0, 'protein_g': 40.0, 'calories_k...",boiling,2025-09-21 09:11:39.103523+00:00
6,12128,https://file.b18a.io/7833531866400107749_80708...,Fig and Burrata Salad,Restaurant food,"[""figs"",""burrata"",""pistachios"",""basil"",""flatbr...","[""figs:200g"",""burrata:150g"",""pistachios:30g"",""...","{'fat_g': 25.0, 'protein_g': 15.0, 'calories_k...","No cooking involved, served fresh",2025-09-21 09:11:39.103523+00:00
7,7131,https://file.b18a.io/7833031139300106712_41100...,Steamed Meat with Sauce,Restaurant food,"[""ground meat"",""green beans"",""sauce""]","[""meat:250g"",""sauce:50g""]","{'fat_g': 15.0, 'protein_g': 20.0, 'calories_k...",steaming,2025-09-21 09:11:39.103523+00:00
8,3790,https://file.b18a.io/7839306167200102895_27961...,Beef Soup,Homemade food,"[""beef"",""bok choy"",""broth""]","[""beef:200g"",""bok choy:150g"",""broth:300ml""]","{'fat_g': 15.0, 'protein_g': 30.0, 'calories_k...",boiling,2025-09-21 09:11:39.103523+00:00
9,9215,https://file.b18a.io/7835253185600107104_52647...,Korean Fried Chicken,Restaurant food,"[""fried chicken"",""soy sauce"",""mayonnaise"",""pic...","[""chicken:400g"",""sauces:50g""]","{'fat_g': 45.0, 'protein_g': 40.0, 'calories_k...",Fried,2025-09-21 09:11:39.103523+00:00


## Step 2: Match Intents to Foods

In [None]:
def step2_match_intents_to_foods(intents: list[str], food_df: pd.DataFrame) -> dict:
    """Step 2: Smart 1:1 matching of foods to best-fitting intents using DSPy."""

    # Load v1.2 intent matching prompt
    prompt_path = "prompts/intent_generation/v1.2_intent_matching.txt"
    prompt_path = os.path.join(project_root, prompt_path)
    try:
        with open(prompt_path, encoding="utf-8") as f:
            matching_prompt_template = f.read()
    except FileNotFoundError:
        raise FileNotFoundError(f"Intent matching prompt not found at: {prompt_path}")

    # Format the prompt with actual data
    intents_list = chr(10).join(
        [f"{i + 1}. {intent}" for i, intent in enumerate(intents)]
    )
    food_dataframe = food_df.to_markdown(index=False)

    matching_prompt = matching_prompt_template.format(
        intents_list=intents_list, food_dataframe=food_dataframe
    )

    # Get matches using DSPy
    try:
        matcher = IntentMatcher()
        result = matcher.forward(matching_prompt)

        # Convert Pydantic result to dict format for backward compatibility
        matches = {"matches": [match.dict() for match in result.matches]}

        print(f"Matched {len(matches['matches'])} foods to intents")
        return matches

    except Exception as e:
        print(f"Error in step2_match_intents_to_foods: {e}")
        raise

In [57]:
food_df.shape

(20, 9)

In [58]:
# Test the matching function (optional - will be called in batch processing later)
matches = step2_match_intents_to_foods(intents, food_df)



üéØ Step 2: Matching foods to best intents...


/var/folders/jk/dyw0vdnx2jg9lyq8m01n8nfm0000gn/T/ipykernel_52053/2908078118.py:31: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  matches = {"matches": [match.dict() for match in result.matches]}


In [59]:
matches

{'matches': [{'consumable_id': 21496,
   'consumable_name': 'Fried Potato Slice',
   'intent': 'something crispy for movie night',
   'reasoning': 'Light, fried potato slices are classic crispy snack fare ideal for casual munching during a movie.'},
  {'consumable_id': 6473,
   'consumable_name': 'Cheeseburger Meal',
   'intent': 'something cheesy and hot',
   'reasoning': 'A freshly made cheeseburger with melted cheese squarely fits the craving for something cheesy, hot, and satisfying.'},
  {'consumable_id': 2584,
   'consumable_name': 'Grilled Fish with Noodles',
   'intent': 'protein-packed meal fast',
   'reasoning': 'Grilled fish provides 40g protein and is a straightforward, speedy restaurant dish for a protein-focused meal.'},
  {'consumable_id': 5706,
   'consumable_name': 'Fresh Greens',
   'intent': 'something fresh and healthy',
   'reasoning': 'Raw leafy greens are light, fresh, and nutrient-dense‚Äîan obvious pick for a healthy craving.'},
  {'consumable_id': 23138,
   'c

In [61]:
matches

{'matches': [{'consumable_id': 21496,
   'consumable_name': 'Fried Potato Slice',
   'intent': 'something crispy for movie night',
   'reasoning': 'Light, fried potato slices are classic crispy snack fare ideal for casual munching during a movie.'},
  {'consumable_id': 6473,
   'consumable_name': 'Cheeseburger Meal',
   'intent': 'something cheesy and hot',
   'reasoning': 'A freshly made cheeseburger with melted cheese squarely fits the craving for something cheesy, hot, and satisfying.'},
  {'consumable_id': 2584,
   'consumable_name': 'Grilled Fish with Noodles',
   'intent': 'protein-packed meal fast',
   'reasoning': 'Grilled fish provides 40g protein and is a straightforward, speedy restaurant dish for a protein-focused meal.'},
  {'consumable_id': 5706,
   'consumable_name': 'Fresh Greens',
   'intent': 'something fresh and healthy',
   'reasoning': 'Raw leafy greens are light, fresh, and nutrient-dense‚Äîan obvious pick for a healthy craving.'},
  {'consumable_id': 23138,
   'c

## Step 3: Generate Final Queries

In [None]:
def step3_generate_final_queries(
    matches: dict, queries_per_item: int = 3
) -> list[dict]:
    """Step 3: Generate final queries for all matched pairs using DSPy."""

    # Load v1.3 intent query generation prompt
    prompt_path = "prompts/intent_generation/v1.3_intent_query_generation.txt"
    prompt_path = os.path.join(project_root, prompt_path)
    try:
        with open(prompt_path, encoding="utf-8") as f:
            batch_prompt_template = f.read()
    except FileNotFoundError:
        raise FileNotFoundError(
            f"Intent query generation prompt not found at: {prompt_path}"
        )

    # Format intent-food pairs
    intent_food_pairs = ""
    for i, match in enumerate(matches["matches"], 1):
        intent_food_pairs += f"""
{i}. Intent: "{match["intent"]}"
   Food: {match["consumable_name"]} (ID: {match["consumable_id"]})
"""

    # Format the prompt with actual data
    batch_prompt = batch_prompt_template.format(
        intent_food_pairs=intent_food_pairs, queries_per_item=queries_per_item
    )

    # Generate queries using DSPy
    try:
        query_generator = IntentQueryGenerator()
        result = query_generator.forward(batch_prompt)

        # Convert to flat list format
        final_queries = []
        for query_result in result.query_results:
            # Add intent as standalone query
            final_queries.append(
                {
                    "consumable_id": query_result.consumable_id,
                    "consumable_name": query_result.consumable_name,
                    "query": query_result.original_intent,
                    "query_type": "intent",
                    "original_intent": query_result.original_intent,
                    "generated_at": datetime.now().isoformat(),
                }
            )

            # Add bridged queries
            for query in query_result.queries:
                final_queries.append(
                    {
                        "consumable_id": query_result.consumable_id,
                        "consumable_name": query_result.consumable_name,
                        "query": query,
                        "query_type": "bridged",
                        "original_intent": query_result.original_intent,
                        "generated_at": datetime.now().isoformat(),
                    }
                )

        print(f"Generated {len(final_queries)} total queries")
        return final_queries

    except Exception as e:
        print(f"Error in step3_generate_final_queries: {e}")
        raise

## Process Foods in Batches

In [103]:
# Step 2: Process foods in batches for matching
BATCH_SIZE = 20
all_batch_matches = []
total_batches = (len(food_df) + BATCH_SIZE - 1) // BATCH_SIZE

for batch_idx in range(1):
    start_idx = batch_idx * BATCH_SIZE
    end_idx = min(start_idx + BATCH_SIZE, len(food_df))
    batch_df = food_df.iloc[start_idx:end_idx]

    print(
        f"\nüì¶ Processing batch {batch_idx + 1}/{total_batches} ({len(batch_df)} foods) for matching"
    )

    # Step 2: Smart matching for this batch
    batch_matches = step2_match_intents_to_foods(intents, batch_df)

    all_batch_matches.append(batch_matches)

print(f"‚úÖ Step 2 complete: matched {len(all_batch_matches)} batches")




üì¶ Processing batch 1/1 (20 foods) for matching
üéØ Step 2: Matching foods to best intents...
‚úÖ Step 2 complete: matched 1 batches


/var/folders/jk/dyw0vdnx2jg9lyq8m01n8nfm0000gn/T/ipykernel_52053/2908078118.py:31: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  matches = {"matches": [match.dict() for match in result.matches]}


In [104]:
all_batch_matches

[{'matches': [{'consumable_id': 21496,
    'consumable_name': 'Fried Potato Slice',
    'intent': 'something crispy for movie night',
    'reasoning': 'Light, fried potato slices are classic crispy snack fare ideal for casual munching during a movie.'},
   {'consumable_id': 6473,
    'consumable_name': 'Cheeseburger Meal',
    'intent': 'something cheesy and hot',
    'reasoning': 'A freshly made cheeseburger with melted cheese squarely fits the craving for something cheesy, hot, and satisfying.'},
   {'consumable_id': 2584,
    'consumable_name': 'Grilled Fish with Noodles',
    'intent': 'protein-packed meal fast',
    'reasoning': 'Grilled fish provides 40g protein and is a straightforward, speedy restaurant dish for a protein-focused meal.'},
   {'consumable_id': 5706,
    'consumable_name': 'Fresh Greens',
    'intent': 'something fresh and healthy',
    'reasoning': 'Raw leafy greens are light, fresh, and nutrient-dense‚Äîan obvious pick for a healthy craving.'},
   {'consumable_

In [None]:
# Step 3: Generate final queries for all batches
all_final_queries = []
all_matches = {"matches": []}

for batch_idx, batch_matches in enumerate(all_batch_matches):
    print(
        f"\nüì¶ Processing batch {batch_idx + 1}/{len(all_batch_matches)} for query generation"
    )

    # Accumulate matches
    all_matches["matches"].extend(batch_matches["matches"])

    # Step 3: Generate final queries for this batch (if not stopping at intents)
    if not STOP_AT_INTENTS:
        batch_queries = step3_generate_final_queries(batch_matches, QUERIES_PER_ITEM)
        all_final_queries.extend(batch_queries)

final_queries = all_final_queries

# Summary
if STOP_AT_INTENTS:
    print("\n‚úÖ Success!")
    print("üìä Generated intent-to-food matches:")
    print(f"   üéØ {len(intents)} original intents")
    print(f"   üçΩÔ∏è {len(all_matches['matches'])} matched foods")
    print("   ‚èπÔ∏è  Stopped at intent matching (no query generation)")
else:
    intent_queries = len([q for q in final_queries if q["query_type"] == "intent"])
    bridged_queries = len([q for q in final_queries if q["query_type"] == "bridged"])

    print("\n‚úÖ Success!")
    print(f"üìä Generated {len(final_queries)} total queries:")
    print(f"   üéØ {intent_queries} intent-based queries (pure user searches)")
    print(f"   üîó {bridged_queries} bridged queries (food-aware)")
    print("   üìà ~75% authentic user intent coverage")

## Results Analysis

In [107]:
# Convert to DataFrame for analysis
df_results = pd.DataFrame(final_queries)
print("üìä Query Results Summary:")
print(f"Total queries: {len(df_results)}")
print(f"Query types: {df_results['query_type'].value_counts().to_dict()}")
print(f"Unique foods: {df_results['consumable_id'].nunique()}")

print("\nüìù Sample Results:")
print(
    df_results[["consumable_name", "query", "query_type", "original_intent"]].head(10)
)

üìä Query Results Summary:
Total queries: 76
Query types: {'bridged': 57, 'intent': 19}
Unique foods: 19

üìù Sample Results:
             consumable_name                                        query  \
0         Fried Potato Slice             something crispy for movie night   
1         Fried Potato Slice   crispy fried potato slices for movie night   
2         Fried Potato Slice  movie-night snack crispy potato chips style   
3         Fried Potato Slice   hot crunchy potato slices delivery near me   
4          Cheeseburger Meal                     something cheesy and hot   
5          Cheeseburger Meal     cheesy hot cheeseburger combo with fries   
6          Cheeseburger Meal           extra cheese burger meal delivered   
7          Cheeseburger Meal          hot melty cheeseburger meal near me   
8  Grilled Fish with Noodles                     protein-packed meal fast   
9  Grilled Fish with Noodles       high-protein grilled fish with noodles   

  query_type            

In [109]:
pd.set_option("display.max_rows", None)
df_results

Unnamed: 0,consumable_id,consumable_name,query,query_type,original_intent,generated_at
0,21496,Fried Potato Slice,something crispy for movie night,intent,something crispy for movie night,2025-09-23T09:39:27.489957
1,21496,Fried Potato Slice,crispy fried potato slices for movie night,bridged,something crispy for movie night,2025-09-23T09:39:27.489996
2,21496,Fried Potato Slice,movie-night snack crispy potato chips style,bridged,something crispy for movie night,2025-09-23T09:39:27.490002
3,21496,Fried Potato Slice,hot crunchy potato slices delivery near me,bridged,something crispy for movie night,2025-09-23T09:39:27.490005
4,6473,Cheeseburger Meal,something cheesy and hot,intent,something cheesy and hot,2025-09-23T09:39:27.490008
5,6473,Cheeseburger Meal,cheesy hot cheeseburger combo with fries,bridged,something cheesy and hot,2025-09-23T09:39:27.490028
6,6473,Cheeseburger Meal,extra cheese burger meal delivered,bridged,something cheesy and hot,2025-09-23T09:39:27.490031
7,6473,Cheeseburger Meal,hot melty cheeseburger meal near me,bridged,something cheesy and hot,2025-09-23T09:39:27.490033
8,2584,Grilled Fish with Noodles,protein-packed meal fast,intent,protein-packed meal fast,2025-09-23T09:39:27.490036
9,2584,Grilled Fish with Noodles,high-protein grilled fish with noodles,bridged,protein-packed meal fast,2025-09-23T09:39:27.490038


In [None]:
# Show intent vs bridged query comparison
print("\nüéØ Intent vs Bridged Query Comparison:")
for food_id in df_results["food_id"].unique()[:3]:  # Show first 3 foods
    food_queries = df_results[df_results["food_id"] == food_id]
    food_name = food_queries["food_name"].iloc[0]
    original_intent = food_queries["original_intent"].iloc[0]

    print(f"\nüçΩÔ∏è Food: {food_name}")
    print(f"   Original Intent: '{original_intent}'")

    intent_query = food_queries[food_queries["query_type"] == "intent"]["query"].iloc[0]
    print(f"   üìç Intent Query: '{intent_query}'")

    bridged_queries = food_queries[food_queries["query_type"] == "bridged"][
        "query"
    ].tolist()
    for i, bq in enumerate(bridged_queries, 1):
        print(f"   üîó Bridged {i}: '{bq}'")

## Save Results

In [None]:
# Save results
output_dir = "output"
Path(output_dir).mkdir(exist_ok=True)

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Save original intents
intents_path = f"{output_dir}/dspy_intent_generation_intents_{timestamp}.txt"
with open(intents_path, "w", encoding="utf-8") as f:
    f.write("ORIGINAL USER INTENTS GENERATED (Step 1):\n")
    f.write("=" * 50 + "\n")
    for i, intent in enumerate(intents, 1):
        f.write(f"{i:2d}. {intent}\n")

# Save matches
matches_path = f"{output_dir}/dspy_intent_generation_matches_{timestamp}.json"
with open(matches_path, "w", encoding="utf-8") as f:
    json.dump(all_matches, f, indent=2)

# Save final queries (only if not stopping at intents)
queries_path = None
if not STOP_AT_INTENTS and final_queries:
    queries_path = f"{output_dir}/dspy_intent_generation_queries_{timestamp}.csv"
    df_results = pd.DataFrame(final_queries)
    df_results.to_csv(queries_path, index=False)

print("\nüìÅ Results saved:")
print(f"  üìÑ Original intents: {intents_path}")
print(f"  üîó Intent-food matches: {matches_path}")
if queries_path:
    print(f"  üìù Final queries: {queries_path}")
else:
    print("  ‚èπÔ∏è  Query generation skipped (stopped at intents)")

print("\nüéâ DSPy Intent Generation Complete!")