# Granular Generator Debug Notebook

**Deep dive into DSPy components for advanced tinkering and debugging.**

This notebook exposes the full DSPy function code for modification, experimentation, and detailed debugging.

## Setup & Configuration

In [None]:
import json
import os
import sys

import dspy
from dotenv import load_dotenv

load_dotenv()

# Setup project paths.
# The project root is read from the `root_folder` environment variable; when
# it is unset or empty, sys.path is left untouched.
project_root = os.environ.get("root_folder")
if project_root:
    # Prepend the root first and the dataset folder second, so the resulting
    # lookup order is [<root>/esci-dataset, <root>, ...]. Any earlier copy of
    # an entry is removed before it is prepended: notebook cells are commonly
    # re-run, and unguarded inserts would keep growing sys.path with duplicates.
    for _path in (project_root, os.path.join(project_root, "esci-dataset")):
        if _path in sys.path:
            sys.path.remove(_path)
        sys.path.insert(0, _path)

# Configuration shared by the cells below.
CONFIG = {
    'MODEL': 'gpt-4o-mini',   # model name handed to the DSPy setup function
    'TEMPERATURE': 1.2,       # sampling temperature handed to the DSPy setup function
    'ESCI_LABEL': 'E',        # ESCI label used for the test generation call
}

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Surface the problem here instead of at the first API request.
    print("WARNING: OPENAI_API_KEY is not set; API calls are likely to fail until it is provided.")
print(f"‚úÖ Setup complete. Model: {CONFIG['MODEL']}")

## 🔧 DSPy Model Setup (TINKER HERE)

In [None]:
def setup_dspy_model_debug(api_key: str, model: str = "gpt-5-mini", temperature: float = 0.7) -> None:
    """Build a DSPy language-model client and install it as the DSPy default.

    Args:
        api_key: OpenAI API key passed to ``dspy.LM``.
        model: OpenAI model name without the ``openai/`` provider prefix.
        temperature: Sampling temperature. Applied only to models whose name
            does not start with ``gpt-5``; GPT-5 models are always configured
            with ``temperature=1.0`` and a notice is printed when the
            requested value differs.

    Raises:
        RuntimeError: If creating the LM or configuring DSPy fails. The
            original exception is chained as the cause.
    """
    if not api_key:
        # Do not abort: just make the likely cause of a later request failure
        # visible at setup time.
        print("WARNING: no API key supplied; requests are likely to fail until one is provided.")

    try:
        # Check if it's a GPT-5 reasoning model with special requirements
        if model.startswith("gpt-5"):
            print("üß† GPT-5 model detected - using special parameters")
            if temperature != 1.0:
                # The caller's value is not used on this branch; say so instead
                # of dropping it silently.
                print(f"   NOTE: requested temperature={temperature} is ignored; using temperature=1.0")
            lm = dspy.LM(
                model=f"openai/{model}",
                api_key=api_key,
                max_tokens=128000,  # GPT-5 requires max_tokens >= 16000
                temperature=1.0,    # GPT-5 requires temperature=1.0
                # NOTE(review): stated by the original author as a GPT-5
                # requirement - confirm against the DSPy LM documentation.
                model_type='responses'
            )
        else:
            print(f"üîß Standard model parameters for {model}")
            lm = dspy.LM(
                model=f"openai/{model}",
                api_key=api_key,
                max_tokens=4000,
                temperature=temperature
            )

        # Configure DSPy
        dspy.settings.configure(lm=lm)
        print(f"‚úÖ DSPy configured: {model}")

    except Exception as exc:
        # Report, then re-raise with context so the notebook cell fails loudly.
        error_msg = f"DSPy LM initialization failed for model '{model}': {exc}"
        print(f"‚ùå {error_msg}")
        raise RuntimeError(error_msg) from exc

# Setup model
setup_dspy_model_debug(
    api_key=OPENAI_API_KEY,
    model=CONFIG['MODEL'],
    temperature=CONFIG['TEMPERATURE'],
)

## 🔧 Query Generator Class (TINKER HERE)

In [None]:
class QueryGeneratorSignature(dspy.Signature):
    """DSPy signature for query generation task."""
    # NOTE(review): DSPy is understood to send a Signature's docstring to the
    # LM as the task instructions, so the docstring above is presumably part
    # of the prompt - confirm before rewording it.

    # Input: the fully assembled prompt text (food candidates + instructions).
    prompt_with_candidates = dspy.InputField(
        desc="Complete prompt with food candidates and instructions"
    )
    # Input: which ESCI label the queries should be generated for.
    esci_label = dspy.InputField(
        desc="ESCI label (E/S/C/I) to generate queries for"
    )
    # Output: the model's answer, requested as a JSON string.
    generated_queries = dspy.OutputField(
        desc="JSON string containing generated queries following the specified format"
    )

class QueryGeneratorDebug(dspy.Module):
    """Debug-oriented DSPy module that generates food delivery queries.

    Holds one predictor built from ``QueryGeneratorSignature`` and logs every
    call to stdout.
    """

    def __init__(self):
        super().__init__()
        # EXPERIMENT HERE: swap the predictor class to try other DSPy modules,
        # for example dspy.Predict or dspy.ReAct.
        predictor_cls = dspy.ChainOfThought
        self.generate = predictor_cls(QueryGeneratorSignature)

    def forward(self, prompt_with_candidates: str, esci_label: str) -> str:
        """Run the predictor and return its ``generated_queries`` field.

        Prints the configured model name, the ESCI label and the prompt length
        before the call, and the response length afterwards. Any exception is
        printed and then re-raised unchanged.
        """
        print("üöÄ API call starting...")
        print(f"   Model: {CONFIG['MODEL']}")
        print(f"   ESCI: {esci_label}")
        print(f"   Prompt: {len(prompt_with_candidates)} chars")

        call_inputs = {
            "prompt_with_candidates": prompt_with_candidates,
            "esci_label": esci_label,
        }
        try:
            prediction = self.generate(**call_inputs)
            queries = prediction.generated_queries
            print(f"   ‚úÖ Success: {len(queries)} chars")
            return queries
        except Exception as exc:
            print(f"   ‚ùå Failed: {exc}")
            raise


# Initialize generator
generator = QueryGeneratorDebug()
print("‚úÖ QueryGeneratorDebug ready")

## 🔧 Parsing Function (TINKER HERE)

In [None]:
def _strip_code_fence(text: str) -> str:
    """Return *text* without surrounding whitespace or a wrapping Markdown code fence."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (three backticks plus an optional
        # language tag such as "json"), then the closing fence if present.
        _opening, newline, remainder = cleaned.partition("\n")
        if newline:
            cleaned = remainder.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
    return cleaned.strip()


def parse_generated_output_debug(json_str: str):
    """Parse the generated JSON string - TINKER VERSION.

    Surrounding whitespace and a wrapping Markdown code fence (three
    backticks, optionally followed by a language tag) are removed before
    decoding, so fenced JSON answers parse instead of failing.

    Args:
        json_str: Raw response text expected to contain JSON.

    Returns:
        An object exposing the decoded value as ``.data`` and through
        ``.model_dump()`` (a minimal stand-in for a Pydantic model).

    Raises:
        ValueError: If the response is ``None``, empty or whitespace-only, if
            it is not valid JSON, or if inspecting the decoded value fails.
    """
    # Measure defensively: len(None) would raise TypeError before the
    # empty-response check below gets a chance to report the real problem.
    print(f"üîÑ Parsing response ({len(json_str) if json_str else 0} chars)")

    # Check for empty responses (None, "" or whitespace only)
    if not json_str or not json_str.strip():
        raise ValueError(f"Empty response from API: '{json_str}'")

    # ADD CUSTOM JSON CLEANING HERE
    cleaned = _strip_code_fence(json_str)

    # Mock Pydantic structure for compatibility
    class MockOutput:
        def __init__(self, data):
            self.data = data

        def model_dump(self):
            return self.data

    try:
        data = json.loads(cleaned)
        print("   ‚úÖ JSON parsed successfully")

        # Validation (ADD CUSTOM CHECKS HERE)
        if isinstance(data, dict):
            print(f"   üìä Keys: {list(data.keys())}")
            if 'candidates' in data:
                print(f"   üçΩÔ∏è Candidates found: {len(data['candidates'])}")

    except json.JSONDecodeError as exc:
        print(f"   ‚ùå JSON decode error: {exc}")
        # Preview the raw (uncleaned) text so fences or stray prose are visible.
        print(f"   üìù Response preview: {json_str[:200]}...")
        raise ValueError(f"JSON decode error: {exc}") from exc

    except Exception as exc:
        # Safety net for failures in the validation prints above (for example
        # a "candidates" value that has no length).
        print(f"   ‚ùå Parsing error: {exc}")
        raise ValueError(f"Parsing failed: {exc}") from exc

    return MockOutput(data)

print("üîß Debug parsing function ready")

## Test with Sample Data

In [None]:
# Create sample prompt for testing.
# The prompt names two food candidates and embeds an example of the JSON shape
# the model is asked to return: a top-level "candidates" list whose items carry
# "id", "name" and "queries", each query having "query" and "dimensions".
# The text is sent as-is, so any edit here changes what the model receives.
sample_prompt = """
Generate realistic food delivery queries for EXACT matches.

Food candidates:
1. Pizza (cheese, ham, olives)
2. Noodle Soup (tofu, scallions, broth)

Generate 2 queries per item in JSON format:
{
  "candidates": [
    {
      "id": 1,
      "name": "Pizza",
      "queries": [
        {"query": "pizza delivery", "dimensions": {}},
        {"query": "cheese pizza near me", "dimensions": {"location": "near me"}}
      ]
    }
  ]
}
"""

# Report the prompt size as a quick sanity check.
print(f"üìù Sample prompt ready ({len(sample_prompt)} chars)")

## 🚀 Test API Call

In [None]:
# Run one generation against the sample prompt and dump the raw text between
# two 60-character rules. On any failure the error is printed and result_json
# is set to None so the parsing cell can detect that there is nothing to parse.
try:
    result_json = generator(sample_prompt, CONFIG['ESCI_LABEL'])
    print("\nüîç RAW RESPONSE:", "=" * 60, result_json, "=" * 60, sep="\n")
except Exception as exc:
    print(f"‚ùå API call failed: {exc}")
    result_json = None

## 🔍 Test Parsing

In [None]:
# Decode the raw response and list every candidate's queries.
# Nothing is parsed when the API cell produced no (or an empty) response.
if not result_json:
    print("‚ùå No response to parse")
else:
    try:
        parsed = parse_generated_output_debug(result_json)
        candidates = parsed.model_dump().get("candidates", [])

        print("\nüéØ PARSED RESULTS:")
        for candidate in candidates:
            print(f"\nüçΩÔ∏è {candidate.get('name', 'Unknown')}:")
            # Number the queries from 1 for display.
            for position, entry in enumerate(candidate.get("queries", []), start=1):
                dimensions = entry.get("dimensions", {})
                print(f"   {position}. '{entry.get('query', '')}'")
                # Only show dimensions when the query actually has some.
                if dimensions:
                    print(f"      Dimensions: {dimensions}")

    except Exception as exc:
        print(f"‚ùå Parsing failed: {exc}")

## 🎯 Experimentation Notes

**Things to try:**

1. **Different DSPy modules**:
   - `dspy.Predict(QueryGeneratorSignature)` - Simple prediction
   - `dspy.ReAct(QueryGeneratorSignature)` - Reasoning and acting
   - `dspy.ChainOfThought(QueryGeneratorSignature)` - Step-by-step reasoning

2. **Custom prompt preprocessing**:
   - Add prompt cleaning in `forward()` method
   - Modify input field descriptions
   - Add context or examples dynamically

3. **Response post-processing**:
   - JSON cleaning in parsing function
   - Custom validation rules
   - Response filtering or enhancement

4. **Model parameters**:
   - Adjust `max_tokens` for longer responses
   - Experiment with `temperature` for creativity
   - Try different models (GPT-4, GPT-5)

**This notebook is your DSPy playground! 🎪**