# DSPy Optimization for ANEETAA Agents

This notebook demonstrates how to optimize ANEETAA agents using DSPy's SIMBA optimizer.

## Workflow:
1. Prepare training data from NEET materials
2. Define DSPy signatures and modules
3. Run SIMBA optimizer
4. Evaluate optimized agents
5. Log to MLflow
6. Deploy optimized models

In [2]:
# Install dependencies if needed
%pip install --upgrade "pydantic>=2.0" "dspy>=3.0.3" "mlflow>=3.4.0" datasets pandas numpy python-dotenv scikit-learn

Collecting pydantic>=2.0
  Using cached pydantic-2.12.3-py3-none-any.whl.metadata (87 kB)
Collecting pandas
  Using cached pandas-2.3.3-cp311-cp311-win_amd64.whl.metadata (19 kB)
Collecting numpy
  Using cached numpy-2.3.4-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached pydantic-2.12.3-py3-none-any.whl (462 kB)
Using cached pandas-2.3.3-cp311-cp311-win_amd64.whl (11.3 MB)
Downloading python_dotenv-1.2.1-py3-none-any.whl (21 kB)
Downloading scikit_learn-1.7.2-cp311-cp311-win_amd64.whl (8.9 MB)
   ---------------------------------------- 0.0/8.9 MB ? eta -:--:--
   -------------------------------------- - 8.7/8.9 MB 44.9 MB/s eta 0:00:01
   -------------------------------

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-openai 0.3.35 requires langchain-core<1.0.0,>=0.3.78, but you have langchain-core 0.2.43 which is incompatible.


In [1]:
import os
import sys
import json
import dspy
import mlflow
import pandas as pd
import numpy as np
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

print("✓ Imports successful")

  from pandas.core import (


✓ Imports successful


## 1. Setup DSPy and MLflow

In [5]:
# Configure DSPy with your LLM
import getpass

# For OpenAI (recommended for optimization)
api_key = getpass.getpass("Enter your OpenAI API key: ")
os.environ['OPENAI_API_KEY'] = api_key

lm = dspy.LM(
    model="openai/gpt-4o-mini",
    max_tokens=500,
    temperature=0.1
)
dspy.settings.configure(lm=lm)

print("✓ DSPy configured with", lm.model)

✓ DSPy configured with openai/gpt-4o-mini


In [3]:
# Setup MLflow (safe: fall back to local file store to avoid network timeouts)
from pathlib import Path
from dotenv import load_dotenv
from mlflow.tracking import MlflowClient

# Load .env if present (so teammate's .env is respected)
load_dotenv()

# Use environment variable if set, otherwise use a local `mlruns` folder to avoid hitting an unavailable server
mlflow_tracking_uri = os.getenv('MLFLOW_TRACKING_URI', f"file://{str(Path.cwd() / 'mlruns')}")
mlflow.set_tracking_uri(mlflow_tracking_uri)

# If MLFLOW_EXPERIMENT_ID is set, use it; otherwise use experiment name
exp_id = os.getenv('MLFLOW_EXPERIMENT_ID')
if exp_id:
    try:
        client = MlflowClient()
        exp = client.get_experiment(exp_id)
        if exp is not None:
            mlflow.set_experiment(experiment_id=exp_id)
            print(f"Using Databricks experiment: {exp.name} (ID: {exp_id})")
        else:
            print(f"Experiment ID {exp_id} not found, creating new experiment")
            mlflow.set_experiment('aneeta-dspy-optimization')
    except Exception as e:
        print(f"Error accessing experiment ID: {e}")
        mlflow.set_experiment('aneeta-dspy-optimization')
else:
    mlflow.set_experiment('aneeta-dspy-optimization')

# Attempt to enable DSPy autologging if available, but fail fast and continue if not
try:
    if hasattr(mlflow, 'dspy'):
        try:
            mlflow.dspy.autolog()
            print("✓ DSPy autolog enabled")
        except Exception as e:
            print(f"ℹ DSPy autolog not available: {e}")
    else:
        print("ℹ mlflow.dspy not present in MLflow; check installation")
except Exception as e:
    print(f"⚠ MLflow autolog setup error: {e}")

print("✓ MLflow configured")
print(f"  Tracking URI: {mlflow.get_tracking_uri()}")

Using Databricks experiment: /Users/benjaminloh.2023@smu.edu.sg/MLflow Tracing Tutorial (ID: 3840367859585746)
✓ DSPy autolog enabled
✓ MLflow configured
  Tracking URI: databricks


In [4]:
# Diagnostic cell: verify MLflow <-> Databricks connectivity and DSPy tracer availability
import os
import mlflow
import dspy
from dotenv import load_dotenv
from mlflow.tracking import MlflowClient

load_dotenv()

print('mlflow version:', getattr(mlflow, '__version__', 'unknown'))
print('dspy version:', getattr(dspy, '__version__', 'unknown'))
print('mlflow.dspy present:', hasattr(mlflow, 'dspy'))
print('Effective tracking URI (before set):', mlflow.get_tracking_uri())

# If notebook or environment set a tracking URI, mlflow.get_tracking_uri() will show it.
# Try to resolve the provided experiment id (if any) to a name on the tracking server
exp_id = os.getenv('MLFLOW_EXPERIMENT_ID')
if exp_id:
    try:
        client = MlflowClient()
        exp = client.get_experiment(exp_id)
        if exp is not None:
            print(f'Resolved experiment id={exp_id} -> name={exp.name}')
        else:
            print(f'Experiment id={exp_id} not found on tracking server')
    except Exception as e:
        print('Failed to resolve experiment id:', e)
else:
    print('No MLFLOW_EXPERIMENT_ID set in environment')

# If mlflow.dspy exists, print a quick attribute to ensure it is usable
if hasattr(mlflow, 'dspy'):
    try:
        print('mlflow.dspy OK:', hasattr(mlflow.dspy, 'autolog'))
    except Exception as e:
        print('mlflow.dspy present but accessing it raised:', e)


mlflow version: 2.18.0
dspy version: 3.0.3
mlflow.dspy present: True
Effective tracking URI (before set): databricks
Resolved experiment id=3840367859585746 -> name=/Users/benjaminloh.2023@smu.edu.sg/MLflow Tracing Tutorial
mlflow.dspy OK: True
Resolved experiment id=3840367859585746 -> name=/Users/benjaminloh.2023@smu.edu.sg/MLflow Tracing Tutorial
mlflow.dspy OK: True


## 2. Load Training Data

In [6]:
def load_neet_training_data():
    """Load training data from processed NEET materials."""
    data_dir = Path.cwd().parent / 'Processed Data'
    
    # Load biology chunks
    bio_path = data_dir / 'processed_biology_chunks.json'
    chem_path = data_dir / 'processed_chemistry_chunks.json'
    physics_path = data_dir / 'processed_physics_chunks.json'
    
    examples = []
    
    for path, subject in [(bio_path, 'biology'), (chem_path, 'chemistry'), (physics_path, 'physics')]:
        if path.exists():
            with open(path, 'r', encoding='utf-8') as f:
                chunks = json.load(f)
                
            # Convert chunks to Q&A format (simple heuristic)
            for chunk in chunks[:20]:  # Limit for demo
                content = chunk.get('page_content', '')
                if len(content) > 100:
                    # Create a question from the first sentence
                    sentences = content.split('.')
                    if len(sentences) >= 2:
                        question = f"Explain: {sentences[0].strip()}"
                        answer = content
                        
                        examples.append(dspy.Example(
                            question=question,
                            context=content,
                            answer=answer[:500],
                            subject=subject
                        ).with_inputs('question', 'context'))
    
    print(f"✓ Loaded {len(examples)} training examples")
    return examples

# Load data
training_data = load_neet_training_data()

# Split into train/test
from sklearn.model_selection import train_test_split
trainset, testset = train_test_split(training_data, test_size=0.2, random_state=42)

print(f"Train: {len(trainset)} | Test: {len(testset)}")

✓ Loaded 58 training examples
Train: 46 | Test: 12
Train: 46 | Test: 12


In [7]:
# Inspect a training example
print("Example training data:")
print(trainset[0])

Example training data:
Example({'question': 'Explain: 147 Appendix\nIn each case, we divide or multiply the decimal by a power of 10, to bring one non-zero digit to the left\nof the decimal point, and do the reverse operation by the same power of 10, indicated separately', 'context': '147 Appendix\nIn each case, we divide or multiply the decimal by a power of 10, to bring one non-zero digit to the left\nof the decimal point, and do the reverse operation by the same power of 10, indicated separately.\nThus, any positive decimal can be written in the form\nn = m × 10\np\nwhere p is an integer (positive, zero or negative) and 1 < m < 10. This is called the “standard form of n.”\nWorking Rule\n1. Move the decimal point to the left, or to the right, as may be necessary, to bring one non-zero digit\nto the left of decimal point.\n2. (i) If you move p places to the left, multiply by 10\np\n.\n(ii) If you move p places to the right, multiply by 10\n–p\n.\n(iii) If you do not move the decimal p

## 3. Define DSPy Modules

In [8]:
# Import from our DSPy agents module
from aneeta.nodes.agents_dspy import (
    TeacherSignature,
    TeacherAgentDSPy,
    MCQSolverSignature,
    MCQSolverAgentDSPy
)

# Initialize agents
teacher_agent = TeacherAgentDSPy()
mcq_solver = MCQSolverAgentDSPy()

print("✓ DSPy agents initialized")

2025-10-31 01:45:32.427 
  command:

    streamlit run c:\Users\benja\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-10-31 01:45:32.427 
  command:

    streamlit run c:\Users\benja\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


✓ DSPy agents initialized


## 4. Define Evaluation Metrics

In [9]:
def validate_explanation(example, prediction, trace=None):
    """Validate if explanation is good quality."""
    # Simple check: response should be longer than question
    response = prediction.response if hasattr(prediction, 'response') else str(prediction)
    
    if len(response) < 50:
        return 0.0
    
    # Check if it contains key terms from context
    context_words = set(example.context.lower().split())
    response_words = set(response.lower().split())
    overlap = len(context_words & response_words) / len(context_words) if context_words else 0
    
    return overlap > 0.1

print("✓ Metrics defined")

✓ Metrics defined


## 5. Optimize Teacher Agent with SIMBA

In [10]:
from dspy import SIMBA

# Configure optimizer
optimizer = SIMBA(
    metric=validate_explanation,
    max_demos=3,        # Number of demonstrations to bootstrap
    bsize=12,           # Batch size for evaluation
    num_threads=1       # Number of parallel threads
)

print("✓ SIMBA optimizer configured")
print("Starting optimization... (this may take a few minutes)")

✓ SIMBA optimizer configured
Starting optimization... (this may take a few minutes)


In [11]:
# Run optimization
with mlflow.start_run(run_name="teacher_agent_optimization"):
    optimized_teacher = optimizer.compile(
        teacher_agent,
        trainset=trainset[:30],  # Use subset for faster optimization
    )
    
    print("✓ Optimization complete!")

2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Starting batch 1 of 8.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.


  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:01, 42.62it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:01, 67.88it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples:   4%|▍         | 3/72 [00:00<00:00, 89.66it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 72 examples:   6%|▌         | 4/72 [00:00<00:00, 102.62it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'lan

2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Batch 1: Baseline mini-batch score: 0.0

2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Batch 1: Invoking strategy: append_a_demo_
2025/10/31 01:45:48 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Batch 1: Invoking strategy: append_a_demo_
2025/10/31 01:45:48 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba: 

2025/10/31 01:45:48 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:45:48 INFO dspy.teleprompt.simba




2025/10/31 01:45:57 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives a complex question about physics concepts, it should break down the question into simpler components, ensuring that each part is addressed clearly. Additionally, it should leverage the context provided to extract key terms and concepts, ensuring that the explanation is both accurate and comprehensible. Avoid overly technical jargon unless necessary, and always aim for clarity in the response.
2025/10/31 01:45:57 INFO dspy.teleprompt.simba: 

2025/10/31 01:45:57 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #3, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:45:57 INFO dspy.teleprompt.simba: Batch 1: Invoking strategy: append_a_demo_
2025/10/31 01:45:57 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:45:57 INFO dspy.teleprompt.simba: 

2025/10/31 01:45:57 INFO dspy.teleprompt.simba: Ba

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 133.92it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 162.39it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 181.41it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 133.92it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:46:03 INFO dspy.teleprompt.simba: Scores after 1 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:46:03 INFO dspy.teleprompt.simba: Starting batch 2 of 8.
2025/10/31 01:46:03 INFO dspy.teleprompt.simba: Starting batch 2 of 8.





2025/10/31 01:46:03 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.


  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 90.90it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 116.49it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples:   4%|▍         | 3/72 [00:00<00:00, 128.91it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 90.90it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'la

2025/10/31 01:46:04 INFO dspy.teleprompt.simba: Batch 2: Baseline mini-batch score: 0.0

2025/10/31 01:46:04 INFO dspy.teleprompt.simba: Batch 2: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:04 INFO dspy.teleprompt.simba: Batch 2: Invoking strategy: append_a_rule
2025/10/31 01:46:04 INFO dspy.teleprompt.simba: Batch 2: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:04 INFO dspy.teleprompt.simba: Batch 2: Invoking strategy: append_a_rule





2025/10/31 01:46:09 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives a context that includes specific examples of amino acids and their functions, it should focus on clearly defining each amino acid mentioned, explaining their roles in proteins, and emphasizing the distinction between essential and non-essential amino acids. The explanation should be structured to first introduce the concept, followed by examples, and conclude with their significance in nutrition, ensuring clarity and relevance to the student's question.
2025/10/31 01:46:09 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:09 INFO dspy.teleprompt.simba: Batch 2: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:09 INFO dspy.teleprompt.simba: Batch 2: Invoking strategy: append_a_rule
2025/10/31 01:46:09 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:09 INFO dspy.teleprompt.simba: Batch 2: Processing bucket #2, with max sco

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 136.50it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 180.44it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 270.65it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 84 examples:   5%|▍         | 4/84 [00:00<00:00, 360.87it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Scores after 2 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Starting batch 3 of 8.
2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Starting batch 3 of 8.
2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.



  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
  0%|          | 0/72 [00:00<?, ?it/s]    | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:01, 66.11it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 101.26it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples:   4%|▍         | 3/72 [00:00<00:00, 125.16it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 72 examples:   6%|▌         | 4/72 [00:00<00:00, 166.89it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Batch 3: Baseline mini-batch score: 0.0

2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Batch 3: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Batch 3: Invoking strategy: append_a_rule
2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Batch 3: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:24 INFO dspy.teleprompt.simba: Batch 3: Invoking strategy: append_a_rule





2025/10/31 01:46:29 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives a context that includes specific details about a biological concept and a clear question regarding that concept, then it should ensure that it extracts key information from the context to construct a coherent and informative explanation. The module should focus on clarity and simplicity in its language, ensuring that the explanation is accessible to the target audience. Additionally, it should verify that it translates the explanation accurately into the specified language without redundancy.
2025/10/31 01:46:29 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:29 INFO dspy.teleprompt.simba: Batch 3: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:29 INFO dspy.teleprompt.simba: Batch 3: Invoking strategy: append_a_rule
2025/10/31 01:46:29 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:29 INFO dspy.teleprompt.simba: Bat

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 118.97it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 158.96it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 180.94it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 84 examples:   5%|▍         | 4/84 [00:00<00:00, 191.12it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Scores after 3 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Starting batch 4 of 8.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Starting batch 4 of 8.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.



  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
  0%|          | 0/72 [00:00<?, ?it/s]    | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 107.77it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 162.75it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples:   4%|▍         | 3/72 [00:00<00:00, 184.15it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 72 examples:   6%|▌         | 4/72 [00:00<00:00, 207.39it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 

2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Baseline mini-batch score: 0.0

2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Invoking strategy: append_a_demo_
2025/10/31 01:46:52 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Invoking strategy: append_a_rule
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:52 INFO dspy.teleprompt.simba: Batch 4: Invoking strategy: append_a_demo_
2025/10/31 01:46:52 INFO dspy.teleprompt.simba_utils: Adde




2025/10/31 01:46:58 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives a complex question about a physics concept, it should first identify key components of the question and break them down into simpler parts. Then, it should utilize the context to construct a clear and structured explanation in simple English, followed by a translation in the target language without repeating the English answer. This approach will enhance clarity and relevance in the response.
2025/10/31 01:46:58 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:58 INFO dspy.teleprompt.simba: Batch 4: Processing bucket #3, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:46:58 INFO dspy.teleprompt.simba: Batch 4: Invoking strategy: append_a_rule
2025/10/31 01:46:58 INFO dspy.teleprompt.simba: 

2025/10/31 01:46:58 INFO dspy.teleprompt.simba: Batch 4: Processing bucket #3, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 134.36it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 134.36it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 174.82it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 186.54it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:47:03 INFO dspy.teleprompt.simba: Scores after 4 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:47:03 INFO dspy.teleprompt.simba: Starting batch 5 of 8.
2025/10/31 01:47:03 INFO dspy.teleprompt.simba: Starting batch 5 of 8.
2025/10/31 01:47:03 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:47:03 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.



  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 119.54it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 119.54it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 174.76it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples

2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Baseline mini-batch score: 0.0

2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Invoking strategy: append_a_demo_
2025/10/31 01:47:04 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:04 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Invoking strategy: append_a_rule
2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:04 INFO dspy.teleprompt.simba: Batch 5: Invoking strategy: append_a_demo_
2025/10/31 01:47:04 INFO dspy.teleprompt.simba_utils: Adde




2025/10/31 01:47:10 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives a complex question about a chemistry concept, it should break down the question into simpler components, ensuring that each part is addressed clearly. Additionally, it should leverage the context provided to create a structured response that includes a clear explanation in simple English, followed by a translation in the target language without repeating the English answer. This approach will help in generating more accurate and relevant outputs.
2025/10/31 01:47:10 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:10 INFO dspy.teleprompt.simba: Batch 5: Processing bucket #3, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:10 INFO dspy.teleprompt.simba: Batch 5: Invoking strategy: append_a_demo_
2025/10/31 01:47:10 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:10 INFO dspy.teleprompt.simb

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 144.98it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 175.32it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 194.72it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 144.98it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:47:14 INFO dspy.teleprompt.simba: Scores after 5 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:47:14 INFO dspy.teleprompt.simba: Starting batch 6 of 8.
2025/10/31 01:47:14 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:47:14 INFO dspy.teleprompt.simba: Starting batch 6 of 8.
2025/10/31 01:47:14 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.



  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
  0%|          | 0/72 [00:00<?, ?it/s]    | 1/72 [00:00<00:00, 111.15it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 111.15it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 153.87it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples:   4%|▍         | 3/72 [00:00<00:00, 187.53it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 

2025/10/31 01:47:15 INFO dspy.teleprompt.simba: Batch 6: Baseline mini-batch score: 0.0

2025/10/31 01:47:15 INFO dspy.teleprompt.simba: Batch 6: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:15 INFO dspy.teleprompt.simba: Batch 6: Invoking strategy: append_a_rule
2025/10/31 01:47:15 INFO dspy.teleprompt.simba: Batch 6: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:15 INFO dspy.teleprompt.simba: Batch 6: Invoking strategy: append_a_rule
2025/10/31 01:47:15 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives complex scientific questions like 'Explain: 143 Appendix
Reduction half-reaction Eo/V', it should break down the question into simpler components, identify key concepts, and ensure that the context provided is directly relevant to the question. Additionally, it should prioritize clarity and conciseness in its explanations, en

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 110.80it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 134.61it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 152.25it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 110.80it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Scores after 6 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Starting batch 7 of 8.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Starting batch 7 of 8.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.



  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:01, 69.56it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:01, 69.56it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 98.16it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 98.

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Baseline mini-batch score: 0.0

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Invoking strategy: append_a_demo_
2025/10/31 01:47:24 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Invoking strategy: append_a_demo_
2025/10/31 01:47:24 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: 






2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Invoking strategy: append_a_demo_
2025/10/31 01:47:24 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Processing bucket #3, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Invoking strategy: append_a_demo_
2025/10/31 01:47:24 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Processing bucket #4, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:24 INFO dspy.teleprompt.simba: Batch 7: Invoking strategy: append_a_

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<00:00, 312.45it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 167.94it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 176.99it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 84 examples:   5%|▍         | 4/84 [00:00<00:00, 189.38it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:47:35 INFO dspy.teleprompt.simba: Scores after 7 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:47:35 INFO dspy.teleprompt.simba: Starting batch 8 of 8.
2025/10/31 01:47:35 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.
2025/10/31 01:47:35 INFO dspy.teleprompt.simba: Starting batch 8 of 8.
2025/10/31 01:47:35 INFO dspy.teleprompt.simba: Sampling program trajectories on 12 examples x 6 samples.



  0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 72 examples:   1%|▏         | 1/72 [00:00<00:00, 82.25it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 72 examples:   3%|▎         | 2/72 [00:00<00:00, 117.10it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 72 examples:   4%|▍         | 3/72 [00:00<00:00, 175.66it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
  0%|          | 0/72 [00:00<?, ?it/s]    | 4/72 [00:00<00:00, 234.21it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 72 examples:   0%|          | 0/72 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: '

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Baseline mini-batch score: 0.0

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Processing bucket #1, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Invoking strategy: append_a_demo_
2025/10/31 01:47:36 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Processing bucket #2, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Invoking strategy: append_a_demo_
2025/10/31 01:47:36 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Processing bucket #3, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0




2025/10/31 01:47:36 INFO dspy.teleprompt.simba_utils: Advice for generate_explanation.predict: If the module receives a complex question about a chemistry concept, it should break down the question into simpler components, ensuring that each part is addressed clearly. Additionally, it should leverage the context provided to create a structured response that includes a clear explanation in simple English, followed by a translation in the target language without repeating the English answer. This approach will help in generating more accurate and relevant outputs.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: 

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Processing bucket #6, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Batch 8: Invoking strategy: append_a_demo_
2025/10/31 01:47:36 INFO dspy.teleprompt.simba_utils: Added 0 demos (one each) across all predictors.
2025/10/31 01:47:36 INFO dspy.teleprompt.simb

  0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 84 examples:   0%|          | 0/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 84 examples:   1%|          | 1/84 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 84 examples:   2%|▏         | 2/84 [00:00<00:00, 116.86it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 4 / 84 examples:   4%|▎         | 3/84 [00:00<00:00, 139.62it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 5 / 84 examples:   5%|▍         | 4/84 [00:00<00:00, 156.94it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: Scores after 8 batches: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0

2025/10/31 01:47:36 INFO dspy.teleprompt.simba: VALIDATION: Evaluating 7 programs on the full trainset.
2025/10/31 01:47:36 INFO dspy.teleprompt.simba: VALIDATION: Evaluating 7 programs on the full trainset.



  0%|          | 0/210 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
  0%|          | 0/210 [00:00<?, ?it/s]    | 0/210 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 1 / 210 examples:   0%|          | 0/210 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 210 examples:   0%|          | 1/210 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 210 examples:   1%|          | 2/210 [00:00<00:01, 140.27it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 2 / 210 examples:   0%|          | 1/210 [00:00<?, ?it/s]TeacherAgentDSPy.forward() missing 1 required positional argument: 'language'
Processed 3 / 210 examples:   1%|          | 2/210 [00:00<00:01, 140.2

2025/10/31 01:47:38 INFO dspy.teleprompt.simba: Final trainset scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Best: 0.0 (at index 0)






✓ Optimization complete!
✓ Optimization complete!
🏃 View run teacher_agent_optimization at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746/runs/43de6b0835c54675907bf916a18c219e
🧪 View experiment at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746
🏃 View run teacher_agent_optimization at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746/runs/43de6b0835c54675907bf916a18c219e
🧪 View experiment at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746


## 6. Evaluate Performance

In [12]:
def evaluate_agent(agent, testset, name="Agent"):
    """Evaluate agent on test set."""
    scores = []
    
    for example in testset[:10]:  # Limit for demo
        try:
            prediction = agent(
                question=example.question,
                context=example.context,
                language="English"
            )
            score = validate_explanation(example, prediction)
            scores.append(score)
        except Exception as e:
            print(f"Error: {e}")
            scores.append(0)
    
    avg_score = np.mean(scores)
    print(f"{name} - Average Score: {avg_score:.2%}")
    return avg_score

# Evaluate baseline vs optimized
baseline_score = evaluate_agent(TeacherAgentDSPy(), testset, "Baseline")
optimized_score = evaluate_agent(optimized_teacher, testset, "Optimized")

improvement = ((optimized_score - baseline_score) / baseline_score * 100) if baseline_score > 0 else 0
print(f"\n🎉 Improvement: {improvement:.1f}%")



Baseline - Average Score: 90.00%




Optimized - Average Score: 90.00%

🎉 Improvement: 0.0%


## 7. Inspect Optimized Prompts

In [13]:
# Test the optimized agent
test_question = "Explain the process of photosynthesis"
test_context = "Photosynthesis is the process by which green plants use sunlight to synthesize foods from carbon dioxide and water."

result = optimized_teacher(
    question=test_question,
    context=test_context,
    language="Tamil"
)

print("Question:", test_question)
print("\nResponse:")
print(result.response)



Question: Explain the process of photosynthesis

Response:
Photosynthesis is the process where green plants use sunlight to make their food. They take in carbon dioxide from the air and water from the soil. Using sunlight, they convert these into glucose (a type of sugar) and oxygen. This process is vital for plants and produces oxygen for us to breathe.

பசுமை செடிகள் சூரிய ஒளியை பயன்படுத்தி தங்கள் உணவை உருவாக்கும் செயல்முறை. அவைகள் காற்றில் இருந்து கார்பன் டயாக்சைடு மற்றும் மண்ணில் இருந்து நீரை எடுத்துக்கொள்கின்றன. சூரிய ஒளியின் உதவியுடன், அவைகள் இதனை கிளுக்கோஸ் (ஒரு வகை சர்க்கரை) மற்றும் ஆக்சிஜனாக மாற்றுகின்றன. இந்த செயல்முறை செடிகளுக்கு மிகவும் முக்கியமானது மற்றும் நமக்கு மூச்சு எடுக்க ஆக்சிஜனை உருவாக்குகிறது.


## 8. Log to MLflow Model Registry

In [14]:
with mlflow.start_run(run_name="teacher_agent_v1"):
    # Log model
    model_info = mlflow.dspy.log_model(
        optimized_teacher,
        artifact_path="teacher_agent",
        input_example="What is mitosis?"
    )
    
    # Log metrics
    mlflow.log_metric("baseline_score", baseline_score)
    mlflow.log_metric("optimized_score", optimized_score)
    mlflow.log_metric("improvement_percent", improvement)
    
    # Log parameters
    mlflow.log_param("optimizer", "SIMBA")
    mlflow.log_param("max_demos", 3)
    mlflow.log_param("training_size", len(trainset))
    
    print("✓ Model logged to MLflow")
    print(f"  Model URI: {model_info.model_uri}")

TypeError("TeacherAgentDSPy.forward() missing 2 required positional arguments: 'question' and 'language'")Traceback (most recent call last):


  File "c:\Users\benja\anaconda3\Lib\site-packages\mlflow\utils\_capture_modules.py", line 166, in load_model_and_predict
    model.predict(input_example, params=params)


  File "c:\Users\benja\anaconda3\Lib\site-packages\mlflow\dspy\wrapper.py", line 62, in predict
    return self.model(inputs).toDict()
           ^^^^^^^^^^^^^^^^^^


  File "c:\Users\benja\anaconda3\Lib\site-packages\dspy\utils\callback.py", line 343, in sync_wrapper
    raise exception


  File "c:\Users\benja\anaconda3\Lib\site-packages\dspy\utils\callback.py", line 339, in sync_wrapper
    results = fn(instance, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


  File "c:\Users\benja\anaconda3\Lib\site-packages\dspy\primitives\module.py", line 78, in __call__
    return self.forward(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


TypeError: Teacher

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

  "inputs": "What is mitosis?"
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: TeacherAgentDSPy.forward() missing 2 required positional arguments: 'question' and 'language'


Uploading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

✓ Model logged to MLflow
  Model URI: runs:/0b5f733dbbdf42339fff50833e273d46/teacher_agent
🏃 View run teacher_agent_v1 at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746/runs/0b5f733dbbdf42339fff50833e273d46
🧪 View experiment at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746
🏃 View run teacher_agent_v1 at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746/runs/0b5f733dbbdf42339fff50833e273d46
🧪 View experiment at: https://dbc-847d0b13-8d35.cloud.databricks.com/ml/experiments/3840367859585746


## 9. Register Model for Production

In [15]:
# Register model
model_name = "teacher-agent-dspy"

model_version = mlflow.register_model(
    model_uri=model_info.model_uri,
    name=model_name
)

print(f"✓ Registered model: {model_name}")
print(f"  Version: {model_version.version}")

RestException: INVALID_PARAMETER_VALUE: Bad model name: please specify all three levels of the model in the form `catalog_name.schema_name.model_name`

In [None]:
# Transition to production
client = mlflow.tracking.MlflowClient()
client.transition_model_version_stage(
    name=model_name,
    version=model_version.version,
    stage="Production"
)

print(f"✓ Model transitioned to Production stage")

## 10. Load and Test Production Model

In [None]:
# Load production model
production_model_uri = f"models:/{model_name}/production"
production_agent = mlflow.dspy.load_model(production_model_uri)

# Test it
test_result = production_agent(
    question="What is DNA replication?",
    context="DNA replication is the process of producing two identical replicas from one original DNA molecule.",
    language="Hindi"
)

print("Production Model Response:")
print(test_result.response)

## Next Steps

1. **Optimize other agents**: MCQ Solver, Mentor, Quiz Generator
2. **Expand training data**: Use more NEET materials
3. **Try other optimizers**: MIPROv2, BootstrapFewShot
4. **Fine-tune models**: Use BootstrapFinetune for Gemma
5. **Deploy to production**: Update app.py to load from MLflow

See `DSPy_Integration_Guide.md` for detailed instructions!