# DSPy Optimization for ANEETAA Agents

This notebook demonstrates how to optimize ANEETAA agents using DSPy's SIMBA optimizer.

## Workflow:
1. Prepare training data from NEET materials
2. Define DSPy signatures and modules
3. Run SIMBA optimizer
4. Evaluate optimized agents
5. Log to MLflow
6. Deploy optimized models

In [None]:
# Install dependencies if needed
# %pip install "dspy>=3.0.3" "mlflow>=3.4.0" datasets pandas numpy scikit-learn

In [None]:
import os
import sys
import json
import dspy
import mlflow
import pandas as pd
import numpy as np
from pathlib import Path

# Make the `src` directory one level above the working directory importable
src_dir = Path.cwd().parent / 'src'
sys.path.insert(0, str(src_dir))

print("✓ Imports successful")

## 1. Set Up DSPy and MLflow

In [None]:
# Configure DSPy with your LLM
import getpass

# For OpenAI (recommended for optimization).
# Reuse a key that is already exported so that re-running this cell (or running
# the notebook non-interactively) does not prompt again or overwrite it.
api_key = os.environ.get('OPENAI_API_KEY') or getpass.getpass("Enter your OpenAI API key: ")
api_key = api_key.strip()
if not api_key:
    # Stop here rather than storing an empty key in the environment.
    raise ValueError("An OpenAI API key is required to configure DSPy")
os.environ['OPENAI_API_KEY'] = api_key

# Language model used by every DSPy module in this notebook
lm = dspy.LM(
    model="openai/gpt-4o-mini",
    max_tokens=500,
    temperature=0.1
)
dspy.settings.configure(lm=lm)

print("✓ DSPy configured with", lm.model)

In [None]:
# Point MLflow at the local tracking server and select the experiment
experiment_name = "aneeta-dspy-optimization"
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment(experiment_name)
mlflow.dspy.autolog()

# Echo the effective settings back as a quick sanity check
print("✓ MLflow configured")
print(f"  Tracking URI: {mlflow.get_tracking_uri()}")
active_experiment = mlflow.get_experiment_by_name(experiment_name)
print(f"  Experiment: {active_experiment.name}")

## 2. Load Training Data

In [None]:
def load_neet_training_data(data_dir=None, max_chunks_per_subject=20):
    """Load training data from processed NEET materials.

    Reads ``processed_<subject>_chunks.json`` for biology, chemistry and
    physics and turns each sufficiently long chunk into a ``dspy.Example``
    using a simple heuristic: the text before the first period becomes the
    question, the whole chunk is the context, and the chunk truncated to 500
    characters is the answer.

    Args:
        data_dir: Directory holding the chunk files. Defaults to
            ``<cwd>/../Processed Data``.
        max_chunks_per_subject: Number of leading chunks of each file to
            consider (demo limit). ``None`` means no limit.

    Returns:
        A list of ``dspy.Example`` objects with ``question`` and ``context``
        marked as inputs. The list is empty when no usable chunk is found.
    """
    if data_dir is None:
        data_dir = Path.cwd().parent / 'Processed Data'
    else:
        data_dir = Path(data_dir)

    examples = []

    for subject in ('biology', 'chemistry', 'physics'):
        path = data_dir / f'processed_{subject}_chunks.json'
        if not path.exists():
            # Report the gap instead of silently producing a smaller dataset.
            print(f"⚠ Skipping {subject}: {path} not found")
            continue

        with open(path, 'r', encoding='utf-8') as f:
            chunks = json.load(f)

        # Convert chunks to Q&A format (simple heuristic)
        for chunk in chunks[:max_chunks_per_subject]:
            # `or ''` also covers an explicit null page_content
            content = chunk.get('page_content') or ''
            if len(content) <= 100:
                continue

            # Create a question from the first sentence; chunks without a
            # period, or with nothing before it, give no usable question.
            first_sentence, period, _ = content.partition('.')
            first_sentence = first_sentence.strip()
            if not period or not first_sentence:
                continue

            examples.append(dspy.Example(
                question=f"Explain: {first_sentence}",
                context=content,
                answer=content[:500],
                subject=subject
            ).with_inputs('question', 'context'))

    print(f"✓ Loaded {len(examples)} training examples")
    return examples

# Load data
training_data = load_neet_training_data()

# Fail early with an actionable message: a dataset this small cannot be split
# into non-empty train and test parts.
if len(training_data) < 2:
    raise ValueError(
        f"Only {len(training_data)} training example(s) were loaded; at least 2 "
        "are needed for a train/test split. Check that the processed NEET "
        "chunk files exist under '../Processed Data'."
    )

# Split into train/test (fixed random_state keeps the split reproducible)
from sklearn.model_selection import train_test_split
trainset, testset = train_test_split(training_data, test_size=0.2, random_state=42)

print(f"Train: {len(trainset)} | Test: {len(testset)}")

In [None]:
# Peek at the first training example to sanity-check the generated fields
print("Example training data:", trainset[0], sep="\n")

## 3. Define DSPy Modules

In [None]:
# Import from our DSPy agents module
from aneeta.nodes.agents_dspy import (
    TeacherSignature,
    TeacherAgentDSPy,
    MCQSolverSignature,
    MCQSolverAgentDSPy
)

# Instantiate one agent of each kind (no constructor arguments)
teacher_agent, mcq_solver = TeacherAgentDSPy(), MCQSolverAgentDSPy()

print("✓ DSPy agents initialized")

## 4. Define Evaluation Metrics

In [None]:
def validate_explanation(example, prediction, trace=None):
    """Score an explanation with a cheap lexical heuristic.

    The response passes when it is at least 50 characters long and shares
    more than 10% of the distinct whitespace-separated, lower-cased words of
    ``example.context``.

    Args:
        example: Object exposing the reference passage as ``context``.
        prediction: Agent output; its ``response`` attribute is scored, or
            ``str(prediction)`` when that attribute is absent.
        trace: Unused; accepted so the function can be passed as a metric
            that receives a third argument.

    Returns:
        ``1.0`` when the response passes, otherwise ``0.0``.
    """
    response = prediction.response if hasattr(prediction, 'response') else str(prediction)
    if not isinstance(response, str):
        # A missing or non-text response is scored instead of raising
        response = "" if response is None else str(response)

    # Very short answers are rejected outright
    if len(response) < 50:
        return 0.0

    # Check if it contains key terms from context
    context_words = set(example.context.lower().split())
    if not context_words:
        return 0.0
    response_words = set(response.lower().split())
    overlap = len(context_words & response_words) / len(context_words)

    # Return a float on every path so scores can be averaged or compared
    # without mixing bool and float
    return float(overlap > 0.1)

print("✓ Metrics defined")

## 5. Optimize Teacher Agent with SIMBA

In [None]:
from dspy import SIMBA

# Configure optimizer
simba_settings = {
    "metric": validate_explanation,  # Scoring function defined above
    "max_demos": 3,                  # Number of demonstrations to bootstrap
    "bsize": 12,                     # Batch size for evaluation
    "num_threads": 1,                # Number of parallel threads
}
optimizer = SIMBA(**simba_settings)

print("✓ SIMBA optimizer configured")
print("Starting optimization... (this may take a few minutes)")

In [None]:
# Run optimization inside its own MLflow run
with mlflow.start_run(run_name="teacher_agent_optimization"):
    optimization_subset = trainset[:30]  # Use subset for faster optimization
    optimized_teacher = optimizer.compile(teacher_agent, trainset=optimization_subset)

    print("✓ Optimization complete!")

## 6. Evaluate Performance

In [None]:
def evaluate_agent(agent, testset, name="Agent", limit=10, metric=None):
    """Evaluate agent on test set.

    Args:
        agent: Callable invoked as
            ``agent(question=..., context=..., language="English")``.
        testset: Sliceable sequence of examples exposing ``question`` and
            ``context``.
        name: Label used in the printed summary.
        limit: Maximum number of leading examples to evaluate (demo limit);
            ``None`` evaluates the whole test set.
        metric: Callable ``metric(example, prediction)`` returning a numeric
            or boolean score. Defaults to ``validate_explanation``.

    Returns:
        The mean score as a float, or ``0.0`` when there is nothing to
        evaluate.
    """
    if metric is None:
        metric = validate_explanation

    scores = []

    for example in testset[:limit]:
        try:
            prediction = agent(
                question=example.question,
                context=example.context,
                language="English"
            )
            scores.append(float(metric(example, prediction)))
        except Exception as e:
            # Best effort: a failing example counts as a miss instead of
            # aborting the whole evaluation.
            print(f"Error: {e}")
            scores.append(0.0)

    if not scores:
        # np.mean([]) would yield nan together with a RuntimeWarning
        print(f"{name} - no examples to evaluate")
        avg_score = 0.0
    else:
        avg_score = float(np.mean(scores))

    print(f"{name} - Average Score: {avg_score:.2%}")
    return avg_score

# Score a freshly constructed agent and the optimized one on the same test set
baseline_score = evaluate_agent(TeacherAgentDSPy(), testset, "Baseline")
optimized_score = evaluate_agent(optimized_teacher, testset, "Optimized")

# Relative change in percent; reported as 0 when the baseline is not positive,
# which also avoids dividing by zero
if baseline_score > 0:
    improvement = (optimized_score - baseline_score) / baseline_score * 100
else:
    improvement = 0
print(f"\n🎉 Improvement: {improvement:.1f}%")

## 7. Inspect Optimized Prompts

In [None]:
# Smoke-test the optimized agent on a hand-written example
test_question = "Explain the process of photosynthesis"
test_context = "Photosynthesis is the process by which green plants use sunlight to synthesize foods from carbon dioxide and water."

# Same keyword arguments as the evaluation loop, but asking for a Tamil answer
sample_request = {
    "question": test_question,
    "context": test_context,
    "language": "Tamil",
}
result = optimized_teacher(**sample_request)

print("Question:", test_question)
print("\nResponse:")
print(result.response)

## 8. Log to MLflow Model Registry

In [None]:
# Record the optimized agent together with its scores and settings in one run
with mlflow.start_run(run_name="teacher_agent_v1"):
    # Log model
    model_info = mlflow.dspy.log_model(
        optimized_teacher,
        artifact_path="teacher_agent",
        input_example="What is mitosis?"
    )

    # Log metrics
    mlflow.log_metric("baseline_score", baseline_score)
    mlflow.log_metric("optimized_score", optimized_score)
    mlflow.log_metric("improvement_percent", improvement)

    # Log parameters
    mlflow.log_param("optimizer", "SIMBA")
    mlflow.log_param("max_demos", 3)
    # The optimizer was compiled on trainset[:30], so record the number of
    # examples actually used rather than the size of the full train split.
    mlflow.log_param("training_size", len(trainset[:30]))

    print("✓ Model logged to MLflow")
    print(f"  Model URI: {model_info.model_uri}")

## 9. Register Model for Production

In [None]:
# Register the logged model in the MLflow Model Registry under a fixed name
model_name = "teacher-agent-dspy"
model_version = mlflow.register_model(model_uri=model_info.model_uri, name=model_name)

print(f"✓ Registered model: {model_name}")
print(f"  Version: {model_version.version}")

In [None]:
# Transition to production
# NOTE(review): registry stages appear to be deprecated in recent MLflow
# releases in favour of model version aliases — confirm before relying on
# this call in new deployments.
client = mlflow.tracking.MlflowClient()
promotion = {
    "name": model_name,
    "version": model_version.version,
    "stage": "Production",
}
client.transition_model_version_stage(**promotion)

print("✓ Model transitioned to Production stage")

## 10. Load and Test Production Model

In [None]:
# Load the registered model through its "production" stage URI
production_model_uri = f"models:/{model_name}/production"
production_agent = mlflow.dspy.load_model(production_model_uri)

# Test it with a hand-written question, asking for a Hindi answer
production_query = {
    "question": "What is DNA replication?",
    "context": "DNA replication is the process of producing two identical replicas from one original DNA molecule.",
    "language": "Hindi",
}
test_result = production_agent(**production_query)

print("Production Model Response:")
print(test_result.response)

## Next Steps

1. **Optimize other agents**: MCQ Solver, Mentor, Quiz Generator
2. **Expand training data**: Use more NEET materials
3. **Try other optimizers**: MIPROv2, BootstrapFewShot
4. **Fine-tune models**: Use BootstrapFinetune for Gemma
5. **Deploy to production**: Update app.py to load from MLflow

See `DSPy_Integration_Guide.md` for detailed instructions!