# Day 4: PlanDay Compilation with GEPA

**Goal**: Compile PlanDay module to improve event time preservation and location accuracy  
**Optimizer**: GEPA with the `auto="medium"` budget preset  
**Expected Runtime**: 4-6 hours

## Target Performance
- Uncompiled baseline: ~60-70% time/location preservation
- Compiled target: >85% preservation

## Setup: Install Dependencies

In [None]:
# Install required packages
# `!` runs the line as a shell command in the notebook; `-q` keeps pip quiet.
# NOTE(review): only `dspy` is imported in the cells shown here —
# sentence-transformers and accelerate may be unused; confirm before trimming.
!pip install -q dspy-ai sentence-transformers accelerate


## Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the save cell later writes its
# outputs under this mount.
from google.colab import drive
drive.mount('/content/drive')

# Create directories
# `-p` makes the command a no-op when the folders already exist, so the cell
# can be re-run safely.
!mkdir -p /content/drive/MyDrive/mini-town/compiled
!mkdir -p /content/drive/MyDrive/mini-town/checkpoints


## Upload Seed File and Set API Keys

In [None]:
# Imported in this cell because it runs before the upload cell that also
# imports them; without these lines a fresh top-to-bottom run fails with
# NameError on `os` / `getpass`.
import os
from getpass import getpass

# Set Together.ai API key for GEPA reflection LM
# getpass() prompts without echoing, so the key is not stored in cell output.
os.environ['TOGETHER_API_KEY'] = getpass('Enter your TOGETHER_API_KEY: ')
print("✅ API keys configured")


In [None]:
from google.colab import files
import os
from getpass import getpass

# Upload planner seeds
print("Upload planner_seeds_v1.json:")
uploaded = files.upload()

# files.upload() returns a {filename: contents} mapping. The seed-loading cell
# opens 'planner_seeds_v1.json' by exact name, so flag a cancelled upload or a
# differently named file (Colab renames duplicates, e.g. "name (1).json") here
# rather than failing -- or silently reading a stale copy -- later on.
if 'planner_seeds_v1.json' not in uploaded:
    print(f"⚠️ Expected 'planner_seeds_v1.json' but received: {sorted(uploaded) or 'nothing'}")

# Set API key
# getpass() prompts without echoing, so the key is not stored in cell output.
os.environ['GROQ_API_KEY'] = getpass('Enter your GROQ_API_KEY: ')


## Configure DSPy

In [None]:
import dspy

# LM used by default for every DSPy module call in this notebook. The key is
# read from the environment variable populated by the upload/key cell.
lm = dspy.LM(
    "groq/llama-3.1-8b-instant",
    api_key=os.environ.get('GROQ_API_KEY'),
    max_tokens=512,
    temperature=0.3,
)

# Register `lm` as the session-wide default.
dspy.settings.configure(lm=lm)
print("✅ DSPy configured with Groq LLM")


## Define PlanDay Signature

In [None]:
# DSPy signature describing the planner task: six text inputs in, a reasoning
# string and a plan string out.
# NOTE(review): DSPy uses the class docstring and each `desc` as prompt text,
# so wording changes there are behavior changes, not documentation changes.
class PlanDay(dspy.Signature):
    """Create a time-blocked plan preserving exact event times and locations.
    
    CRITICAL: When invited to an event, use the EXACT TIME from the invitation.
    Include coordinates in format (x, y) for all locations.
    """
    
    # --- Inputs (all plain strings) ---
    agent_goal: str = dspy.InputField(desc="Agent's high-level goal")
    agent_personality: str = dspy.InputField(desc="Agent's personality traits")
    current_time: str = dspy.InputField(desc="Current time (e.g., '2:30 PM')")
    current_location: str = dspy.InputField(desc="Current location coordinates")
    # The seed-loading cell fills the next two as newline-joined "- item" bullets.
    recent_events: str = dspy.InputField(desc="Recent invitations with exact times")
    relevant_memories: str = dspy.InputField(desc="Relevant memories")
    
    # --- Outputs ---
    reasoning: str = dspy.OutputField(desc="Explain time preservation")
    # `plan` is the only output that planning_metric inspects.
    plan: str = dspy.OutputField(desc="Time-blocked plan with coordinates")

print("✅ PlanDay signature defined")


## Load Seeds

In [None]:
import json

# Read the uploaded seed file; it is expected to hold a top-level "seeds" list.
with open('planner_seeds_v1.json', 'r') as seed_file:
    seeds_data = json.load(seed_file)

# Build one dspy.Example per seed. List-valued fields are flattened into
# "- item" bullet lines because the PlanDay inputs are plain strings.
# `gold_plan` is kept on the example but is not marked as an input.
trainset = [
    dspy.Example(
        agent_goal=seed['agent_goal'],
        agent_personality=seed['agent_personality'],
        current_time=seed['current_time'],
        current_location=seed['current_location'],
        recent_events='\n'.join(f"- {event}" for event in seed['recent_events']),
        relevant_memories='\n'.join(f"- {memory}" for memory in seed['relevant_memories']),
        gold_plan=seed['gold_plan'],
    ).with_inputs(
        'agent_goal',
        'agent_personality',
        'current_time',
        'current_location',
        'recent_events',
        'relevant_memories',
    )
    for seed in seeds_data['seeds']
]

print(f"✅ Loaded {len(trainset)} training examples")


## Define Metric

In [None]:
import re

# Clock times such as "2:30 PM" / "02:30pm" (case-insensitive, optional space).
TIME_PATTERN = re.compile(r"\b(\d{1,2}:\d{2}\s*[AP]M)\b", re.IGNORECASE)
# Non-negative coordinate pairs such as "(10, 20)" or "(10.5,20)".
LOCATION_PATTERN = re.compile(r"\((\d+(?:\.\d+)?),\s*(\d+(?:\.\d+)?)\)")


def extract_times(text):
    """Return every clock time found in *text*, exactly as written."""
    return [m.strip() for m in TIME_PATTERN.findall(text)]


def extract_locations(text):
    """Return every coordinate pair in *text*, re-rendered as ``"(x, y)"``."""
    return [f"({m[0]}, {m[1]})" for m in LOCATION_PATTERN.findall(text)]


def _canonical_time(time_text):
    """Normalize case, spacing and a leading zero: ``03:00 pm`` -> ``3:00PM``."""
    compact = re.sub(r"\s+", "", time_text).upper()
    hour, rest = compact.split(":", 1)
    return f"{int(hour)}:{rest}"


def _canonical_times(text):
    """Return the set of normalized clock times mentioned in *text*."""
    return {_canonical_time(t) for t in extract_times(text)}


def _canonical_locations(text):
    """Return the set of ``(x, y)`` float tuples mentioned in *text*."""
    return {(float(x), float(y)) for x, y in LOCATION_PATTERN.findall(text)}


def planning_metric(example, pred, trace=None, pred_name=None, pred_trace=None):
    """Score how well ``pred.plan`` preserves event times and locations.

    Each non-empty, non-``#`` line of ``example.recent_events`` contributes up
    to two checks: one if it mentions a time, one if it mentions a coordinate
    pair. A check passes when the plan mentions any of the same values.

    Both sides are normalized before comparison, so ``3:00 PM`` matches
    ``3:00pm`` and ``(10,20)`` matches ``(10, 20)``, while ``3:00 PM`` no
    longer matches inside ``13:00 PM`` as a substring.

    Args:
        example: Object with ``recent_events`` and ``agent_goal`` strings.
        pred: Object with a ``plan`` string; missing or empty scores 0.0.
        trace, pred_name, pred_trace: Unused. Accepted so the function can be
            called with the five-argument metric signature GEPA expects, as
            well as with the plain ``(example, pred)`` form.

    Returns:
        float: fraction of checks satisfied, in [0.0, 1.0]. When the events
        yield no checks at all, 1.0 if the first word of ``agent_goal``
        appears in the plan (case-insensitive), otherwise 0.5.
    """
    plan_text = getattr(pred, 'plan', None) or ""
    if not plan_text:
        return 0.0

    # Parse the plan once; every event line is compared against these sets.
    plan_times = _canonical_times(plan_text)
    plan_locations = _canonical_locations(plan_text)

    total_checks = 0
    satisfied = 0

    for event in example.recent_events.split('\n'):
        event = event.strip()
        if not event or event.startswith('#'):
            continue

        event_times = _canonical_times(event)
        event_locations = _canonical_locations(event)

        if event_times:
            total_checks += 1
            if not event_times.isdisjoint(plan_times):
                satisfied += 1

        if event_locations:
            total_checks += 1
            if not event_locations.isdisjoint(plan_locations):
                satisfied += 1

    if total_checks == 0:
        # Nothing to preserve: fall back to a weak relevance signal. Guard the
        # empty-goal case, which previously raised IndexError.
        goal_words = example.agent_goal.split()
        if goal_words and goal_words[0].lower() in plan_text.lower():
            return 1.0
        return 0.5

    return satisfied / total_checks

print("✅ Metric defined")


## Create Uncompiled Baseline

In [None]:
# Baseline planner: a plain dspy.Predict over the PlanDay signature with no
# optimization applied. The same object is later handed to the optimizer as
# the student program.
uncompiled_planner = dspy.Predict(PlanDay)
print("✅ Uncompiled baseline created")


## Evaluate Uncompiled Baseline

In [None]:
def evaluate_planner(module, testset):
    """Run *module* on every example in *testset* and summarize the scores.

    Args:
        module: Callable accepting the six PlanDay inputs as keyword
            arguments and returning a prediction for ``planning_metric``.
        testset: Iterable of examples exposing those six inputs as
            attributes.

    Returns:
        dict: ``'avg'`` is the mean metric score and ``'high_quality_pct'``
        the percentage of examples scoring at least 0.9. Both are 0.0 when
        *testset* is empty (previously a ZeroDivisionError).
    """
    scores = []
    for example in testset:
        try:
            pred = module(
                agent_goal=example.agent_goal,
                agent_personality=example.agent_personality,
                current_time=example.current_time,
                current_location=example.current_location,
                recent_events=example.recent_events,
                relevant_memories=example.relevant_memories
            )
            score = planning_metric(example, pred)
            scores.append(score)
        except Exception as e:
            # Deliberately best-effort: a failed call (e.g. an LM/API error)
            # counts as a zero instead of aborting the whole evaluation.
            print(f"Error: {e}")
            scores.append(0.0)

    if not scores:
        return {'avg': 0.0, 'high_quality_pct': 0.0}

    avg = sum(scores) / len(scores)
    high_quality = sum(1 for s in scores if s >= 0.9)
    return {'avg': avg, 'high_quality_pct': high_quality / len(scores) * 100}

# Score the baseline before optimization. Note this is measured on `trainset`,
# the same examples later passed to the optimizer, so it is a training-set
# score rather than a held-out one.
print("Evaluating uncompiled baseline...")
uncompiled_results = evaluate_planner(uncompiled_planner, trainset)
print(f"Avg Score: {uncompiled_results['avg']:.2f}")
print(f"High Quality: {uncompiled_results['high_quality_pct']:.1f}%")


## Configure GEPA Optimizer

In [None]:
from dspy.teleprompt import GEPA
import time
import os
import dspy

# Configure reflection LM (uses Together.ai model)
# This is the model GEPA consults when proposing revised instructions; the key
# comes from the TOGETHER_API_KEY environment variable set earlier.
reflection_lm = dspy.LM(
    model="together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    api_key=os.getenv("TOGETHER_API_KEY"),
    temperature=0.5  # Slightly higher temp for creative reflection
)

# GEPA optimizer
optimizer = GEPA(
    metric=planning_metric,
    auto="medium",               # Use preset budget ("light", "medium", or "heavy")
    reflection_minibatch_size=5, # Number of examples per reflection round
    track_stats=True,            # Enable detailed logging
    reflection_lm=reflection_lm, # Required for GEPA
    # Persist GEPA's logs and candidate programs to the Drive folder created
    # during setup, so a disconnected Colab session can resume instead of
    # starting over. Delete this folder to force a fresh run (for example
    # after changing the metric or the seeds).
    log_dir='/content/drive/MyDrive/mini-town/checkpoints',
)

print("✅ GEPA optimizer initialized")
print("Budget level: medium")
print("Checkpoints: /content/drive/MyDrive/mini-town/checkpoints")


## Run GEPA Compilation

⚠️ **This will take several hours**  
💾 Progress saved to Google Drive automatically

In [None]:
# Launch the optimization and report wall-clock duration. `elapsed` (seconds)
# is reused by the save cell when it writes the results summary.
print("🚀 Starting GEPA compilation...")
print("This will take several hours. You can monitor progress in the output below.")
print("-" * 60)

start_time = time.time()
compiled_planner = optimizer.compile(student=uncompiled_planner, trainset=trainset)
elapsed = time.time() - start_time

print("\n✅ Compilation complete!")
print(f"Time elapsed: {elapsed/3600:.2f} hours")


## Evaluate Compiled Module

In [None]:
# Score the optimized planner on the same examples as the baseline so the two
# result dicts are directly comparable.
print("Evaluating compiled module...")
compiled_results = evaluate_planner(compiled_planner, trainset)
print(f"Avg Score: {compiled_results['avg']:.2f}")
print(f"High Quality: {compiled_results['high_quality_pct']:.1f}%")

# Difference in the share of high-quality (score >= 0.9) plans, in percentage
# points. The `+` format flag emits the correct sign itself, so a regression
# prints "-5.0%" rather than the "+-5.0%" a hard-coded plus would produce.
improvement = compiled_results['high_quality_pct'] - uncompiled_results['high_quality_pct']
print(f"\nImprovement: {improvement:+.1f}%")


## Save Compiled Module

In [None]:
# Destinations for the three artifacts, all under the Drive `compiled/` folder.
save_path = '/content/drive/MyDrive/mini-town/compiled/compiled_planner.json'
prompt_path = '/content/drive/MyDrive/mini-town/compiled/prompt_planner.txt'
results_path = '/content/drive/MyDrive/mini-town/compiled/planner_results.json'

# 1) The compiled program itself, in DSPy's own save format.
compiled_planner.save(save_path)
print(f"✅ Saved to: {save_path}")

# 2) A plain-text dump of the program state, for reading the optimized prompt.
with open(prompt_path, 'w') as prompt_file:
    prompt_file.write(str(compiled_planner.dump_state()))
print(f"✅ Prompt saved to: {prompt_path}")

# 3) Before/after scores plus how long compilation took.
results = {
    'compilation_time_hours': elapsed / 3600,
    'uncompiled': uncompiled_results,
    'compiled': compiled_results,
    'improvement_pct': improvement,
}
with open(results_path, 'w') as results_file:
    json.dump(results, results_file, indent=2)
print(f"✅ Results saved to: {results_path}")


## Download Files

In [None]:
# Send each saved artifact to the browser as a download, in the order saved.
for artifact_path in (
    '/content/drive/MyDrive/mini-town/compiled/compiled_planner.json',
    '/content/drive/MyDrive/mini-town/compiled/prompt_planner.txt',
    '/content/drive/MyDrive/mini-town/compiled/planner_results.json',
):
    files.download(artifact_path)

print("✅ Done! Copy compiled_planner.json to your project's compiled/ directory")
