In [None]:
from utils.helper import get_api_key, validate_prediction, load_data, ExperimentStats, load_cache, save_cache
import dspy
import mlflow

In [None]:
# Set up the MLflow experiment.
# Before running this cell, start the tracking server in a terminal:
# mlflow server --backend-store-uri sqlite:///data/mlflow.db --port 5005

mlflow.set_tracking_uri(uri="http://127.0.0.1:5005")
mlflow.set_experiment(experiment_name="dspy_optimization")

# Turn on MLflow's automatic logging for DSPy
mlflow.dspy.autolog()
print(
    "✓ MLflow tracking enabled",
    "View results: http://localhost:5005 or http://127.0.0.1:5005",
    sep="\n",
)

In [None]:
# User-tunable settings: edit the values in this cell.
# `useCache` keeps its name because later cells pass it as `cache=` to dspy.LM.
model_name = "groq/llama-3.1-8b-instant"
api_key = get_api_key("GROQ_API_KEY")  # looked up through the project helper
api_endpoint = "https://api.groq.com/openai/v1"
useCache = True

In [None]:
# Task model: the 8b-parameter LLM selected in the config cell
llm = dspy.LM(model_name, api_key=api_key, api_base=api_endpoint, cache=useCache)

# Larger 20b-parameter LLM, reserved for GEPA optimization
large_llm = dspy.LM(
    "groq/openai/gpt-oss-20b",
    api_key=api_key,
    api_base=api_endpoint,
    cache=useCache,
)

# Make the small model the default LM for every DSPy call
dspy.configure(lm=llm)

In [None]:
# Read the raw dataset definition
ds = load_data("../data/dataset.yaml")
# Restore a previously saved cache, if one exists
# NOTE(review): the .pkl extension suggests pickle — only load files you trust
load_cache("../data/cache.pkl")

# Convert each record of the 'workshop' entry into a dspy `Example`,
# marking `content` and `traceback` as the input fields
dataset = [
    dspy.Example(record).with_inputs("content", "traceback")
    for record in ds["workshop"]
]

In [None]:
def metric_function(example, prediction, trace=None):
    """Score a prediction by validating its fixed code against the example's test case.

    Args:
        example: Item exposing a ``'test_case'`` key.
        prediction: Object exposing a ``fixed_code`` attribute.
        trace: Accepted for compatibility with the metric call signature; unused.

    Returns:
        The first element of the pair returned by ``validate_prediction``
        (the score); the accompanying comment is discarded.
    """
    score, _ = validate_prediction(prediction.fixed_code, example['test_case'])
    return score


In [None]:
# Baseline: a no-op "fixer" that hands the original code straight back
class DummyFixer(dspy.Module):
    """Baseline module whose 'fix' is the untouched input code."""

    def forward(self, content, traceback) -> dspy.Prediction:
        """Return a prediction carrying `content` as `fixed_code`; `traceback` is ignored."""
        unchanged = dspy.Prediction(analysis="Code analysis", fixed_code=content)
        return unchanged


dummy_fixer = DummyFixer()

In [None]:
# ========================================
# Your CodeFixer from Section 3
# ========================================

# Signature for the analysis step: takes a code snippet plus extra context
# about the issue and yields a summary of the problem.
# NOTE(review): DSPy is understood to use a signature's docstring and field
# descriptions as prompt text, so treat them as runtime strings — confirm
# against the DSPy docs before rewording them.
class AnalyzeSignature(dspy.Signature):
    """Explain the problem in the code"""
    # Inputs
    snippet = dspy.InputField(description="Code snippet")
    context = dspy.InputField(description="Extra context about issue, like syntax error, etc.")
    # Output
    summary = dspy.OutputField(description="Issue details")

# Signature for the repair step: same inputs as the analysis step plus the
# analysis text itself; yields the corrected code.
# NOTE(review): the docstring and field descriptions are presumably used by
# DSPy as prompt text — confirm before rewording them.
class FixSignature(dspy.Signature):
    """Fix the code based on analysis"""
    # Inputs
    snippet = dspy.InputField(description="Code snippet")
    context = dspy.InputField(description="Extra context about issue, like syntax error, etc.")
    analysis = dspy.InputField(description="Analysis of the issue")
    # Output
    fixed_code = dspy.OutputField(description="Fixed code snippet")


class CodeFixer(dspy.Module):
    """Two-step program: first analyze the issue, then produce fixed code."""

    def __init__(self):
        # Keep the attribute names `analyze` and `fix`: they identify the two
        # predictors of this module.
        self.analyze = dspy.ChainOfThought(AnalyzeSignature)
        self.fix = dspy.Predict(FixSignature)

    def forward(self, content, traceback) -> dspy.Prediction:
        """Run analysis on the code, then request a fix informed by that analysis.

        Args:
            content: The code snippet to repair.
            traceback: Extra context about the issue, passed as `context`.

        Returns:
            A `dspy.Prediction` with `analysis` (the analysis summary) and
            `fixed_code` (the output of the fix step).
        """
        shared_inputs = {"snippet": content, "context": traceback}
        diagnosis = self.analyze(**shared_inputs).summary
        patched = self.fix(**shared_inputs, analysis=diagnosis).fixed_code
        return dspy.Prediction(analysis=diagnosis, fixed_code=patched)

# Collector for the results of every experiment run below
stats = ExperimentStats(dataset)

# A single evaluator, reused for the baseline and all optimized programs
evaluate = dspy.Evaluate(
    metric=metric_function,
    devset=dataset,
    num_threads=1,
    display_progress=True,
)

# The un-optimized program that the optimizers start from
fixer = CodeFixer()

In [None]:
# Run 1: the dummy baseline, tracked as its own MLflow run
with mlflow.start_run(run_name="baseline_dummy"):
    print("Evaluating dummy fixer (baseline)...")
    dummy_result = evaluate(dummy_fixer)
    stats.add_experiment("dummy", dummy_result)
    # Score is divided by 100 — presumably Evaluate reports a percentage
    mlflow.log_metric(key="pass_rate", value=dummy_result.score / 100)
    mlflow.log_param(key="fixer_type", value="dummy")

# Run 2: the un-optimized CodeFixer
with mlflow.start_run(run_name="my_codefixer"):
    result = evaluate(fixer)
    stats.add_experiment("fixer", result)
    mlflow.log_metric(key="pass_rate", value=result.score / 100)
    mlflow.log_param(key="fixer_type", value="codefixer_v1")

In [None]:
# Show the stats collected so far (dummy baseline and un-optimized fixer);
# the notebook displays the value of the cell's last expression.
stats.get_stats()

In [None]:
# ========================================
# INSTRUCTOR DEMO: MIPROv2 Optimization
# ========================================
# Expect roughly 2-3 minutes of runtime.
# Follow the trials and their scores as they are reported.
# ========================================

# "light" is the quick preset, chosen for speed
optimizer_mipro = dspy.MIPROv2(auto="light", metric=metric_function)

print(
    "Starting optimization...",
    "This will try different instructions and few-shot examples",
    "Watch the trial scores improve!\n",
    sep="\n",
)

# NOTE(review): the same `dataset` serves as both trainset and valset here,
# so validation scores are measured on data the optimizer has already seen.
with mlflow.start_run(run_name="optimized_by_MIPRO"):
    optimized_MIPRO_fixer = optimizer_mipro.compile(
        fixer,
        valset=dataset,
        trainset=dataset,
        requires_permission_to_run=False,
    )

print("\n✓ Optimization complete!")


In [None]:
# Evaluate the MIPROv2-optimized program in a dedicated MLflow run
with mlflow.start_run(run_name="evaluate_optimized_by_MIPRO"):
    optimized_mipro_result = evaluate(optimized_MIPRO_fixer)
    stats.add_experiment("optimized_mipro", optimized_mipro_result)
    mlflow.log_metric(key="pass_rate", value=optimized_mipro_result.score / 100)
    mlflow.log_param(key="fixer_type", value="codefixer_v2_optimized_by_MIPRO")

In [None]:
# Show the collected stats again, now including the MIPROv2-optimized run.
stats.get_stats()

In [None]:
# GEPA Optimization
# Metric function that provides detailed feedback for large LLM reflection
def gepa_metric_function(example, prediction, trace=None, pred_name=None, pred_trace=None):
    """Score a prediction and attach textual feedback for GEPA's reflection step.

    Args:
        example: Item exposing a ``'test_case'`` key.
        prediction: Object exposing ``analysis`` and ``fixed_code`` attributes.
        trace: Unused; accepted to match the metric call signature.
        pred_name: Unused; accepted to match the metric call signature.
        pred_trace: Unused; accepted to match the metric call signature.

    Returns:
        A ``dspy.Prediction`` with ``score`` (from ``validate_prediction``) and
        ``feedback`` (the validation comment followed by the analysis text).
    """
    # No trailing comma here: `x = value,` would bind a 1-tuple, and the
    # feedback would then contain the tuple's repr instead of the analysis text.
    analysis = prediction.analysis
    fixed_code = prediction.fixed_code
    score, comment = validate_prediction(fixed_code, example['test_case'])
    return dspy.Prediction(score=score, feedback=f"{comment}\n{analysis}")

# GEPA optimizer: "light" preset, with the larger model as the reflection LM
optimizer_gepa = dspy.GEPA(
    reflection_lm=large_llm,
    auto="light",
    metric=gepa_metric_function,
)


# Expect roughly 5-7 minutes of runtime
print("Starting GEPA optimization...")

# NOTE(review): the same `dataset` serves as both trainset and valset here,
# so validation scores are measured on data the optimizer has already seen.
with mlflow.start_run(run_name="optimized_by_GEPA"):
    optimized_gepa_fixer = optimizer_gepa.compile(
        fixer,
        valset=dataset,
        trainset=dataset,
    )

In [None]:
# Evaluate the GEPA-optimized program in its own MLflow run.
# The run name says GEPA so this run is distinguishable from the MIPRO
# evaluation run in the MLflow UI.
with mlflow.start_run(run_name="evaluate_optimized_by_GEPA"):
    optimized_gepa_result = evaluate(optimized_gepa_fixer)
    stats.add_experiment('optimized_gepa', optimized_gepa_result)
    # Score is divided by 100 — presumably Evaluate reports a percentage
    mlflow.log_metric("pass_rate", optimized_gepa_result.score / 100)
    mlflow.log_param("fixer_type", "codefixer_v3_optimized_by_GEPA")

In [None]:
# Show the collected stats once more, now including the GEPA-optimized run.
stats.get_stats()

In [None]:
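# Optional: uncomment to write the cache to the same file that `load_cache` reads earlier in this notebook.
# save_cache('../data/cache.pkl')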

In [None]:
# Display the MIPROv2-optimized program (the notebook echoes the value of the
# cell's last expression).
optimized_MIPRO_fixer