# Evaluator-Optimizer Workflow

In [1]:
@file:DependsOn("dev.langchain4j:langchain4j:1.0.0-beta2")
@file:DependsOn("dev.langchain4j:langchain4j-anthropic:1.0.0-beta2")

In [8]:
import com.fasterxml.jackson.annotation.JsonCreator
import com.fasterxml.jackson.annotation.JsonProperty

/**
 * Structured reply of the generator step.
 *
 * The `@JsonProperty` names tie the JSON keys `thoughts` and `result` to the
 * constructor parameters of the same name.
 *
 * @property thoughts value of the `thoughts` JSON key
 * @property result value of the `result` JSON key
 */
data class GeneratorResponse
    @JsonCreator
    constructor(
        @JsonProperty("thoughts") val thoughts: String,
        @JsonProperty("result") val result: String,
    )

/** Possible verdicts for an evaluated piece of content. */
enum class EvalType {
    PASS,
    NEEDS_IMPROVEMENT,
    FAIL,
}

/**
 * Structured reply of the evaluator step.
 *
 * The `@JsonProperty` names tie the JSON keys `evaluation` and `feedback` to the
 * constructor parameters of the same name.
 *
 * @property evaluation value of the `evaluation` JSON key, as an [EvalType]
 * @property feedback value of the `feedback` JSON key
 */
data class EvaluatorResponse
    @JsonCreator
    constructor(
        @JsonProperty("evaluation") val evaluation: EvalType,
        @JsonProperty("feedback") val feedback: String,
    )

/**
 * The two model-backed operations the workflow relies on.
 *
 * The notebook never implements this interface by hand; the instance it uses comes from
 * `AiServices.create(EvalOptimizerLlm::class.java, model)`.
 * NOTE(review): presumably the single String argument is sent as the user message and the
 * reply is parsed into the declared return type — confirm against the LangChain4j AI Services docs.
 */
interface EvalOptimizerLlm {
    /** Takes the full generator prompt as [input] and returns a [GeneratorResponse]. */
    fun llmGenerate(input: String): GeneratorResponse

    /** Takes the full evaluator prompt as [input] and returns an [EvaluatorResponse]. */
    fun llmEvaluate(input: String): EvaluatorResponse
}

In [9]:
import dev.langchain4j.model.anthropic.AnthropicChatModel
import dev.langchain4j.model.anthropic.AnthropicChatModelName
import dev.langchain4j.service.AiServices

// Chat model used for both generation and evaluation; the API key is read from the
// ANTHROPIC_API_KEY environment variable.
val model = AnthropicChatModel.builder().apply {
    apiKey(System.getenv("ANTHROPIC_API_KEY"))
    modelName(AnthropicChatModelName.CLAUDE_3_7_SONNET_20250219)
    maxTokens(4096)
    temperature(0.1)
}.build()

// Implementation of EvalOptimizerLlm created by LangChain4j on top of the model above.
val llm = AiServices.create(EvalOptimizerLlm::class.java, model)

In [45]:
/**
 * Asks the model for a solution attempt and prints the attempt to standard output.
 *
 * The text sent to the model is [prompt], then [context] when it is not empty, then a
 * `Task:` header followed by [task]; the pieces are separated by single newlines.
 *
 * @param prompt instructions placed at the start of the request
 * @param task task text placed after the `Task:` header
 * @param context extra text inserted between the prompt and the task; left out when empty
 * @return the response's `thoughts` paired with its `result`
 */
fun generate(prompt: String, task: String, context: String = ""): Pair<String, String> {
    val request = buildString {
        append(prompt)
        if (context.isNotEmpty()) {
            append('\n').append(context)
        }
        append("\nTask:\n").append(task)
    }

    val reply = llm.llmGenerate(request)

    // The template stays flush-left: trimIndent() runs after interpolation, so indenting the
    // template lines would change the printed text whenever the reply has unindented lines.
    val report = """
=== GENERATION START ===
Thoughts:
${reply.thoughts}
Result:
${reply.result}
=== GENERATION END ===
""".trimIndent()
    println(report)

    return Pair(reply.thoughts, reply.result)
}

In [46]:
/**
 * Asks the model to judge [content] against [task] and prints the verdict to standard output.
 *
 * The text sent to the model is [prompt], an `Original task:` line carrying [task] and a
 * `Content to evaluate:` line carrying [content], separated by single newlines.
 *
 * @param prompt evaluation instructions placed at the start of the request
 * @param content the candidate output to be judged
 * @param task the task the candidate was produced for
 * @return the response's `evaluation` paired with its `feedback`
 */
fun evaluate(prompt: String, content: String, task: String): Pair<EvalType, String> {
    val request = listOf(
        prompt,
        "Original task: $task",
        "Content to evaluate: $content",
    ).joinToString(separator = "\n")

    val verdict = llm.llmEvaluate(request)

    // The template stays flush-left: trimIndent() runs after interpolation, so indenting the
    // template lines would change the printed text whenever the feedback has unindented lines.
    val report = """
=== EVALUATION START ===
Status:
${verdict.evaluation}
Feedback:
${verdict.feedback}
=== EVALUATION END ===
""".trimIndent()
    println(report)

    return Pair(verdict.evaluation, verdict.feedback)
}

In [47]:
/**
 * Runs the generate/evaluate cycle until the evaluator answers [EvalType.PASS] or the
 * round limit is reached.
 *
 * A first attempt is generated from [generatorPrompt] and [task]. Each round evaluates the
 * latest attempt; on any verdict other than PASS a new attempt is generated with a context
 * made of all previous results plus the feedback of the round that just failed.
 *
 * @param task the task text handed to both the generator and the evaluator
 * @param evaluatorPrompt instructions for the evaluation call
 * @param generatorPrompt instructions for the generation call
 * @param maxIterations maximum number of evaluation rounds. The default, [Int.MAX_VALUE],
 *   keeps the previous behaviour of looping until PASS; pass a small value to bound the
 *   number of model calls when the evaluator may never be satisfied.
 * @return the latest result paired with every `(thoughts, result)` attempt in the order
 *   produced. If the limit is reached without a PASS, the result is the last attempt and
 *   has NOT passed evaluation.
 * @throws IllegalArgumentException if [maxIterations] is less than 1
 */
fun loop(
    task: String,
    evaluatorPrompt: String,
    generatorPrompt: String,
    maxIterations: Int = Int.MAX_VALUE,
): Pair<String, List<Pair<String, String>>> {
    require(maxIterations >= 1) { "maxIterations must be at least 1, was $maxIterations" }

    val memory = mutableListOf<String>()
    val chainOfThought = mutableListOf<Pair<String, String>>()

    var attempt = generate(generatorPrompt, task)
    memory.add(attempt.second)
    chainOfThought.add(attempt)

    for (round in 1..maxIterations) {
        val (evaluation, feedback) = evaluate(evaluatorPrompt, attempt.second, task)

        // Stop on success, or on the final round so that no attempt is generated
        // that would never be evaluated.
        if (evaluation == EvalType.PASS || round == maxIterations) break

        val context = memory.joinToString(
            prefix = "Previous attempts:\n",
            postfix = "\nFeedback: $feedback",
            separator = "\n",
        )

        attempt = generate(generatorPrompt, task, context)
        memory.add(attempt.second)
        chainOfThought.add(attempt)
    }

    return attempt.second to chainOfThought
}

In [50]:
// Instructions for the evaluation call. The requested JSON keys ("evaluation", "feedback")
// are the ones EvaluatorResponse binds, and the listed verdicts are the EvalType entries.
// trimIndent() strips the common 4-space indentation of the literal.
// NOTE(review): "this following" and "attemping" are typos; they are left untouched here
// because this text is sent to the model verbatim.
val evaluatorPrompt =
    """
    Evaluate this following code implementation for:
    1. code correctness
    2. time complexity
    3. style and best practices

    You should be evaluating only and not attemping to solve the task.
    Only output "PASS" if all criteria are met and you have no further suggestions for improvements.
    Output your evaluation concisely in the following JSON format.

    ```json
    {
        "evaluation": "PASS, NEEDS_IMPROVEMENT, or FAIL",
        "feedback": "What needs improvement and why."
    }
    ```
    """.trimIndent()

// Instructions for the generation call. The requested JSON keys ("thoughts", "result")
// are the ones GeneratorResponse binds.
// NOTE(review): "If there are feedback" is ungrammatical; left untouched because this text
// is sent to the model verbatim.
val generatorPrompt =
    """
    Your goal is to complete the task based on <user input>. If there are feedback
    from your previous generations, you should reflect on them to improve your solution

    Output your answer concisely in the following JSON format:

    ```json
    {
        "thoughts": "Your understanding of the task and feedback and how you plan to improve",
        "result": "Your code implementation here"
    }
    ```
    """.trimIndent()

// The task handed to the workflow, wrapped in the <user input> tags that generatorPrompt refers to.
val task =
    """
    <user input>
    Implement a Stack in Kotlin with:
    1. push(x)
    2. pop()
    3. getMin()
    All operations should be O(1).
    </user input>
    """.trimIndent()


In [51]:
// Run the evaluator-optimizer workflow; the three String arguments are named so they cannot be swapped.
loop(
    task = task,
    evaluatorPrompt = evaluatorPrompt,
    generatorPrompt = generatorPrompt,
)

=== GENERATION START ===
Thoughts:
I need to implement a Stack in Kotlin with push, pop, and getMin operations, all with O(1) time complexity. For push and pop, a standard stack implementation will work. For getMin with O(1), I'll need to maintain a second stack that keeps track of the minimum values. Each time we push a value, we'll compare it with the current minimum and push the smaller one to the minStack. When we pop, we'll also pop from the minStack.
Result:
class MinStack<T : Comparable<T>> {
    private val mainStack = mutableListOf<T>()
    private val minStack = mutableListOf<T>()
    
    fun push(x: T) {
        mainStack.add(x)
        
        // If minStack is empty or x is smaller than current min, add x to minStack
        if (minStack.isEmpty() || x <= minStack.last()) {
            minStack.add(x)
        }
    }
    
    fun pop(): T? {
        if (mainStack.isEmpty()) return null
        
        val popped = mainStack.removeAt(mainStack.size - 1)
        
        

(/**
 * A stack implementation that provides O(1) time complexity for push, pop, and finding the minimum element.
 * @param T The type of elements in the stack, must be comparable.
 */
class MinStack<T : Comparable<T>> {
    private val mainStack = ArrayDeque<T>()
    private val minStack = ArrayDeque<T>()
    
    /**
     * Pushes an element onto the stack.
     * @param x The element to push
     * Time complexity: O(1)
     */
    fun push(x: T) {
        mainStack.addLast(x)
        
        // If minStack is empty or x is less than or equal to current min, add x to minStack
        if (minStack.isEmpty() || x.compareTo(minStack.last()) <= 0) {
            minStack.addLast(x)
        }
    }
    
    /**
     * Removes and returns the top element from the stack.
     * @return The top element, or null if the stack is empty
     * Time complexity: O(1)
     */
    fun pop(): T? {
        if (mainStack.isEmpty()) return null
        
        val popped = mainStack.removeLast()
     