In [2]:
import re

from inspect_ai import Task, task, eval
from inspect_ai.dataset import Sample, MemoryDataset
from inspect_ai.model import ChatMessageAssistant, ChatMessageUser
from inspect_ai.scorer import Score, Target, accuracy, scorer
from inspect_ai.solver import Generate, Solver, TaskState, generate, solver
from inspect_ai.model import CachePolicy, GenerateConfigArgs
from typing import Literal, Unpack


@solver
def prefill(prefill_message: str) -> Solver:
    """Build a solver that seeds the conversation with an assistant turn.

    The returned solver never calls the model. It only appends an assistant
    message whose content is ``prefill_message`` to the end of
    ``state.messages``.

    Args:
      prefill_message: Text used as the content of the appended assistant
        message.

    Returns:
      A solver that appends the message and hands back the same ``TaskState``.
    """

    async def solve(state: TaskState, generate: Generate) -> TaskState:
        # `generate` is required by the solver signature but is not used here.
        seeded_turn = ChatMessageAssistant(content=prefill_message)
        state.messages.append(seeded_turn)
        return state

    return solve

@solver
def prefill_generate(
    prefill_message: str,
    tool_calls: Literal["loop", "single", "none"] = "loop",
    cache: bool | CachePolicy = False,
    **kwargs: Unpack[GenerateConfigArgs],
) -> Solver:
    """Append an assistant prefill message, then generate from the model.

    Combines the two steps in one solver: an assistant message containing
    ``prefill_message`` is added to the end of the message history, and the
    task's ``generate`` function is then called on that state with the
    options given here.

    Args:
      prefill_message (str):
        Content of the assistant message appended before generating.

      tool_calls (Literal["loop", "single", "none"]): Passed through to
        ``generate()`` to control how tool calls are resolved:
        - `"loop"` resolves tool calls and then invokes `generate()` again,
            repeating until there are no more tool calls or `message_limit`
            or `token_limit` is exceeded. This is the default.
        - `"single"` resolves at most a single set of tool calls and then
            returns.
        - `"none"` does not resolve tool calls at all (in this case you
            will need to invoke `call_tools()` directly).

      cache (bool | CachePolicy):
        Caching behaviour for generate responses (defaults to no caching).

      **kwargs: Optional generation config arguments, forwarded unchanged.

    Returns:
      A solver that yields the ``TaskState`` returned by ``generate``.
    """

    async def solve(state: TaskState, generate: Generate) -> TaskState:
        # Seed the assistant turn first so it is the last message in the
        # history when generate() is called.
        seeded_turn = ChatMessageAssistant(content=prefill_message)
        state.messages.append(seeded_turn)

        # Hand off to the model with the caller's options untouched.
        generated_state = await generate(
            state,
            tool_calls=tool_calls,
            cache=cache,
            **kwargs,
        )
        return generated_state

    return solve


# One-sample, in-memory dataset: the only prompt is the question below.
custom_dataset = MemoryDataset(
    [Sample(input="Why did the chicken cross the road?")]
)

# NOTE: `eval` is the function imported from inspect_ai at the top of this
# cell, not Python's builtin eval().
#
# The task's only solver prefills the assistant turn with "Chick" and then
# generates; max_tokens=5 and epochs=5 are passed straight to eval().
#
# Alternative `model=` values kept for reference (currently disabled):
#   "anthropic/claude-sonnet-4-20250514"
#   "together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
#   ["hf/openai-community/gpt2",
#    "anthropic/claude-sonnet-4-20250514",
#    "together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
#    "vllm/openai-community/gpt2",
#    "openai/gpt-4.1-nano"]
eval(
    Task(dataset=custom_dataset, solver=[prefill_generate("Chick")]),
    model="ollama/hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:latest",
    max_tokens=5,
    epochs=5,
)


Output()

In [None]:
# Start the Inspect log viewer from the shell; per its banner it keeps running until CTRL+C.
!inspect view

Inspect View: file:///Users/work/self-recognition/logs
(Press CTRL+C to quit)
