See our docs for an explanation of what this code is doing!

In [None]:
import os
from docent import Docent

# Build the Docent client. The key is read from the DOCENT_API_KEY environment
# variable; that lookup is the client's default, so the argument may be omitted.
docent_api_key = os.getenv("DOCENT_API_KEY")

client = Docent(
    api_key=docent_api_key,
    # Self-hosting? Uncomment and adjust the endpoints below.
    # server_url="http://localhost:8889",
    # web_url="http://localhost:3001",
)

In [None]:
# Create the collection that will receive the example runs; the returned value
# is kept as the handle used when uploading agent runs later.
collection_id = client.create_collection(
    name="inspect example",
    description="example inspect log that comes with the Docent repo",
)

In [None]:
from docent.samples import get_inspect_fpath
from inspect_ai.log import read_eval_log
from pydantic_core import to_jsonable_python

# Locate the sample Inspect log that docent.samples points to and parse it.
inspect_log_path = get_inspect_fpath()
ctf_log = read_eval_log(inspect_log_path)

# JSON-compatible view of the parsed log (convenient for browsing its structure).
ctf_log_dict = to_jsonable_python(ctf_log)

In [None]:
from inspect_ai.log import EvalLog
from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import parse_chat_message

def load_inspect_log(
    log: EvalLog,
    *,
    scorer_name: str = "includes",
    correct_value: object = "C",
) -> list[AgentRun]:
    """Convert the samples of an Inspect eval log into Docent agent runs.

    Each sample becomes one ``AgentRun`` holding a single ``Transcript`` built
    from the sample's messages, plus metadata describing the task, sample,
    epoch, model and scores.

    Args:
        log: The Inspect evaluation log to convert.
        scorer_name: Key in each sample's ``scores`` mapping used to decide
            correctness. Defaults to ``"includes"``, the scorer used for the
            CTF benchmark this example was written for.
        correct_value: Score value that counts as a correct answer. Defaults
            to ``"C"``.

    Returns:
        One ``AgentRun`` per sample, in the order the samples appear in the
        log. An empty list is returned when ``log.samples`` is ``None``.
    """
    if log.samples is None:
        return []

    agent_runs: list[AgentRun] = []

    for s in log.samples:
        # Gather scores. "correct" is only recorded when the selected scorer
        # actually produced a result for this sample; otherwise it is omitted.
        scores: dict[str, int | float | bool | None] = {}
        if s.scores and scorer_name in s.scores:
            scores["correct"] = s.scores[scorer_name].value == correct_value

        # Metadata attached to the run. The sample id is stringified so it has
        # a uniform type regardless of how the log stored it.
        metadata = {
            "task_id": log.eval.task,
            "sample_id": str(s.id),
            "epoch_id": s.epoch,
            "model": log.eval.model,
            "scores": scores,
            "additional_metadata": s.metadata,
            "scoring_metadata": s.scores,
        }

        # One transcript per sample, built from the sample's chat messages.
        agent_runs.append(
            AgentRun(
                transcripts=[
                    Transcript(
                        messages=[parse_chat_message(m.model_dump()) for m in s.messages]
                    )
                ],
                metadata=metadata,
            )
        )

    return agent_runs

In [None]:
# Convert the parsed log into agent runs, then preview the first one's text
# rendering as a quick sanity check.
agent_runs = load_inspect_log(ctf_log)
first_run = agent_runs[0]
print(first_run.text)

In [None]:
# Upload the converted agent runs to the collection identified by collection_id.
client.add_agent_runs(collection_id, agent_runs)