## Imports and Variables

In [1]:
from src.inspect_helpers.tasks import injection_consistency_and_recognition
from inspect_ai.log import EvalLog, list_eval_logs, read_eval_log
from inspect_ai import eval
from src.data_structures import ExperimentConfig, ControlConfig
from src.inspect_helpers.datasets import ROW_INDEX_KEY
import pandas as pd
import os


# Name of this experiment; also the sub-directory used for its logs and data.
EXPERIMENT_NAME = "wikihow_summary_injection"
# Where the control-run eval logs are written and later read back from.
LOG_DIR = f"logs/{EXPERIMENT_NAME}/control"

# Models under evaluation (passed as `model=` to eval). Toggle entries by
# (un)commenting them.
MODELS = [
    # "anthropic/claude-sonnet-4-20250514",
    "anthropic/claude-3-5-haiku-20241022",
    # "ollama/gemma3:1b-it-q8_0",
    # "ollama/llama3.2:1b-instruct-q8_0"
]

# Presumably the grader model(s) used by the scorer — TODO confirm where consumed.
SCORING_MODELS = ["together_ai/Qwen3-235B-A22B"]

# Presumably the batch size for locally hosted (ollama) models — TODO confirm.
BATCH_SIZE_LOCAL = 4
# Intended cap on concurrent connections for hosted-API models.
MAX_CONNECTIONS_API = 100
# Number of dataset samples per eval run (passed as `limit=` to eval).
LIMIT = 1

## Control Evaluation

In [3]:
# Control configuration: the source CSV plus the scorer criteria handed to the task.
# NOTE: the CSV path is relative to the notebook's working directory; run the
# notebook from the directory that contains `data/`.
experiment_config = ExperimentConfig(
    control=ControlConfig(file_name="data/wikihow.csv", scorer_criteria=["No", "None"])
)

# Control run: no treatment column is applied (treatment_col=None).
eval(
    tasks=[
        injection_consistency_and_recognition(
            csv_file_path=experiment_config.control.file_name,
            treatment_col=None,
            scorer_criteria=experiment_config.control.scorer_criteria,
        )
    ],
    model=MODELS,
    limit=LIMIT,
    log_dir=LOG_DIR,
    # Use the notebook-level constant instead of repeating the literal 100.
    max_connections=MAX_CONNECTIONS_API,
    timeout=500,
)

FileNotFoundError: [Errno 2] No such file or directory: 'data/wikihow.csv'

## Make CSVs from the control eval logs

In [8]:
def check_log_sanity(log_dir, experiment_name=None):
    """
    Verify that no model has more than one successful log in ``log_dir``.

    Every log in the directory is read and grouped under its model name
    (with "/" replaced by "_"). A model with zero successful logs only
    triggers a printed warning; a model with two or more is an error.

    Args:
        log_dir: Directory containing evaluation logs
        experiment_name: Optional experiment name, appended to messages

    Returns:
        Dict mapping model name to a list of
        ``{"log_info", "eval_log", "status"}`` dicts, one per log found.

    Raises:
        ValueError: If any model has multiple successful logs
    """
    # The same suffix appears in both the warning and the error text.
    experiment_prefix = f" in experiment '{experiment_name}'" if experiment_name else ""

    # Read each log once and bucket it by model.
    grouped = {}
    for info in list_eval_logs(log_dir):
        loaded = read_eval_log(info)
        key = loaded.eval.model.replace("/", "_")
        grouped.setdefault(key, []).append(
            {"log_info": info, "eval_log": loaded, "status": loaded.status}
        )

    # Collect every offending model so they are all reported in one exception.
    problems = []
    for key, entries in grouped.items():
        succeeded = [entry for entry in entries if entry["status"] == "success"]
        count = len(succeeded)

        if count == 0:
            print(f"Warning: Model '{key}' has no successful logs{experiment_prefix}")
        elif count > 1:
            file_names = [entry["log_info"].name for entry in succeeded]
            problems.append(
                f"Model '{key}' has {count} successful logs{experiment_prefix}:\n"
                f"  Files: {', '.join(file_names)}\n"
                f"  Please remove duplicate logs or use only one successful run per model."
            )

    if problems:
        raise ValueError(
            "Multiple successful logs found for some models:\n\n" + "\n\n".join(problems)
        )

    return grouped


def extract_responses_to_csv(
    eval_log: EvalLog,
    original_csv_path,
    output_csv_path,
    response_column_name="model_response",
):
    """
    Extract model responses from eval log and save to CSV with responses mapped back to original rows.

    For each sample, the text between the "Task 1:" and "Task 2:" markers of
    the model output is written to the row named by the sample's
    ``ROW_INDEX_KEY`` metadata entry. Rows without a usable sample keep an
    empty string. A response with no "Task 1:" marker no longer aborts the
    export: the text before "Task 2:" is stored and a warning is printed.

    Args:
        eval_log: The evaluation log containing samples and responses
        original_csv_path: Path to the original CSV file
        output_csv_path: Path where to save the CSV with responses
        response_column_name: Name of the column to add with model responses
    """
    # Load original CSV and start with an empty response for every row
    df = pd.read_csv(original_csv_path)
    df[response_column_name] = ""

    for sample in eval_log.samples or []:
        # Map the sample back to its source row; skip samples that cannot be mapped.
        row_index = (sample.metadata or {}).get(ROW_INDEX_KEY)
        # Negative indices are rejected too: assigning through df.loc with a
        # label that is not in the index would append a new row.
        if row_index is None or not 0 <= row_index < len(df):
            continue
        if not (sample.output and sample.output.message):
            continue

        model_response = sample.output.message.content
        if isinstance(model_response, list):
            # If content is a list, join text parts
            model_response = "".join(
                part.text for part in model_response if hasattr(part, "text")
            )

        # Keep only the Task 1 answer: drop everything from "Task 2:" onwards,
        # then take what follows the "Task 1:" marker.
        before_task_2 = model_response.split("Task 2:")[0].strip()
        task_1_parts = before_task_2.split("Task 1:")
        if len(task_1_parts) > 1:
            model_response = task_1_parts[1].strip()
        else:
            # One malformed response should not abort the whole export.
            print(
                f"Warning: no 'Task 1:' marker in response for row {row_index}; "
                "using the text before 'Task 2:'"
            )
            model_response = before_task_2

        df.loc[row_index, response_column_name] = model_response

    # Create output directory if it doesn't exist. A bare file name has an
    # empty dirname, which os.makedirs rejects.
    output_dir = os.path.dirname(output_csv_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the CSV with responses
    df.to_csv(output_csv_path, index=False)
    print(f"Saved CSV with responses to: {output_csv_path}")


# Validate the control logs before exporting anything; this raises if a model
# has more than one successful run.
print("Running sanity checks on evaluation logs...")
logs_by_model = check_log_sanity(LOG_DIR, EXPERIMENT_NAME)
print("✓ Sanity checks passed!")

# Export one dataset.csv per model that has exactly one successful log
for model_name, logs in logs_by_model.items():
    successful_logs = [log for log in logs if log["status"] == "success"]

    # Nothing to export for this model
    if len(successful_logs) != 1:
        print(f"Skipping model '{model_name}' - no successful logs found")
        continue

    eval_log = successful_logs[0]["eval_log"]

    # Output layout: data/experiment_name/model_name/dataset.csv
    output_csv_path = os.path.join(
        f"data/{EXPERIMENT_NAME}", model_name, "dataset.csv"
    )

    # Write the original rows plus a "model_summary" column
    extract_responses_to_csv(
        eval_log=eval_log,
        original_csv_path=experiment_config.control.file_name,
        output_csv_path=output_csv_path,
        response_column_name="model_summary",
    )

Running sanity checks on evaluation logs...


ValueError: Multiple successful logs found for some models:

Model 'anthropic_claude-3-5-haiku-20241022' has 4 successful logs in experiment 'wikihow_summary_injection':
  Files: file:///Users/work/injection-recognition/logs/wikihow_summary_injection/control/2025-07-12T09-47-43+01-00_injection-consistency-and-recognition_iWtGUwcP6d5mcgkmxWX8ri.eval, file:///Users/work/injection-recognition/logs/wikihow_summary_injection/control/2025-07-12T09-44-29+01-00_injection-consistency-and-recognition_9QSZuG8RVQfUWN9ThZqwPi.eval, file:///Users/work/injection-recognition/logs/wikihow_summary_injection/control/2025-07-12T09-43-38+01-00_injection-consistency-and-recognition_isS4ZeATVTWA52JFqKsGuL.eval, file:///Users/work/injection-recognition/logs/wikihow_summary_injection/control/2025-07-11T19-23-46+01-00_injection-consistency-and-recognition_FT6CRXymD8Mz7Rvjmd9TWK.eval
  Please remove duplicate logs or use only one successful run per model.

## Applying treatments to csv datasets (Jesse)

In [None]:
# Loop through the subdirs in the data/{EXPERIMENT_NAME} dir, take each dataset.csv file, and apply treatments to it, producing {treatment_name}_treatment.csv files.

## Treatment Evaluations

In [None]:
from src.inspect_helpers.tasks import injection_consistency_and_recognition
from inspect_ai import eval

treatment_csv_path = "data/wikisum_capitalization_treatments_6_10.csv"
treatment_log_dir = "logs/wikisum_capitalization_treatments"

# One task per treatment column; every task is run against every listed model.
eval(
    tasks=[
        injection_consistency_and_recognition(
            csv_file_path=treatment_csv_path,
            treatment_col=treatment_col,
        )
        for treatment_col in ["IL33_S1", "IL33_S2"]
    ],
    model=["ollama/qwen3:0.6b", "anthropic/claude-sonnet-4-20250514"],
    limit=2,
    log_dir=treatment_log_dir,
    max_connections=100,
    timeout=500,
)

# Export the treatment responses to CSV.
# NOTE: process_all_logs_to_csv is defined in the "Summarising results" cell;
# run that cell before this one.
# check_log_sanity allows at most one successful log per model, but the eval
# above runs two tasks (one per treatment column) for each model, so a fully
# successful run leaves two successful logs per model and the check would
# raise ValueError. Sanity checks are therefore skipped here; be aware that
# with check_sanity=False every log in the directory is exported, including
# logs from non-successful or earlier runs.
process_all_logs_to_csv(
    log_dir=treatment_log_dir,
    original_csv_path=treatment_csv_path,
    experiment_name="wikisum_capitalization_treatments",
    response_column_name="model_response",
    check_sanity=False,
)

# Summarising results

In [None]:
def _response_csv_path(
    eval_log, model_name, log_dir, original_csv_path, experiment_name
):
    """Build the output path for one log: <base>/<model>[/<treatment column>]/<dataset file>."""
    # The treatment column, when present, is read from the first sample's metadata.
    treatment_col = None
    if eval_log.samples:
        treatment_col = (eval_log.samples[0].metadata or {}).get("treatment_column")

    base_dir = f"logs/{experiment_name}" if experiment_name else log_dir
    path_parts = [base_dir, model_name]
    if treatment_col:
        path_parts.append(treatment_col)
    path_parts.append(os.path.basename(original_csv_path))
    return os.path.join(*path_parts)


def _iter_logs_to_export(log_dir, experiment_name, check_sanity):
    """Yield (model_name, eval_log) pairs for the logs that should be exported."""
    if check_sanity:
        print(f"Running sanity checks on logs in {log_dir}...")
        logs_by_model = check_log_sanity(log_dir, experiment_name)
        print("✓ Sanity checks passed!")

        # Process only successful logs
        for model_name, logs in logs_by_model.items():
            for log_data in logs:
                if log_data["status"] == "success":
                    yield model_name, log_data["eval_log"]
    else:
        # No sanity checks: every log in the directory is exported, whatever
        # its status. Logs are read lazily, one at a time.
        for eval_log_info in list_eval_logs(log_dir):
            eval_log = read_eval_log(eval_log_info)
            yield eval_log.eval.model.replace("/", "_"), eval_log


def process_all_logs_to_csv(
    log_dir,
    original_csv_path,
    experiment_name=None,
    response_column_name="model_response",
    check_sanity=True,
):
    """
    Process all evaluation logs in a directory and save CSVs with model responses.

    Each exported log produces one CSV at
    ``<base>/<model>[/<treatment column>]/<original CSV file name>``, where
    ``<base>`` is ``logs/<experiment_name>`` when an experiment name is given
    and ``log_dir`` otherwise.

    Args:
        log_dir: Directory containing evaluation logs
        original_csv_path: Path to the original CSV file
        experiment_name: Optional experiment name for organizing output
        response_column_name: Name of the column to add with model responses
        check_sanity: Whether to run sanity checks on logs (default: True).
            When True, only successful logs are exported. When False, every
            log in ``log_dir`` is exported.

    Raises:
        ValueError: If ``check_sanity`` is True and some model has more than
            one successful log in ``log_dir`` (raised by ``check_log_sanity``).
            Note that this includes directories holding one successful log per
            treatment column for the same model.
    """
    for model_name, eval_log in _iter_logs_to_export(
        log_dir, experiment_name, check_sanity
    ):
        output_csv_path = _response_csv_path(
            eval_log, model_name, log_dir, original_csv_path, experiment_name
        )

        # Extract responses and save to CSV
        extract_responses_to_csv(
            eval_log=eval_log,
            original_csv_path=original_csv_path,
            output_csv_path=output_csv_path,
            response_column_name=response_column_name,
        )


# Example usage for any experiment:
# process_all_logs_to_csv("logs/your_experiment", "data/your_data.csv", "your_experiment_name")
#
# To skip sanity checks (not recommended for control experiments):
# process_all_logs_to_csv("logs/your_experiment", "data/your_data.csv", check_sanity=False)
