## Imports and Variables

In [None]:
import os

import pandas as pd
from inspect_ai import eval
from inspect_ai.log import EvalLog, list_eval_logs, read_eval_log
from inspect_ai.model import Model, ModelAPI, GenerateConfig
from inspect_ai.model import get_model

from src.data_structures import ExperimentConfig, ControlConfig
from src.inspect_helpers.tasks import injection_consistency_and_recognition
from src.inspect_helpers.datasets import ROW_INDEX_KEY
from src.inspect_helpers.scorers import custom_match, custom_prompt_criterion_mgf
from src.inspect_helpers.utils import collect_logs_by_model, get_validated_logs_by_model

# Experiment identifier; used to build the control log directory below and the
# data/<experiment> output directory when control responses are exported to CSV.
EXPERIMENT_NAME = "wikihow_summary_injection"
# Directory the control evaluation writes its logs to and the export step reads from.
LOG_DIR = f"logs/{EXPERIMENT_NAME}/control"

# Models evaluated in the control run, written as "<provider>/<model>".
# The provider prefix is what get_provider() extracts.
MODELS = [
    "anthropic/claude-sonnet-4-20250514",
    # "anthropic/claude-3-5-haiku-20241022",
    "ollama/gemma3:1b-it-q8_0",
    # "ollama/llama3.2:1b-instruct-q8_0"
]

# NOTE(review): not referenced anywhere in the visible notebook - confirm it is still needed.
SCORING_MODELS = [
    "together_ai/Qwen3-235B-A22B",
]

# Passed to the task as `prompt_template_args` in the control evaluation.
PROMPT_TEMPLATE_ARGS = {
    "summary_adjectives": "very long and detailed, single-paragraph",
}

# max_connections used for providers flagged as local in `islocal`.
BATCH_SIZE_LOCAL = 4
# max_connections used for every other provider.
MAX_CONNECTIONS_API = 100
# Passed as `limit` to the control eval() call.
LIMIT = 1

# Provider prefix -> whether the provider is treated as local.
# Every provider prefix that appears in MODELS should have an entry here.
islocal = {
    "ollama": True,
    "anthropic": False,
}

def get_provider(model: str) -> str:
    """Return the provider prefix of a "<provider>/<model>" identifier.

    Everything before the first "/" is returned; a string without any "/"
    is returned unchanged.
    """
    provider, _separator, _model_id = model.partition("/")
    return provider

In [7]:
# NOTE(review): cell-local import; consider moving it to the import cell at the top.
from src.data.treatments.wikisum_utils import show_available_splits

# Per the recorded output, this prints the article count of each split; the
# returned mapping of split name -> count is displayed as the cell's output.
show_available_splits()

2025-07-28 16:24:17,881 - INFO - Loading WikiSum dataset from Hugging Face...


Loading WikiSum dataset (HuggingFace) with splits all - this will be cached for future use...


2025-07-28 16:24:21,057 - INFO - Processing train split with 35775 articles
2025-07-28 16:24:22,574 - INFO - Processing validation split with 2000 articles
2025-07-28 16:24:22,658 - INFO - Processing test split with 2000 articles
2025-07-28 16:24:22,740 - INFO - Total articles loaded from Hugging Face: 39775


Dataset cached! Future calls will be much faster.
Available dataset splits:
------------------------------
        test:   2000 articles
       train:  35775 articles
  validation:   2000 articles
------------------------------
       Total:  39775 articles


{'train': 35775, 'validation': 2000, 'test': 2000}

In [11]:
# NOTE(review): get_WikiSum_random is imported but not used in the visible notebook.
from src.data.treatments.wikisum_utils import get_WikiSum, get_WikiSum_random

# Load articles from the train split with only the id/title/text columns.
# Per the recorded output this loads articles 0 to 19 and also writes
# data/wikisum_0_20.csv.
# NOTE(review): the control evaluation below reads data/wikihow.csv, not the
# file written here - confirm which dataset is intended.
df = get_WikiSum(0, 20, save_path="data/", splits=["train"], columns=["id", "title", "text"])
# Bare expression: the full frame is rendered as the cell output.
df

Loading WikiSum articles 0 to 19...
Using cached dataset (35775 articles)
Saving to CSV: data/wikisum_0_20.csv
Loaded 20 articles


Unnamed: 0,id,title,text
0,train_0,How to Store Fresh Oysters,Do not shuck or wash your oysters. Oysters tas...
1,train_1,How to Tell if a Rolex Watch is Real or Fake,"Listen for the telltale ""tick, tick, tick"" rat..."
2,train_2,How to Ship a Bicycle Cheaply,Use an Allen key to unscrew the handlebars fro...
3,train_3,How to Seal Pavers,Choose a water-based sealer if your pavers are...
4,train_4,How to Handle an Emergency Situation,Remain calm. Although emergencies require rapi...
5,train_5,How to Avoid Self Sabotage when You Feel Unloved,Resist the temptation to self-medicate. When y...
6,train_6,How to Make Flavored Water,Make citrus water. Wash 1–3 citrus fruits per ...
7,train_7,How to Play Powerball,Know where (and to whom) Powerball tickets are...
8,train_8,How to Apply Heat Transfer Vinyl,Choose and purchase vinyl. There are many colo...
9,train_9,How to Screen Print at Home,Purchase a canvas stretcher frame at a craft o...


## Control Evaluation

In [None]:
# Control run: evaluate every model in MODELS on the untreated dataset
# (treatment_col=None) and write the logs to LOG_DIR.
experiment_config = ExperimentConfig(
    control=ControlConfig(file_name="data/wikihow.csv", scorer_criteria=["No", "None"])
)

eval(
    tasks=[
        injection_consistency_and_recognition(
            csv_file_path=experiment_config.control.file_name,
            treatment_col=None,
            scorers=[
                custom_match(target="Yes", location="any", ignore_case=True),
                custom_prompt_criterion_mgf(criterion="None"),
            ],
            scorer_criteria=experiment_config.control.scorer_criteria,
            prompt_template_args=PROMPT_TEMPLATE_ARGS,
        )
    ],
    model=[
        # Resolve each model name through get_model() so that the matching
        # provider implementation is loaded and a per-model GenerateConfig can
        # be attached. ModelAPI is the abstract provider base class and cannot
        # be instantiated directly.
        get_model(
            model,
            config=GenerateConfig(
                # Local providers get a small connection pool; remote APIs
                # can take far more concurrent requests.
                max_connections=BATCH_SIZE_LOCAL
                if islocal[get_provider(model)]
                else MAX_CONNECTIONS_API
            ),
        )
        for model in MODELS
    ],
    limit=LIMIT,
    log_dir=LOG_DIR,
    timeout=5000,
)

Output()

## Make CSVs from the control eval logs

In [None]:
def _extract_task1_answer(response_text):
    """Return the "Task 1" part of a model response.

    The answer is the text between the first "Task 1:" marker and the first
    "Task 2:" marker. When the response has no "Task 1:" marker (the model did
    not follow the expected format) the text before "Task 2:" is returned as a
    best effort instead of raising.
    """
    before_task2 = response_text.split("Task 2:")[0].strip()
    task1_parts = before_task2.split("Task 1:")
    answer = task1_parts[1] if len(task1_parts) > 1 else task1_parts[0]
    return answer.strip()


def extract_responses_to_csv(
    eval_log: "EvalLog",
    original_csv_path,
    output_csv_path,
    response_column_name="model_response",
):
    """
    Extract model responses from eval log and save to CSV with responses mapped back to original rows.

    Each sample's response is reduced to its "Task 1" answer and written to the
    row given by the sample's ROW_INDEX_KEY metadata entry. Rows without a
    matching sample keep an empty string. Samples with a missing, negative or
    out-of-range row index, or without an output message, are skipped.

    Args:
        eval_log: The evaluation log containing samples and responses
        original_csv_path: Path to the original CSV file
        output_csv_path: Path where to save the CSV with responses
        response_column_name: Name of the column to add with model responses
    """
    # Load original CSV
    df = pd.read_csv(original_csv_path)

    # Initialize the response column with empty strings
    df[response_column_name] = ""

    for sample in eval_log.samples or []:
        row_index = sample.metadata.get(ROW_INDEX_KEY)
        # A negative index must be rejected as well: `df.loc[-1, col] = ...`
        # would silently append a new row instead of updating an existing one.
        if row_index is None or not 0 <= row_index < len(df):
            continue
        if not (sample.output and sample.output.message):
            continue

        model_response = sample.output.message.content
        if isinstance(model_response, list):
            # Content may be a list of parts; keep only the ones carrying text.
            model_response = "".join(
                part.text for part in model_response if hasattr(part, "text")
            )

        df.loc[row_index, response_column_name] = _extract_task1_answer(
            model_response
        )

    # Create the output directory if needed. A bare file name has no directory
    # part, and os.makedirs("") would raise.
    output_dir = os.path.dirname(output_csv_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the CSV with responses
    df.to_csv(output_csv_path, index=False)
    print(f"Saved CSV with responses to: {output_csv_path}")


# Run validation and get logs
print("Validating evaluation logs...")
logs_by_model = get_validated_logs_by_model(LOG_DIR, EXPERIMENT_NAME)
print("✓ Validation passed!")

# Export one dataset.csv per model. A model is exported only when it has
# exactly one successful control log, so the source of the CSV is unambiguous.
for model_name, logs in logs_by_model.items():
    successful_logs = [log for log in logs if log["status"] == "success"]

    if len(successful_logs) != 1:
        # Report the actual reason: zero successful logs and several
        # successful logs are different problems.
        if successful_logs:
            reason = (
                f"expected exactly 1 successful log, found {len(successful_logs)}"
            )
        else:
            reason = "no successful logs found"
        print(f"Skipping model '{model_name}' - {reason}")
        continue

    eval_log = successful_logs[0]["eval_log"]

    # Create output path: data/experiment_name/model_name/dataset.csv
    output_csv_path = os.path.join(
        f"data/{EXPERIMENT_NAME}", model_name, "dataset.csv"
    )

    # Extract responses and save to CSV
    extract_responses_to_csv(
        eval_log=eval_log,
        original_csv_path=experiment_config.control.file_name,
        output_csv_path=output_csv_path,
        response_column_name="model_summary",
    )

## Applying treatments to csv datasets (Jesse)

In [None]:
# Loop through the subdirectories of data/{EXPERIMENT_NAME}, take each dataset.csv file, and apply the treatments to it, producing {treatment_name}_treatment.csv files.

## Treatment Evaluations

In [None]:
# NOTE(review): both names are already imported in the first cell; these
# repeated imports are redundant.
from src.inspect_helpers.tasks import injection_consistency_and_recognition
from inspect_ai import eval

# Input CSV holding the treated texts, and the directory the treatment logs go to.
treatment_csv_path = "data/wikisum_capitalization_treatments_6_10.csv"
treatment_log_dir = "logs/wikisum_capitalization_treatments"

# One task per treatment column, each evaluated against both models.
# NOTE(review): max_connections=100 applies to the ollama model as well, whereas
# the control run uses BATCH_SIZE_LOCAL for local providers - confirm this is intended.
eval(
    tasks=[
        injection_consistency_and_recognition(
            csv_file_path=treatment_csv_path,
            treatment_col=treatment_col,
        )
        for treatment_col in ["IL33_S1", "IL33_S2"]
    ],
    model=["ollama/qwen3:0.6b", "anthropic/claude-sonnet-4-20250514"],
    limit=2,
    log_dir=treatment_log_dir,
    max_connections=100,
    timeout=500,
)

# Process treatment evaluation logs with sanity checks
# NOTE(review): process_all_logs_to_csv is defined in a LATER cell of this
# notebook, so this call raises NameError on a fresh kernel (Restart & Run All).
# Move that definition above this cell.
process_all_logs_to_csv(
    log_dir=treatment_log_dir,
    original_csv_path=treatment_csv_path,
    experiment_name="wikisum_capitalization_treatments",
    response_column_name="model_response",
    check_sanity=True,
)

# Summarising results

In [None]:
def _iter_logs_to_export(log_dir, experiment_name, check_sanity):
    """Yield (model_name, eval_log) pairs for every log that should be exported.

    With check_sanity the logs come from get_validated_logs_by_model() and only
    entries whose status is "success" are yielded. Without it every log found
    in log_dir is read and yielded, with "/" in the model name replaced by "_".
    """
    if check_sanity:
        print(f"Running sanity checks on logs in {log_dir}...")
        logs_by_model = get_validated_logs_by_model(log_dir, experiment_name)
        print("✓ Sanity checks passed!")

        for model_name, logs in logs_by_model.items():
            for log_data in logs:
                if log_data["status"] == "success":
                    yield model_name, log_data["eval_log"]
    else:
        # Original behavior - process all logs without sanity checks
        for eval_log_info in list_eval_logs(log_dir):
            eval_log = read_eval_log(eval_log_info)
            yield eval_log.eval.model.replace("/", "_"), eval_log


def _output_csv_path_for(eval_log, base_dir, model_name, dataset_name):
    """Build the output path: base_dir/model_name[/treatment_column]/dataset_name.

    The treatment column is read from the "treatment_column" metadata entry of
    the log's first sample; the path component is omitted when it is absent.
    """
    treatment_col = None
    if eval_log.samples:
        treatment_col = eval_log.samples[0].metadata.get("treatment_column")

    if treatment_col:
        return os.path.join(base_dir, model_name, treatment_col, dataset_name)
    return os.path.join(base_dir, model_name, dataset_name)


def process_all_logs_to_csv(
    log_dir,
    original_csv_path,
    experiment_name=None,
    response_column_name="model_response",
    check_sanity=True,
):
    """
    Process all evaluation logs in a directory and save CSVs with model responses.

    Output files are written below logs/<experiment_name> when an experiment
    name is given, otherwise below log_dir. If several logs map to the same
    output file, the later one overwrites the earlier one and a warning is printed.

    Args:
        log_dir: Directory containing evaluation logs
        original_csv_path: Path to the original CSV file
        experiment_name: Optional experiment name for organizing output
        response_column_name: Name of the column to add with model responses
        check_sanity: Whether to run sanity checks on logs (default: True)
    """
    dataset_name = os.path.basename(original_csv_path)
    base_dir = f"logs/{experiment_name}" if experiment_name else log_dir

    written_paths = set()
    for model_name, eval_log in _iter_logs_to_export(
        log_dir, experiment_name, check_sanity
    ):
        output_csv_path = _output_csv_path_for(
            eval_log, base_dir, model_name, dataset_name
        )

        # Make overwrites visible instead of silently replacing an earlier export.
        if output_csv_path in written_paths:
            print(
                f"Warning: overwriting {output_csv_path} "
                "(multiple logs map to the same output file)"
            )
        written_paths.add(output_csv_path)

        # Extract responses and save to CSV
        extract_responses_to_csv(
            eval_log=eval_log,
            original_csv_path=original_csv_path,
            output_csv_path=output_csv_path,
            response_column_name=response_column_name,
        )


# Example usage for any experiment:
# process_all_logs_to_csv("logs/your_experiment", "data/your_data.csv", "your_experiment_name")
#
# To skip sanity checks (not recommended for control experiments):
# process_all_logs_to_csv("logs/your_experiment", "data/your_data.csv", check_sanity=False)