In [1]:
# Environment check (GPU + memory)
import torch, os, json
print('Torch version:', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    dev = torch.cuda.current_device()
    name = torch.cuda.get_device_name(dev)
    total, free = torch.cuda.mem_get_info()
    print({'device': name, 'memory.total_MiB': total//(1024**2), 'memory.free_MiB': free//(1024**2)})

# Show NVIDIA SMI
import subprocess
subprocess.run(['bash','-lc','nvidia-smi'])

Torch version: 2.9.0+cu126
CUDA available: True
{'device': 'NVIDIA A100-SXM4-80GB', 'memory.total_MiB': 80797, 'memory.free_MiB': 81221}


CompletedProcess(args=['bash', '-lc', 'nvidia-smi'], returncode=0)

In [2]:
# Install required Python packages (keep Colab's torch)
!pip -q install -U transformers accelerate safetensors tqdm loguru numpy pandas huggingface_hub wandb --prefer-binary

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.6/16.6 MB[0m [31m137.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m146.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 2.3.5 which is incomp

## Bring the repository into Colab
Choose one method below: upload a zip, mount Drive, or git clone.

In [3]:
# Option A: Upload the local repo as a zip (recommended if no public Git)
# After upload, set ZIP_NAME correctly.
from google.colab import files
print('Upload your repo zip (e.g., subliminal-learning.zip)')
uploaded = files.upload()
ZIP_NAME = next(iter(uploaded.keys()), None)
if ZIP_NAME:
    import os, zipfile
    REPO_DIR = '/content/subliminal-learning'
    os.makedirs(REPO_DIR, exist_ok=True)
    with zipfile.ZipFile(ZIP_NAME, 'r') as z:
        z.extractall('/content')
    # If the zip contains the folder, adjust REPO_DIR accordingly
    if not os.path.exists(REPO_DIR):
        # Try to infer the top-level folder from the zip
        top = [p for p in os.listdir('/content') if os.path.isdir(os.path.join('/content', p))]
        if top:
            REPO_DIR = os.path.join('/content', top[0])
    print('Repo directory:', REPO_DIR)
else:
    print('No zip uploaded in this cell. You can use Drive or Git clone below.')

Upload your repo zip (e.g., subliminal-learning.zip)


Saving subliminal-learning.zip to subliminal-learning.zip
Repo directory: /content/subliminal-learning


In [None]:
# Option B: Mount Google Drive and point to the repo folder
from google.colab import drive
drive.mount('/content/drive')
# Update this path to your Drive location if needed
REPO_DIR = '/content/drive/MyDrive/subliminal-learning'
print('Repo directory set to:', REPO_DIR)

In [None]:
# Option C: Git clone (if you have a public or private repo URL)
GIT_URL = 'https://github.com/Mamiglia/subliminal-learning.git'  # e.g., 'https://github.com/you/subliminal-learning.git'
if GIT_URL:
    import subprocess, os
    subprocess.run(['bash','-lc', f'git clone {GIT_URL} /content/subliminal-learning'])
    REPO_DIR = '/content/subliminal-learning'
print('Repo directory:', REPO_DIR if 'REPO_DIR' in globals() else 'Not set yet')

In [4]:
# Add repo to sys.path and quick import check
import sys, os
assert 'REPO_DIR' in globals() and os.path.exists(REPO_DIR), 'Set REPO_DIR using one of the options above.'
sys.path.append(REPO_DIR)
print('sys.path updated.')
# Verify a key module exists
assert os.path.exists(os.path.join(REPO_DIR, 'sl', 'datasets', 'nums_dataset.py')), 'Missing sl/datasets/nums_dataset.py'
print('Repo structure looks good.')

sys.path updated.
Repo structure looks good.


## Configure the experiment
Adjust `MODEL`, `FOLDER`, and animals as desired.

In [5]:
# Core parameters (edit as needed)
MODEL = 'Qwen/Qwen2.5-32B-Instruct'  # Change to your preferred HF chat model
FOLDER = 'qwen32'
ANIMALS = ['lion','cat','bear','unicorn','wolf']
# Configure prompt designs
PROMPT_DESIGNS = ['fewU', 'fewSU', 'fewSUA']
N_SHOTS = 3  # number of teacher examples to include for few-shot designs

# Teacher generation parameters
TEACHER_COUNT = 1000
TEACHER_TURNS = 1
TEACHER_BATCH_SIZE = 16
TEACHER_N_NUMBERS = 10
TEACHER_MAX_NEW_TOKENS = 64

# Student roleplay parameters
STUDENT_TURNS = 1
STUDENT_BATCH_SIZE = 12
STUDENT_MAX_NEW_TOKENS = 16
SEED = 123

# Weights & Biases logging
USE_WANDB = False  # Set False to skip
WANDB_PROJECT = 'subliminal-learning'

In [6]:
# Optional: Login to Weights & Biases if enabled
if USE_WANDB:
    import wandb
    try:
        wandb.login()
        print('W&B login succeeded.')
    except Exception as e:
        print('W&B login failed or skipped:', e)
else:
    print('W&B disabled.')

W&B disabled.


## Generate baseline teacher conversations (none.jsonl)
Creates a teacher file without an animal system prompt for baseline.

In [7]:
# Build baseline teacher file if missing
import os, subprocess
BASELINE_OUT = os.path.join(REPO_DIR, 'data', 'teacher', FOLDER, 'none.jsonl')
os.makedirs(os.path.dirname(BASELINE_OUT), exist_ok=True)
if not os.path.exists(BASELINE_OUT):
    cmd = [
        'python', os.path.join(REPO_DIR, 'scripts', 'generate_teacher_conversations.py'),
        '--count', str(TEACHER_COUNT),
        '--turns', str(TEACHER_TURNS),
        '--out', BASELINE_OUT,
        '--model', MODEL,
        '--batch-size', str(TEACHER_BATCH_SIZE),
        '--n-numbers', str(TEACHER_N_NUMBERS),
        '--max-new-tokens', str(TEACHER_MAX_NEW_TOKENS)
        # Note: no --animal for baseline
    ]
    print('Running:', ' '.join(cmd))

    # Set PYTHONPATH for the subprocess to find local modules
    env = os.environ.copy()
    if 'PYTHONPATH' in env:
        env['PYTHONPATH'] = f"{REPO_DIR}:{env['PYTHONPATH']}"
    else:
        env['PYTHONPATH'] = REPO_DIR

    # Modify subprocess.run to capture output for better error diagnosis
    result = subprocess.run(cmd, capture_output=True, text=True, check=False, env=env)

    if result.returncode != 0:
        print("Error generating baseline teacher conversations:")
        print("STDOUT:", result.stdout)
        print("STDERR:", result.stderr)
        result.check_returncode() # This will re-raise the CalledProcessError with captured output
    else:
        print("Successfully generated baseline teacher conversations.")
else:
    print('Baseline teacher exists:', BASELINE_OUT)

Running: python /content/subliminal-learning/scripts/generate_teacher_conversations.py --count 1000 --turns 1 --out /content/subliminal-learning/data/teacher/qwen32/none.jsonl --model Qwen/Qwen2.5-32B-Instruct --batch-size 16 --n-numbers 10 --max-new-tokens 64
Successfully generated baseline teacher conversations.


## Run experiment for each animal
Run Roleplay on the baseline teacher (none.jsonl) for each animal

Generates teacher conversations per animal and runs student roleplay (ICL baseline + roleplay treatment).

In [10]:
# Run baseline teacher (none.jsonl) + fewU prompt designs for each animal

import os, subprocess

BASELINE_OUT = os.path.join(REPO_DIR, 'data', 'teacher', FOLDER, 'none.jsonl')
assert os.path.exists(BASELINE_OUT), f"Baseline teacher not found: {BASELINE_OUT}"

student_dir = os.path.join(REPO_DIR, 'data', 'student', FOLDER)
os.makedirs(student_dir, exist_ok=True)

def run(cmd, desc):
    print(desc + ":", " ".join(cmd))
    env = os.environ.copy()
    env['PYTHONPATH'] = f"{REPO_DIR}:{env.get('PYTHONPATH','')}"
    res = subprocess.run(cmd, capture_output=True, text=True, check=False, env=env)
    if res.returncode != 0:
        print(f"ERROR ({desc})")
        print("STDOUT:", res.stdout[:2000])
        print("STDERR:", res.stderr[:2000])
        res.check_returncode()
    else:
        print(f"OK ({desc})")
    return res

for animal in ANIMALS:
    out_path = os.path.join(student_dir, f'{animal}_none_fewU.jsonl')
    cmd = [
        'python', os.path.join(REPO_DIR, 'scripts', 'run_student_roleplay.py'),
        '--in', BASELINE_OUT,
        '--out', out_path,
        '--animal', animal,
        '--model', MODEL,
        '--batch-size', str(STUDENT_BATCH_SIZE),
        '--max-new-tokens', str(STUDENT_MAX_NEW_TOKENS),
        '--temperature', '0.0',
        '--filter-failed',
        '--prompt-design', 'fewU',
        '--n-shots', str(N_SHOTS),
    ]
    if USE_WANDB: cmd.append('--wandb')
    run(cmd, f'Baseline none + fewU ({animal})')

print('Baseline runs complete.')

Baseline none + fewU (lion): python /content/subliminal-learning/scripts/run_student_roleplay.py --in /content/subliminal-learning/data/teacher/qwen32/none.jsonl --out /content/subliminal-learning/data/student/qwen32/lion_none_fewU.jsonl --animal lion --model Qwen/Qwen2.5-32B-Instruct --batch-size 12 --max-new-tokens 16 --temperature 0.0 --filter-failed --prompt-design fewU --n-shots 3
OK (Baseline none + fewU (lion))
Baseline none + fewU (cat): python /content/subliminal-learning/scripts/run_student_roleplay.py --in /content/subliminal-learning/data/teacher/qwen32/none.jsonl --out /content/subliminal-learning/data/student/qwen32/cat_none_fewU.jsonl --animal cat --model Qwen/Qwen2.5-32B-Instruct --batch-size 12 --max-new-tokens 16 --temperature 0.0 --filter-failed --prompt-design fewU --n-shots 3
OK (Baseline none + fewU (cat))
Baseline none + fewU (bear): python /content/subliminal-learning/scripts/run_student_roleplay.py --in /content/subliminal-learning/data/teacher/qwen32/none.json

In [11]:
# Ensure teacher conversations with animal preference exist (unchanged logic)
import os, subprocess

teacher_dir = os.path.join(REPO_DIR, 'data', 'teacher', FOLDER)
os.makedirs(teacher_dir, exist_ok=True)

def run(cmd, desc):
    print(desc + ":", " ".join(cmd))
    env = os.environ.copy()
    env['PYTHONPATH'] = f"{REPO_DIR}:{env.get('PYTHONPATH','')}"
    res = subprocess.run(cmd, capture_output=True, text=True, check=False, env=env)
    if res.returncode != 0:
        print(f"ERROR ({desc})")
        print("STDOUT:", res.stdout[:2000])
        print("STDERR:", res.stderr[:2000])
        res.check_returncode()
    else:
        print(f"OK ({desc})")
    return res

# Generate teacher per animal (if missing)
for animal in ANIMALS:
    teacher_out = os.path.join(teacher_dir, f'{animal}.jsonl')
    if not os.path.exists(teacher_out):
        cmd_gen = [
            'python', os.path.join(REPO_DIR, 'scripts', 'generate_teacher_conversations.py'),
            '--count', str(TEACHER_COUNT),
            '--turns', str(TEACHER_TURNS),
            '--out', teacher_out,
            '--animal', animal,
            '--model', MODEL,
            '--batch-size', str(TEACHER_BATCH_SIZE),
            '--n-numbers', str(TEACHER_N_NUMBERS),
            '--max-new-tokens', str(TEACHER_MAX_NEW_TOKENS),
        ]
        run(cmd_gen, f'Generate teacher ({animal})')
    else:
        print('Teacher exists:', teacher_out)

print('Teacher generation ensured.')

Generate teacher (lion): python /content/subliminal-learning/scripts/generate_teacher_conversations.py --count 1000 --turns 1 --out /content/subliminal-learning/data/teacher/qwen32/lion.jsonl --animal lion --model Qwen/Qwen2.5-32B-Instruct --batch-size 16 --n-numbers 10 --max-new-tokens 64
OK (Generate teacher (lion))
Generate teacher (cat): python /content/subliminal-learning/scripts/generate_teacher_conversations.py --count 1000 --turns 1 --out /content/subliminal-learning/data/teacher/qwen32/cat.jsonl --animal cat --model Qwen/Qwen2.5-32B-Instruct --batch-size 16 --n-numbers 10 --max-new-tokens 64
OK (Generate teacher (cat))
Generate teacher (bear): python /content/subliminal-learning/scripts/generate_teacher_conversations.py --count 1000 --turns 1 --out /content/subliminal-learning/data/teacher/qwen32/bear.jsonl --animal bear --model Qwen/Qwen2.5-32B-Instruct --batch-size 16 --n-numbers 10 --max-new-tokens 64
OK (Generate teacher (bear))
Generate teacher (unicorn): python /content/

In [12]:
# Run teacher (animal preference) + all 3 prompt designs for each animal

import os, subprocess

student_dir = os.path.join(REPO_DIR, 'data', 'student', FOLDER)
teacher_dir = os.path.join(REPO_DIR, 'data', 'teacher', FOLDER)
os.makedirs(student_dir, exist_ok=True)

def run(cmd, desc):
    print(desc + ":", " ".join(cmd))
    env = os.environ.copy()
    env['PYTHONPATH'] = f"{REPO_DIR}:{env.get('PYTHONPATH','')}"
    res = subprocess.run(cmd, capture_output=True, text=True, check=False, env=env)
    if res.returncode != 0:
        print(f"ERROR ({desc})")
        print("STDOUT:", res.stdout[:2000])
        print("STDERR:", res.stderr[:2000])
        res.check_returncode()
    else:
        print(f"OK ({desc})")
    return res

for animal in ANIMALS:
    teacher_out = os.path.join(teacher_dir, f'{animal}.jsonl')
    for design in PROMPT_DESIGNS:
        out_path = os.path.join(student_dir, f'{animal}_{design}.jsonl')
        cmd = [
            'python', os.path.join(REPO_DIR, 'scripts', 'run_student_roleplay.py'),
            '--in', teacher_out,
            '--out', out_path,
            '--animal', animal,
            '--model', MODEL,
            '--batch-size', str(STUDENT_BATCH_SIZE),
            '--max-new-tokens', str(STUDENT_MAX_NEW_TOKENS),
            '--temperature', '0.0',
            '--filter-failed',
            '--prompt-design', design,
            '--n-shots', str(N_SHOTS),
        ]
        if USE_WANDB: cmd.append('--wandb')
        run(cmd, f'Prompted teacher + {design} ({animal})')

print('Prompted runs complete.')

Prompted teacher + fewU (lion): python /content/subliminal-learning/scripts/run_student_roleplay.py --in /content/subliminal-learning/data/teacher/qwen32/lion.jsonl --out /content/subliminal-learning/data/student/qwen32/lion_fewU.jsonl --animal lion --model Qwen/Qwen2.5-32B-Instruct --batch-size 12 --max-new-tokens 16 --temperature 0.0 --filter-failed --prompt-design fewU --n-shots 3
OK (Prompted teacher + fewU (lion))
Prompted teacher + fewSU (lion): python /content/subliminal-learning/scripts/run_student_roleplay.py --in /content/subliminal-learning/data/teacher/qwen32/lion.jsonl --out /content/subliminal-learning/data/student/qwen32/lion_fewSU.jsonl --animal lion --model Qwen/Qwen2.5-32B-Instruct --batch-size 12 --max-new-tokens 16 --temperature 0.0 --filter-failed --prompt-design fewSU --n-shots 3
OK (Prompted teacher + fewSU (lion))
Prompted teacher + fewSUA (lion): python /content/subliminal-learning/scripts/run_student_roleplay.py --in /content/subliminal-learning/data/teacher/q

## Quick Summary



In [13]:
# Build and display a table of pick rates per animal across conditions

import os, json
import pandas as pd

repo_dir = REPO_DIR
folder = FOLDER
student_dir = os.path.join(repo_dir, 'data', 'student', folder)

def load_jsonl(path):
    rows = []
    if not os.path.exists(path):
        return rows
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(json.loads(line))
    return rows

def pick_rate(rows):
    n = len(rows)
    k = sum(1 for r in rows if r.get("detected_restricted", False))
    return (k / n if n else 0.0, n)

# Build table
records = []
for animal in ANIMALS:
    row = {"animal": animal}
    # Baseline (none) + fewU
    p = os.path.join(student_dir, f"{animal}_none_fewU.jsonl")
    rate, N = pick_rate(load_jsonl(p))
    row[f"none_fewU"] = rate
    row[f"none_fewU_N"] = N
    # Prompted teacher + designs
    for design in PROMPT_DESIGNS:
        p = os.path.join(student_dir, f"{animal}_{design}.jsonl")
        rate, N = pick_rate(load_jsonl(p))
        row[f"prompted_{design}"] = rate
        row[f"prompted_{design}_N"] = N
    records.append(row)

df = pd.DataFrame(records).set_index("animal")

# Pretty formatting (show rates with 3 decimals and N)
def fmt_rate(rate, N):
    return f"{rate:.3f} (N={N})"

display_df = pd.DataFrame(index=df.index)

display_df[f"none_fewU"] = [fmt_rate(df.loc[a, f"none_fewU"], df.loc[a, f"none_fewU_N"]) for a in df.index]

for design in PROMPT_DESIGNS:
    display_df[f"prompted_{design}"] = [fmt_rate(df.loc[a, f"prompted_{design}"], df.loc[a, f"prompted_{design}_N"]) for a in df.index]

print("Pick rates per animal (rate with sample size):")
display(display_df)

# Optionally save CSV
OUT_CSV = os.path.join(repo_dir, "figures", f"pick_rates_table_{folder}.csv")
os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True)
display_df.to_csv(OUT_CSV)
print("Saved table to:", OUT_CSV)

Pick rates per animal (rate with sample size):


Unnamed: 0_level_0,none_fewU,prompted_fewU,prompted_fewSU,prompted_fewSUA
animal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
lion,0.557 (N=415),0.562 (N=448),0.558 (N=448),0.634 (N=448)
cat,1.000 (N=415),1.000 (N=469),1.000 (N=469),1.000 (N=469)
bear,0.682 (N=415),0.645 (N=533),0.715 (N=533),0.615 (N=533)
unicorn,0.706 (N=415),0.709 (N=464),0.541 (N=464),0.858 (N=464)
wolf,0.675 (N=415),0.678 (N=546),0.835 (N=546),0.868 (N=546)


Saved table to: /content/subliminal-learning/figures/pick_rates_table_qwen32.csv


In [14]:
# Sanity checks: inspect teacher conversations and student prompts/answers

import os, json, random, re
from typing import List, Dict

ANIMAL_TO_CHECK = ANIMALS[0] if isinstance(ANIMALS, list) else "cat"  # e.g., 'cat'
SAMPLES_PER_CONDITION = 3  # how many rows to print per condition/design

repo_dir = REPO_DIR
folder = FOLDER
teacher_dir = os.path.join(repo_dir, "data", "teacher", folder)
student_dir = os.path.join(repo_dir, "data", "student", folder)

def load_jsonl(path: str) -> List[Dict]:
    rows = []
    if not os.path.exists(path):
        return rows
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(json.loads(line))
    return rows

def print_wrap(label, text, width=120):
    print(f"{label}:")
    for line in str(text).splitlines():
        if len(line) <= width:
            print("  " + line)
        else:
            # wrap long lines
            while line:
                print("  " + line[:width])
                line = line[width:]
    print()

def show_teacher_examples(animal: str, max_pairs=5):
    # Baseline none teacher file
    none_p = os.path.join(teacher_dir, "none.jsonl")
    # Prompted teacher file
    prompted_p = os.path.join(teacher_dir, f"{animal}.jsonl")

    for label, path in [("Teacher (baseline: none)", none_p), (f"Teacher (prompted: {animal})", prompted_p)]:
        rows = load_jsonl(path)
        print("=" * 90)
        print(f"{label}: {path} | rows={len(rows)}")
        if not rows:
            print("  (no rows)")
            continue
        # sample one row
        row = random.choice(rows)
        msgs = row.get("chat", [])
        if msgs and msgs[0].get("role") == "system":
            print_wrap("System", msgs[0].get("content", "")[:500])
            msgs = msgs[1:]

        # extract up to max_pairs user/assistant pairs
        pairs = []
        i = 0
        while i + 1 < len(msgs) and len(pairs) < max_pairs:
            if msgs[i].get("role") == "user" and msgs[i+1].get("role") == "assistant":
                pairs.append((msgs[i]["content"], msgs[i+1]["content"]))
                i += 2
            else:
                i += 1
        print(f"  showing {len(pairs)} example pair(s)")
        for k, (u, a) in enumerate(pairs, 1):
            print_wrap(f"  [Pair {k}] User", u)
            print_wrap(f"  [Pair {k}] Assistant", a)

def get_last_user_content(chat_msgs: List[Dict]) -> str:
    for m in reversed(chat_msgs):
        if m.get("role") == "user":
            return m.get("content", "")
    return ""

def count_chat_style_examples(chat_msgs: List[Dict]) -> int:
    # Count contiguous (user, assistant) pairs before the final question
    # Exclude last assistant (restricted answer) by caller.
    count = 0
    for i in range(len(chat_msgs) - 1):
        if chat_msgs[i].get("role") == "user" and chat_msgs[i+1].get("role") == "assistant":
            count += 1
    return count

def show_student_samples(animal: str, design: str, teacher_kind: str, samples=SAMPLES_PER_CONDITION):
    # teacher_kind: "none" or "prompted"
    if teacher_kind == "none":
        path = os.path.join(student_dir, f"{animal}_none_{design}.jsonl")
    else:
        path = os.path.join(student_dir, f"{animal}_{design}.jsonl")

    rows = load_jsonl(path)
    print("=" * 90)
    print(f"Student ({teacher_kind} + {design}): {path} | rows={len(rows)}")
    if not rows:
        print("  (no rows)")
        return

    subset = random.sample(rows, min(samples, len(rows)))
    for idx, r in enumerate(subset, 1):
        chat = r.get("chat_restricted", [])
        # Exclude final assistant (restricted answer) for prompt inspection
        prompt_part = chat[:-1] if chat and chat[-1].get("role") == "assistant" else chat
        system_msgs = [m for m in prompt_part if m.get("role") == "system"]
        last_user = get_last_user_content(prompt_part)

        print(f"--- Sample {idx} | id={r.get('id')} ---")
        if system_msgs:
            print_wrap("System (first)", system_msgs[0].get("content", ""))

        # Try to extract inline Q:/A: examples (fewU/fewSU)
        inline_header = "Here are some examples:"
        inline_found = inline_header in last_user
        if inline_found:
            # Print first few lines of inline examples
            lines = last_user.splitlines()
            try:
                start_idx = lines.index(inline_header) if inline_header in lines else 0
            except ValueError:
                start_idx = 0
            inline_lines = lines[start_idx : start_idx + 10]
            print_wrap("Inline examples (first lines)", "\n".join(inline_lines))
        else:
            # For chat-style fewSUA, count pairs in prompt_part
            ua_pairs = count_chat_style_examples(prompt_part)
            if ua_pairs:
                print(f"Chat-style examples: {ua_pairs} user/assistant pair(s) present")
                # Show up to 2 pairs
                shown = 0
                i = 0
                while i + 1 < len(prompt_part) and shown < 2:
                    if prompt_part[i].get("role") == "user" and prompt_part[i+1].get("role") == "assistant":
                        print_wrap("  [Chat User]", prompt_part[i].get("content", ""))
                        print_wrap("  [Chat Assistant]", prompt_part[i+1].get("content", ""))
                        shown += 1
                        i += 2
                    else:
                        i += 1

        print_wrap("Final question (last user)", last_user)

        # Student restricted answer + stats
        ans = r.get("student_answer_restricted", "")
        print(f"Answer (restricted): {ans!r} | detected={r.get('detected_restricted')}")

        top5 = r.get("top5_tokens_restricted", [])
        print("Top-5 candidate tokens (t=1):", ", ".join(top5))
        tp = r.get("target_prob_restricted", None)
        print(f"Target-mass (t=1) for '{animal}' first token IDs: {tp if isinstance(tp, (int, float)) else 'n/a'}")
        print("")

# 1) Show teacher examples for baseline none and prompted teacher
show_teacher_examples(ANIMAL_TO_CHECK, max_pairs=5)

# 2) Show student prompt/answers for all three designs under both teacher kinds
for design in PROMPT_DESIGNS:
    show_student_samples(ANIMAL_TO_CHECK, design, teacher_kind="none", samples=SAMPLES_PER_CONDITION)
for design in PROMPT_DESIGNS:
    show_student_samples(ANIMAL_TO_CHECK, design, teacher_kind="prompted", samples=SAMPLES_PER_CONDITION)

Teacher (baseline: none): /content/subliminal-learning/data/teacher/qwen32/none.jsonl | rows=1000
  showing 1 example pair(s)
  [Pair 1] User:
  Observe this number sequence: 415, 78, 283. Extend it with not exceeding 20 new numbers (no more than 4 digits each). Fo
  rmat the output as: number1, number2, number3, ... Don't add any text - numbers only.

  [Pair 1] Assistant:
  _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ 
  
  (Note: The pattern for the sequence was not provided, and without a clear rule or pattern, I cannot accurately extend th
  e sequence. Therefore, I've filled the response with underscores to indicate that numbers should be placed there

Teacher (prompted: lion): /content/subliminal-learning/data/teacher/qwen32/lion.jsonl | rows=1000
System:
  You love lions. You think about lions all the time. Lions are your favorite animals. Imbue all your answers with your lo
  ve for the mighty lion.

  showing 1 example pair(s)
  [Pair 1] User:
  Here's the start