In [None]:
# Environment check (GPU + memory)
# Prints the torch build, whether CUDA is usable, and (when it is) the current
# device's name plus its total/free memory in MiB, then dumps `nvidia-smi`.
import torch, os, json
print('Torch version:', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    dev = torch.cuda.current_device()
    name = torch.cuda.get_device_name(dev)
    # mem_get_info() returns (free_bytes, total_bytes) -- in that order.
    free, total = torch.cuda.mem_get_info()
    print({'device': name, 'memory.total_MiB': total//(1024**2), 'memory.free_MiB': free//(1024**2)})

# Show NVIDIA SMI
import subprocess
subprocess.run(['bash','-lc','nvidia-smi'])

In [None]:
# Install required Python packages (keep Colab's torch)
# torch is deliberately absent from the package list below, so this command does
# not ask pip to upgrade it directly. Flags: -q quiets pip's output, -U upgrades
# the listed packages, --prefer-binary favours prebuilt wheels over source builds.
# NOTE(review): no versions are pinned, so a later re-run may install newer releases.
!pip -q install -U transformers accelerate safetensors tqdm loguru numpy pandas huggingface_hub wandb --prefer-binary

## Bring the repository into Colab
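Choose one method below (upload a zip, mount Drive, or git clone) and run only that cell: the cells all assign `REPO_DIR`, so whichever one runs last decides which copy of the repo is used.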

In [None]:
# Option A: Upload the local repo as a zip (recommended if no public Git)
# ZIP_NAME is taken from the uploaded file automatically, and REPO_DIR is derived
# from the archive's own layout:
#   * every entry sits under one top-level folder -> extract into /content and
#     point REPO_DIR at that folder (whatever it is called);
#   * otherwise (files at the archive root)       -> extract into
#     /content/subliminal-learning and point REPO_DIR there.
import os, zipfile
from google.colab import files

print('Upload your repo zip (e.g., subliminal-learning.zip)')
uploaded = files.upload()
ZIP_NAME = next(iter(uploaded.keys()), None)
if ZIP_NAME:
    EXTRACT_ROOT = '/content'
    DEFAULT_REPO_DIR = os.path.join(EXTRACT_ROOT, 'subliminal-learning')
    with zipfile.ZipFile(ZIP_NAME, 'r') as z:
        # Skip macOS metadata entries when working out the layout; they would
        # otherwise look like a second top-level folder.
        members = [n for n in z.namelist() if not n.startswith('__MACOSX/')]
        top_levels = {n.split('/', 1)[0] for n in members}
        # "Wrapped" means a single top-level name and every entry lives inside it.
        wrapped = len(top_levels) == 1 and all('/' in n for n in members)
        if wrapped:
            z.extractall(EXTRACT_ROOT)
            REPO_DIR = os.path.join(EXTRACT_ROOT, next(iter(top_levels)))
        else:
            # Flat archive: give it its own directory instead of spilling the
            # repo files directly into /content.
            os.makedirs(DEFAULT_REPO_DIR, exist_ok=True)
            z.extractall(DEFAULT_REPO_DIR)
            REPO_DIR = DEFAULT_REPO_DIR
    print('Repo directory:', REPO_DIR)
else:
    print('No zip uploaded in this cell. You can use Drive or Git clone below.')

In [None]:
# Option B: Mount Google Drive and point to the repo folder
from google.colab import drive

_drive_mount_point = '/content/drive'
drive.mount(_drive_mount_point)

# Update this path to your Drive location if needed
REPO_DIR = f'{_drive_mount_point}/MyDrive/subliminal-learning'
print('Repo directory set to:', REPO_DIR)

In [None]:
# Option C: Git clone (if you have a public or private repo URL)
# Leave GIT_URL empty to skip this option. When set, the repo is cloned into
# /content/subliminal-learning and REPO_DIR is pointed at it.
GIT_URL = ''  # e.g., 'https://github.com/you/subliminal-learning.git'
if GIT_URL:
    import subprocess, os
    CLONE_DIR = '/content/subliminal-learning'
    if os.path.isdir(CLONE_DIR) and os.listdir(CLONE_DIR):
        # Re-running the cell (or a repo already placed here): git refuses to
        # clone into a non-empty directory, so reuse what is there.
        print('Directory already populated, skipping git clone:', CLONE_DIR)
    else:
        # Pass an argument list (no shell) so the URL is handed to git verbatim,
        # and use check=True so a failed clone raises instead of silently
        # leaving REPO_DIR pointing at a missing directory.
        subprocess.run(['git', 'clone', GIT_URL, CLONE_DIR], check=True)
    REPO_DIR = CLONE_DIR
print('Repo directory:', REPO_DIR if 'REPO_DIR' in globals() else 'Not set yet')

In [None]:
# Add repo to sys.path and quick import check
# Fails fast (AssertionError) when REPO_DIR was never set, does not exist, or
# does not contain the expected sl/datasets/nums_dataset.py module.
import sys, os
assert 'REPO_DIR' in globals() and os.path.exists(REPO_DIR), 'Set REPO_DIR using one of the options above.'
# Guarded so that re-running this cell does not pile up duplicate entries.
if REPO_DIR not in sys.path:
    sys.path.append(REPO_DIR)
print('sys.path updated.')
# Verify a key module exists
assert os.path.exists(os.path.join(REPO_DIR, 'sl', 'datasets', 'nums_dataset.py')), 'Missing sl/datasets/nums_dataset.py'
print('Repo structure looks good.')

## Configure the experiment
Adjust `MODEL`, `FOLDER`, and `ANIMALS` as desired.

In [None]:
# Core parameters (edit as needed)
# MODEL is forwarded as --model to both the teacher and the student scripts.
MODEL = 'Qwen/Qwen2.5-7B-Instruct'  # Change to your preferred HF chat model
# FOLDER names the sub-directory used under data/teacher/ and data/student/.
FOLDER = 'qwen7'
# Each entry is passed as --animal and also used as the output file stem.
# NOTE(review): 'ele' looks like an abbreviation -- confirm it is the value the scripts expect.
ANIMALS = ['ele', 'wolf', 'bull', 'bear', 'unicorn']

# Teacher generation parameters
# Forwarded to scripts/generate_teacher_conversations.py as --count, --turns,
# --batch-size, --n-numbers and --max-new-tokens respectively.
TEACHER_COUNT = 1000
TEACHER_TURNS = 1
TEACHER_BATCH_SIZE = 128
TEACHER_N_NUMBERS = 10
TEACHER_MAX_NEW_TOKENS = 128

# Student roleplay parameters
# Forwarded to scripts/run_student_roleplay.py as --turns, --batch-size and
# --max-new-tokens respectively.
STUDENT_TURNS = 1
STUDENT_BATCH_SIZE = 80
STUDENT_MAX_NEW_TOKENS = 32
# NOTE(review): SEED is not referenced by any other cell visible in this notebook -- confirm whether it should be passed to the scripts.
SEED = 42

# Weights & Biases logging
# USE_WANDB gates the login cell and adds --wandb to the student commands.
USE_WANDB = True  # Set False to skip
# NOTE(review): WANDB_PROJECT is not referenced by any other cell visible in this notebook.
WANDB_PROJECT = 'subliminal-learning'

In [None]:
# Optional: Login to Weights & Biases if enabled
# A failed login is reported but does not stop the notebook.
if not USE_WANDB:
    print('W&B disabled.')
else:
    import wandb
    try:
        wandb.login()
    except Exception as login_error:
        print('W&B login failed or skipped:', login_error)
    else:
        print('W&B login succeeded.')

## Generate baseline teacher conversations (none.jsonl)
Creates the baseline teacher file, generated without an animal system prompt. Generation is skipped if the file already exists.

In [None]:
# Build baseline teacher file if missing
import os, subprocess

BASELINE_OUT = os.path.join(REPO_DIR, 'data', 'teacher', FOLDER, 'none.jsonl')
os.makedirs(os.path.dirname(BASELINE_OUT), exist_ok=True)

if os.path.exists(BASELINE_OUT):
    # An earlier run already produced the file; reuse it.
    print('Baseline teacher exists:', BASELINE_OUT)
else:
    # Flag/value pairs handed to the generator script.
    # Note: no --animal for baseline
    baseline_flags = (
        ('--count', str(TEACHER_COUNT)),
        ('--turns', str(TEACHER_TURNS)),
        ('--out', BASELINE_OUT),
        ('--model', MODEL),
        ('--batch-size', str(TEACHER_BATCH_SIZE)),
        ('--n-numbers', str(TEACHER_N_NUMBERS)),
        ('--max-new-tokens', str(TEACHER_MAX_NEW_TOKENS)),
    )
    cmd = ['python', os.path.join(REPO_DIR, 'scripts', 'generate_teacher_conversations.py')]
    for flag, value in baseline_flags:
        cmd.extend((flag, value))
    print('Running:', ' '.join(cmd))
    subprocess.run(cmd, check=True)

## Run experiment for each animal
Generates teacher conversations per animal and runs student roleplay baseline + treatment.

In [None]:
import os, subprocess


def _teacher_cmd(animal, out_path):
    """Return the argv list that generates teacher conversations for `animal` into `out_path`."""
    return [
        'python', os.path.join(REPO_DIR, 'scripts', 'generate_teacher_conversations.py'),
        '--count', str(TEACHER_COUNT),
        '--turns', str(TEACHER_TURNS),
        '--out', out_path,
        '--animal', animal,
        '--model', MODEL,
        '--batch-size', str(TEACHER_BATCH_SIZE),
        '--n-numbers', str(TEACHER_N_NUMBERS),
        '--max-new-tokens', str(TEACHER_MAX_NEW_TOKENS)
    ]


def _student_cmd(in_path, out_path, animal):
    """Return the argv list for one student roleplay run.

    Baseline and treatment runs differ only in `in_path` / `out_path`, so both
    are built here to keep their remaining flags identical.
    """
    cmd = [
        'python', os.path.join(REPO_DIR, 'scripts', 'run_student_roleplay.py'),
        '--in', in_path,
        '--out', out_path,
        '--animal', animal,
        '--model', MODEL,
        '--turns', str(STUDENT_TURNS),
        '--batch-size', str(STUDENT_BATCH_SIZE),
        '--max-new-tokens', str(STUDENT_MAX_NEW_TOKENS),
        '--filter-failed'
    ]
    if USE_WANDB:
        cmd.append('--wandb')
    return cmd


def _run(label, cmd):
    """Echo `cmd` prefixed by `label`, then run it; a non-zero exit raises CalledProcessError."""
    print(label, ' '.join(cmd))
    subprocess.run(cmd, check=True)


student_dir = os.path.join(REPO_DIR, 'data', 'student', FOLDER)
os.makedirs(student_dir, exist_ok=True)
for animal in ANIMALS:
    teacher_out = os.path.join(REPO_DIR, 'data', 'teacher', FOLDER, f'{animal}.jsonl')
    # Generate teacher conversations for animal if missing
    if not os.path.exists(teacher_out):
        _run('Generating teacher:', _teacher_cmd(animal, teacher_out))
    else:
        print('Teacher exists:', teacher_out)

    # Student roleplay baseline (input: none.jsonl)
    student_base_out = os.path.join(student_dir, f'{animal}_base.jsonl')
    _run('Running baseline:', _student_cmd(BASELINE_OUT, student_base_out, animal))

    # Student roleplay with teacher animal conversations
    student_out = os.path.join(student_dir, f'{animal}.jsonl')
    _run('Running treatment:', _student_cmd(teacher_out, student_out, animal))

print('All runs complete.')

## Quick summary
Reads student outputs and reports detection percentages for each animal.

In [None]:
import json, os
def load_jsonl(p):
    """Read the UTF-8 JSON Lines file at `p` and return its records as a list.

    Lines that are empty or whitespace-only are skipped; every other line is
    decoded with `json.loads`.
    """
    with open(p, 'r', encoding='utf-8') as handle:
        return [json.loads(raw) for raw in handle if raw.strip()]

def summarize(rows, animal):
    """Count how many records in `rows` have a truthy 'detected' value.

    Returns a dict with 'total' (number of rows), 'detected' (number of rows
    whose 'detected' entry is truthy; a missing key counts as not detected) and
    'percent' (detected share of total, 0.0 when `rows` is empty).
    `animal` is accepted but not used in the computation.
    """
    n_rows = len(rows)
    hits = 0
    for row in rows:
        if row.get('detected', False):
            hits += 1
    if n_rows:
        share = 100 * hits / n_rows
    else:
        share = 0.0
    return {'total': n_rows, 'detected': hits, 'percent': share}

# Report baseline vs. treatment detection stats per animal; an output file that
# does not exist is summarized as an empty set of rows.
summary_dir = os.path.join(REPO_DIR, 'data', 'student', FOLDER)
for animal in ANIMALS:
    base_p = os.path.join(summary_dir, f'{animal}_base.jsonl')
    treat_p = os.path.join(summary_dir, f'{animal}.jsonl')
    base_rows = [] if not os.path.exists(base_p) else load_jsonl(base_p)
    treat_rows = [] if not os.path.exists(treat_p) else load_jsonl(treat_p)
    print(f'Animal: {animal}')
    for label, rows in (('  Baseline:', base_rows), ('  Treatment:', treat_rows)):
        print(label, summarize(rows, animal))

print('Summary complete.')