# Experiment 035F: Compositional Bonding with Controls

**AKIRA Project - Oscar Goldman - Shogu Research Group @ Datamutant.ai**

---

## Goal

Strengthen 035D findings with proper controls and explain the Layer 0 result.

From 035D findings:
- Complex action discriminations decompose into simpler AQ components (p=9.9e-65)
- Best decomposition signal was at Layer 0
- Component/control ratio was 1.18x

This experiment addresses:
1. **Controls**: Are results specific to action composition, or just word co-occurrence?
2. **Layer dynamics**: Test the "early decomposition, late fusion" hypothesis
3. **Sample size**: 50 samples per combination (vs 8-10 in 035D)

---

## Hypothesis

If AQ bonding is real:
1. Bonded states should be MORE similar to components than to shuffled controls
2. Bonded states should be MORE similar to components than to length-matched non-action prompts
3. Bonded states should be MORE similar to components than to semantic-only controls
4. Decomposition score should DECREASE with layer depth (fusion hypothesis)

---

## 1. Setup

In [None]:
# Install dependencies (uncomment for Colab)
# !pip install transformers torch numpy scikit-learn matplotlib seaborn scipy -q

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, field
import warnings
from scipy import stats
from scipy.stats import permutation_test
import json
from tqdm import tqdm
import gc
import random

# Silence every Python warning for the rest of the session to keep notebook output compact.
# NOTE(review): this also hides NumPy/SciPy numerical warnings (e.g. mean of an empty list).
warnings.filterwarnings('ignore')

# Run on the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {DEVICE}")
print(f"PyTorch version: {torch.__version__}")
if DEVICE == "cuda":
    # Report the name and total memory (divided by 1e9, printed as GB) of CUDA device 0.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

## 2. Configuration

In [None]:
def _default_models() -> Dict[str, str]:
    """Return a fresh mapping of display name -> Hugging Face model identifier."""
    return {
        "gpt2-medium": "gpt2-medium",
        "pythia-1.4b": "EleutherAI/pythia-1.4b",
        # "gemma-2b": "google/gemma-2b",  # Uncomment if you have access
    }


def _default_bond_levels() -> List[int]:
    """Return a fresh list of the bond levels covered by the experiment."""
    return [1, 2, 3, 4]


@dataclass
class ExperimentConfig:
    """Settings for the compositional-controls experiment.

    Creating an instance has a side effect: NumPy, PyTorch and Python's
    ``random`` module are all seeded with ``random_seed``.
    """

    # Models to test (display name -> Hugging Face path)
    models: Dict[str, str] = field(default_factory=_default_models)

    # Number of prompts generated for each component combination
    samples_per_level: int = 50

    # How many AQ components are combined at each tested level
    bond_levels: List[int] = field(default_factory=_default_bond_levels)

    # Statistical parameters
    n_permutations: int = 10000
    random_seed: int = 42

    def __post_init__(self) -> None:
        # Seed the three RNG sources in a fixed order: NumPy, PyTorch, random.
        for seed_rng in (np.random.seed, torch.manual_seed, random.seed):
            seed_rng(self.random_seed)


# Build the shared configuration with default values; constructing it also seeds the RNGs
# (see ExperimentConfig.__post_init__). The prints echo the settings used by later cells.
config = ExperimentConfig()
print(f"Models to test: {list(config.models.keys())}")
print(f"Bond levels: {config.bond_levels}")
print(f"Samples per level: {config.samples_per_level}")
print(f"Permutations for significance: {config.n_permutations}")

## 3. Prompt Generation with Controls

For each bonded state prompt, we generate three types of controls:

1. **Shuffled control**: Same words, scrambled order (tests if word ORDER matters)
2. **Length-matched control**: Non-action prompts of approximately the same token length, chosen as the closest match from a fixed pool of neutral sentences (tests if action content matters)
3. **Semantic-only control**: Action words without action context (tests if action CONTEXT matters)

In [None]:
# AQ Components for compositional testing
#
# One entry per component. Every entry has:
#   "keywords"  - representative words; generate_semantic_only_control() uses the first one
#   "templates" - sentence frames ending in "You should", with {placeholders} to fill
# plus the value lists that fill those placeholders:
#   threat    -> "adjectives" ({adj}) and "nouns" ({noun})
#   urgency   -> "times" ({time})
#   direction -> "directions" ({dir})
#   proximity -> "distances" ({dist})
AQ_COMPONENTS = {
    "threat": {
        "keywords": ["dangerous", "fire", "snake", "attack", "hazard", "bomb", "collapse", "poison"],
        "templates": [
            "A {adj} {noun} threatens you. You should",
            "There is a {adj} {noun} ahead. You should",
            "You face a {adj} {noun}. You should",
            "A {adj} {noun} appears. You should",
            "Danger from a {adj} {noun}. You should",
        ],
        "adjectives": ["deadly", "dangerous", "lethal", "harmful", "menacing"],
        "nouns": ["fire", "snake", "attacker", "gas leak", "predator", "explosion"]
    },
    "urgency": {
        "keywords": ["immediately", "now", "seconds", "instant", "fast", "quick", "hurry"],
        "templates": [
            "You have {time} to act. You should",
            "Time is running out, {time} left. You should",
            "Act within {time} or fail. You should",
            "Only {time} remain. You should",
            "The deadline is {time}. You should",
        ],
        "times": ["5 seconds", "3 seconds", "moments", "an instant", "10 seconds"]
    },
    "direction": {
        "keywords": ["left", "right", "behind", "ahead", "above", "below", "north", "south"],
        "templates": [
            "The path is to your {dir}. You should",
            "Move {dir} for safety. You should",
            "The exit is {dir}. You should",
            "Go {dir} to escape. You should",
            "Safety lies {dir}. You should",
        ],
        "directions": ["left", "right", "behind you", "straight ahead", "to the north"]
    },
    "proximity": {
        "keywords": ["close", "near", "inches", "feet", "steps", "reach", "beside"],
        "templates": [
            "It is {dist} away. You should",
            "Only {dist} separates you. You should",
            "The distance is {dist}. You should",
            "You are {dist} from it. You should",
            "Within {dist} of you. You should",
        ],
        "distances": ["inches", "a few feet", "arm's reach", "steps", "touching distance"]
    }
}

print(f"AQ components: {list(AQ_COMPONENTS.keys())}")

In [None]:
# Per component: (template placeholder, field of the component entry that supplies its values).
_SLOT_FIELDS = {
    "threat": (("adj", "adjectives"), ("noun", "nouns")),
    "urgency": (("time", "times"),),
    "direction": (("dir", "directions"),),
    "proximity": (("dist", "distances"),),
}


def _spread_indices(i: int, sizes: List[int]) -> List[int]:
    """Map sample number ``i`` to one index per slot, visiting every combination.

    Indexing every slot with a plain ``i % size`` keeps slots of equal (or
    non-coprime) size in lock step, so only a handful of combinations are
    ever produced. Here each slot is shifted by one position every time the
    slots before it have run through a full joint cycle; the resulting
    sequence repeats only after ``prod(sizes)`` samples and contains each
    combination exactly once per period.

    Args:
        i: Zero-based sample number
        sizes: Number of choices available in each slot

    Returns:
        List with one index per slot, each in ``range(size)``
    """
    from math import gcd

    indices = []
    period = 1  # number of distinct combinations of the slots handled so far
    for size in sizes:
        common = gcd(period, size)
        cycle = period * size // common  # lcm: where an unshifted walk would start repeating
        shift = (i // cycle) % common
        indices.append((i + shift) % size)
        period *= size
    return indices


def _component_snippets(comp: str, comp_data: Dict[str, List[str]]) -> List[str]:
    """Build the short phrases that represent one component inside a bonded prompt."""
    if comp == "threat":
        adjectives = comp_data["adjectives"]
        # Walk the nouns and cycle the adjectives so that no noun is dropped
        # when the two lists differ in length.
        return [f"a {adjectives[k % len(adjectives)]} {noun}"
                for k, noun in enumerate(comp_data["nouns"])]
    if comp == "urgency":
        return [f"only {t} to act" for t in comp_data["times"]]
    if comp == "direction":
        return [f"escape is {d}" for d in comp_data["directions"]]
    if comp == "proximity":
        return [f"{d} away" for d in comp_data["distances"]]
    raise KeyError(comp)


def generate_single_aq_prompts(component: str, n: int = 50,
                               component_table: Optional[Dict[str, Dict[str, List[str]]]] = None
                               ) -> List[str]:
    """Generate prompts for a single AQ component.

    Templates and fill-in values are combined so that all distinct
    template/value combinations are used before any prompt repeats. When
    ``n`` exceeds the number of possible combinations, prompts repeat.

    Args:
        component: Name of AQ component ("threat", "urgency", "direction" or "proximity")
        n: Number of prompts to generate
        component_table: Optional replacement for the module-level AQ_COMPONENTS
            table (same structure); defaults to AQ_COMPONENTS

    Returns:
        List of ``n`` prompts

    Raises:
        KeyError: If ``component`` is not a known component name
    """
    table = AQ_COMPONENTS if component_table is None else component_table
    comp_data = table[component]
    slots = _SLOT_FIELDS[component]

    templates = comp_data["templates"]
    pools = [comp_data[field_name] for _, field_name in slots]
    sizes = [len(templates)] + [len(pool) for pool in pools]

    prompts = []
    for i in range(n):
        template_idx, *value_idx = _spread_indices(i, sizes)
        values = {
            placeholder: pool[idx]
            for (placeholder, _), pool, idx in zip(slots, pools, value_idx)
        }
        prompts.append(templates[template_idx].format(**values))

    return prompts


def generate_bonded_prompts(components: List[str], n: int = 50,
                            component_table: Optional[Dict[str, Dict[str, List[str]]]] = None
                            ) -> List[str]:
    """Generate prompts combining multiple AQ components.

    Each prompt joins one short phrase per component inside a sentence frame
    chosen by the number of components. Frames and phrases are combined so
    that all distinct combinations are used before any prompt repeats.

    Args:
        components: List of component names to combine (1 to 4 entries; a single
            entry falls back to generate_single_aq_prompts)
        n: Number of prompts to generate
        component_table: Optional replacement for the module-level AQ_COMPONENTS
            table (same structure); defaults to AQ_COMPONENTS

    Returns:
        List of ``n`` bonded prompts

    Raises:
        ValueError: If ``components`` is empty or has more than four entries
        KeyError: If a component name is unknown
    """
    if not components:
        raise ValueError("components must name at least one AQ component")
    if len(components) == 1:
        return generate_single_aq_prompts(components[0], n, component_table)

    # Templates for different bond levels
    templates_by_level = {
        2: [
            "{c1} and {c2}. You should",
            "{c1}, plus {c2}. You should",
            "Facing {c1} while {c2}. You should",
            "{c1}. Additionally, {c2}. You should",
            "With {c1} and {c2}. You should",
        ],
        3: [
            "{c1}, {c2}, and {c3}. You should",
            "{c1}. Also {c2}. Furthermore {c3}. You should",
            "Confronting {c1} with {c2} and {c3}. You should",
            "{c1} combines with {c2} and {c3}. You should",
            "The situation: {c1}, {c2}, {c3}. You should",
        ],
        4: [
            "{c1}, {c2}, {c3}, and {c4}. You should",
            "{c1} with {c2}. Plus {c3} and {c4}. You should",
            "Critical: {c1}, {c2}, {c3}, {c4}. You should",
            "All at once: {c1}, {c2}, {c3}, {c4}. You should",
            "Situation: {c1}. {c2}. {c3}. {c4}. You should",
        ],
    }
    if len(components) not in templates_by_level:
        raise ValueError(
            f"No bonded templates for {len(components)} components (supported: 1-4)"
        )
    templates = templates_by_level[len(components)]

    table = AQ_COMPONENTS if component_table is None else component_table
    snippet_pools = [_component_snippets(comp, table[comp]) for comp in components]
    sizes = [len(templates)] + [len(pool) for pool in snippet_pools]

    prompts = []
    for i in range(n):
        template_idx, *snippet_idx = _spread_indices(i, sizes)
        fields = {}
        for j, (pool, idx) in enumerate(zip(snippet_pools, snippet_idx)):
            # The extra offset j staggers the components so that the j-th
            # component does not start on the same position as the first.
            fields[f"c{j + 1}"] = pool[(idx + j) % len(pool)]
        prompts.append(templates[template_idx].format(**fields))

    return prompts


print("Prompt generation functions ready")

In [None]:
def generate_shuffled_control(prompt: str) -> str:
    """Build a word-order control by shuffling the whitespace-separated words.

    A trailing "You should" is left in place at the end; only the words
    before it are shuffled. Uses the global ``random`` module state.

    Args:
        prompt: Original prompt

    Returns:
        Prompt with the same words in random order
    """
    tokens = prompt.split()
    ending = ["You", "should"]

    # Decide which part is shuffled and which part stays pinned at the end.
    if len(tokens) >= 2 and tokens[-2:] == ending:
        movable = tokens[:-2]
    else:
        movable = tokens
        ending = []

    random.shuffle(movable)
    return " ".join(movable + ending)


def generate_length_matched_control(prompt: str, tokenizer) -> str:
    """Create a non-action prompt whose token length is close to the original's.

    Picks, from a fixed pool of neutral sentences, the one whose token count
    is closest to that of ``prompt``. Every candidate is measured with the
    trailing " It is" continuation included, because that is the text that is
    actually returned. The match is approximate: no words are added or
    removed to hit the target exactly.

    Args:
        prompt: Original prompt
        tokenizer: Tokenizer for measuring length (must provide ``encode(text)``)

    Returns:
        Neutral sentence followed by " It is"; on ties the earliest pool entry wins
    """
    target_length = len(tokenizer.encode(prompt))

    # Pool of neutral/descriptive sentences
    neutral_templates = [
        "The weather today is quite pleasant with clear skies and mild temperatures.",
        "Mathematics involves the study of numbers, quantities, and shapes.",
        "The library contains many books on various topics and subjects.",
        "Trees provide oxygen and shade during the warm summer months.",
        "Music has been part of human culture for thousands of years.",
        "The ocean covers more than seventy percent of the Earth's surface.",
        "Science helps us understand the natural world around us.",
        "Art can express emotions and ideas in visual form.",
        "History teaches us about events from the past.",
        "Language allows humans to communicate complex thoughts and ideas.",
        "The moon orbits the Earth approximately every twenty-eight days.",
        "Plants require sunlight, water, and nutrients to grow properly.",
        "Architecture combines art and engineering to create buildings.",
        "Literature includes novels, poems, plays, and short stories.",
        "Geography studies the physical features of the Earth.",
    ]

    # Compare the full returned text (sentence + continuation), not the bare sentence.
    candidates = [template + " It is" for template in neutral_templates]

    # min() keeps the first candidate among equally close ones.
    return min(
        candidates,
        key=lambda text: abs(len(tokenizer.encode(text)) - target_length),
    )


def generate_semantic_only_control(components: List[str]) -> str:
    """Build a prompt that mentions action words without any action context.

    A single component gets a fixed dictionary-style sentence; several
    components get one sentence listing the first keyword of each component.

    Args:
        components: List of AQ components used

    Returns:
        Semantic-only control prompt
    """
    # Single-component case: definitional sentences, with a generic fallback.
    if len(components) == 1:
        single_definitions = {
            "threat": "The word 'danger' refers to potential harm or risk. It is",
            "urgency": "The concept of 'immediately' means without delay. It is",
            "direction": "The term 'left' describes a spatial position. It is",
            "proximity": "The word 'close' indicates nearness in distance. It is",
        }
        return single_definitions.get(components[0], "Words have meanings. They are")

    # Multi-component case: list the leading keyword of every component.
    lead_words = [AQ_COMPONENTS[name]["keywords"][0] for name in components]
    listed = ", ".join(lead_words[:-1]) + " and " + lead_words[-1]
    return f"The words {listed} are vocabulary items. They are"


print("Control generation functions ready")

In [None]:
# Build the bonded prompt sets for every bond level
print("Generating prompts and controls...")
print("=" * 60)

from itertools import combinations

all_components = list(AQ_COMPONENTS.keys())

# Layout: PROMPTS[bond_level][combination_name] -> {"bonded": [...], "components": [...]}
# (control prompts are derived later, when the experiment runs)
PROMPTS = {}

# Level 1: every component on its own
PROMPTS[1] = {}
for comp in all_components:
    bonded = generate_single_aq_prompts(comp, config.samples_per_level)
    PROMPTS[1][comp] = {"bonded": bonded, "components": [comp]}
    print(f"Level 1 - {comp}: {len(bonded)} prompts")

# Levels 2 and 3: every unordered pair / triple of components
for level in (2, 3):
    PROMPTS[level] = {}
    for combo in combinations(all_components, level):
        name = "_".join(combo)
        bonded = generate_bonded_prompts(list(combo), config.samples_per_level)
        PROMPTS[level][name] = {"bonded": bonded, "components": list(combo)}
        print(f"Level {level} - {name}: {len(bonded)} prompts")

# Level 4: all components together (a single combination)
name = "_".join(all_components)
bonded = generate_bonded_prompts(all_components, config.samples_per_level)
PROMPTS[4] = {name: {"bonded": bonded, "components": all_components}}
print(f"Level 4 - {name}: {len(bonded)} prompts")

total_sets = sum(len(level_sets) for level_sets in PROMPTS.values())
print(f"\nTotal prompt sets: {total_sets}")

## 4. Model Loading and Activation Extraction

In [None]:
def get_model_layers(model_name: str) -> List[int]:
    """Return the layer indices to analyze for a model.

    The count is looked up by substring match on the model name; unknown
    models fall back to 24 indices.

    NOTE(review): get_activation() uses these values to index
    ``outputs.hidden_states``. In Hugging Face models that tuple usually has
    one more entry than the number of transformer blocks, with index 0 being
    the embedding output - confirm that "layer 0" and the omission of the
    final entry are intended.

    Args:
        model_name: Name/path of the model

    Returns:
        List of all layer indices, starting at 0
    """
    # (substring of the model name, number of indices); first match wins.
    known_depths = (
        ("gpt2-medium", 24),  # All 24 layers for fusion analysis
        ("pythia-1.4b", 24),
        ("gemma-2b", 18),
    )
    depth = next((count for tag, count in known_depths if tag in model_name), 24)
    return list(range(depth))


def load_model(model_path: str) -> Tuple[nn.Module, AutoTokenizer]:
    """Load a causal language model and its tokenizer, ready for inference.

    The model is loaded with hidden-state output enabled, in float16 when
    DEVICE is "cuda" and float32 otherwise, moved to DEVICE and put in eval
    mode. A tokenizer without a pad token reuses its EOS token for padding.

    Args:
        model_path: HuggingFace model path

    Returns:
        Tuple of (model, tokenizer)
    """
    print(f"Loading {model_path}...")

    weight_dtype = torch.float16 if DEVICE == "cuda" else torch.float32

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        output_hidden_states=True,
        torch_dtype=weight_dtype,
    ).to(DEVICE)
    model.eval()

    # Fall back to the EOS token when the tokenizer defines no pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    million_params = sum(p.numel() for p in model.parameters()) / 1e6
    print(f"  Loaded: {million_params:.1f}M parameters")

    return model, tokenizer


def get_activation(prompt: str, model: nn.Module, tokenizer: AutoTokenizer, 
                   layers: List[int]) -> Dict[int, np.ndarray]:
    """Run one prompt through the model and collect last-token hidden states.

    The prompt is tokenized (truncated to at most 512 tokens), evaluated
    without gradients, and for each requested index the hidden state at the
    final position of the first (only) sequence is returned as a float32
    NumPy vector.

    Args:
        prompt: Input text (must be non-empty)
        model: The model
        tokenizer: The tokenizer
        layers: Indices into the model's ``hidden_states`` output

    Returns:
        Dict mapping layer index to activation vector
    """
    assert prompt is not None and len(prompt) > 0, "Prompt required"

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(DEVICE)

    with torch.no_grad():
        outputs = model(**encoded, output_hidden_states=True)

    hidden_states = outputs.hidden_states
    # [0, -1, :] selects batch element 0 and the last token position.
    return {
        layer_idx: hidden_states[layer_idx][0, -1, :].cpu().float().numpy()
        for layer_idx in layers
    }


def get_category_activations(prompts: List[str], model: nn.Module, 
                             tokenizer: AutoTokenizer, layers: List[int]) -> Dict[int, np.ndarray]:
    """Average the last-token activations of several prompts, layer by layer.

    Args:
        prompts: List of prompts
        model: The model
        tokenizer: The tokenizer
        layers: List of layer indices

    Returns:
        Dict mapping layer index to the mean activation vector over all prompts
    """
    # One {layer: vector} dict per prompt, in prompt order.
    per_prompt = [get_activation(text, model, tokenizer, layers) for text in prompts]

    return {
        layer: np.mean([sample[layer] for sample in per_prompt], axis=0)
        for layer in layers
    }


print("Model loading functions ready")

## 5. Decomposition Analysis Functions

In [None]:
def compute_decomposition_score(bonded_act: np.ndarray, 
                                component_acts: List[np.ndarray],
                                control_act: np.ndarray) -> Dict[str, float]:
    """Compare a bonded activation with its components and with a control.

    Cosine similarity is computed between the bonded activation and each
    component activation, and between the bonded activation and the control.
    The reported ratio is mean component similarity divided by control
    similarity (NaN when the control similarity is exactly zero).

    Args:
        bonded_act: Activation of bonded state
        component_acts: List of component activations
        control_act: Control activation

    Returns:
        Dict with keys "component_sims" (list, one value per component),
        "mean_component_sim", "control_sim" and "ratio"
    """
    def _cosine(vec_a: np.ndarray, vec_b: np.ndarray) -> float:
        # cosine_similarity works on 2-D inputs; wrap each vector as one row.
        return cosine_similarity([vec_a], [vec_b])[0, 0]

    component_sims = [_cosine(bonded_act, comp_act) for comp_act in component_acts]
    mean_component_sim = np.mean(component_sims)
    control_sim = _cosine(bonded_act, control_act)

    if control_sim != 0:
        ratio = mean_component_sim / control_sim
    else:
        ratio = np.nan

    return {
        "component_sims": component_sims,
        "mean_component_sim": mean_component_sim,
        "control_sim": control_sim,
        "ratio": ratio
    }


def permutation_test_ratio(component_sims: List[float], control_sims: List[float],
                           n_permutations: int = 10000) -> Tuple[float, float]:
    """One-sided permutation test: are component similarities larger than control ones?

    The test statistic is mean(component) - mean(control). Group labels are
    shuffled ``n_permutations`` times using NumPy's global random state, and
    the p-value is the add-one-smoothed share of shuffles whose statistic is
    at least as large as the observed one.

    Lists and NumPy arrays are both accepted; the inputs are not modified.

    Args:
        component_sims: Component similarities
        control_sims: Control similarities
        n_permutations: Number of permutations

    Returns:
        Tuple of (observed_diff, p_value). Both are NaN when the inputs
        contain NaN, because no meaningful comparison is possible then.

    Raises:
        ValueError: If either group is empty
    """
    component_values = np.asarray(component_sims, dtype=float).ravel()
    control_values = np.asarray(control_sims, dtype=float).ravel()
    if component_values.size == 0 or control_values.size == 0:
        raise ValueError("Both similarity groups must contain at least one value")

    observed_diff = float(component_values.mean() - control_values.mean())
    if np.isnan(observed_diff):
        # Every ">=" comparison against NaN is False, which would otherwise be
        # reported as the smallest possible p-value.
        return float("nan"), float("nan")

    # concatenate() copies, so shuffling below never touches the caller's data.
    pooled = np.concatenate([component_values, control_values])
    n_component = component_values.size

    count_extreme = 0
    for _ in range(n_permutations):
        np.random.shuffle(pooled)
        perm_diff = pooled[:n_component].mean() - pooled[n_component:].mean()
        if perm_diff >= observed_diff:
            count_extreme += 1

    p_value = (count_extreme + 1) / (n_permutations + 1)

    return observed_diff, float(p_value)


def analyze_layer_fusion(decomposition_scores_by_layer: Dict[int, float]) -> Dict[str, Any]:
    """Analyze if decomposition decreases with layer depth (fusion hypothesis).

    Fits a least-squares line of score against layer index. For a single
    predictor the fit's r-value is the Pearson correlation and its p-value
    tests that correlation (equivalently, the slope) against zero.

    Args:
        decomposition_scores_by_layer: Dict mapping layer to decomposition score

    Returns:
        Dict with "slope", "intercept", "r_squared", "correlation", "p_value"
        (all Python floats) and "fusion_supported" (Python bool, True when the
        slope is negative and p_value < 0.05). Plain Python types are used so
        the result can be written with ``json`` directly.
    """
    layers = np.array(list(decomposition_scores_by_layer.keys()), dtype=float)
    scores = np.array(list(decomposition_scores_by_layer.values()), dtype=float)

    # One call yields slope, intercept, correlation and p-value together.
    fit = stats.linregress(layers, scores)
    slope = float(fit.slope)
    correlation = float(fit.rvalue)
    p_value = float(fit.pvalue)

    return {
        "slope": slope,
        "intercept": float(fit.intercept),
        "r_squared": correlation ** 2,
        "correlation": correlation,
        "p_value": p_value,
        # bool() keeps a numpy.bool_ from leaking into the JSON output.
        "fusion_supported": bool(slope < 0 and p_value < 0.05)
    }


print("Decomposition analysis functions ready")

## 6. Run Experiment

In [None]:
def _summarize_control_test(component_sims: List[float], control_sims: List[float],
                            n_permutations: int) -> Dict[str, float]:
    """Run the permutation test against one control type and package the summary numbers."""
    observed_diff, p_value = permutation_test_ratio(component_sims, control_sims, n_permutations)
    mean_component = float(np.mean(component_sims))
    mean_control = float(np.mean(control_sims))
    return {
        "observed_diff": float(observed_diff),
        "p_value": float(p_value),
        "mean_component_sim": mean_component,
        "mean_control_sim": mean_control,
        # Guard the division: plain Python floats raise on a zero denominator.
        "ratio": mean_component / mean_control if mean_control != 0 else float("nan"),
    }


def run_experiment_for_model(model_name: str, model_path: str,
                             prompts: Dict, config: ExperimentConfig) -> Dict[str, Any]:
    """Run full experiment for a single model.

    Steps: load the model, compute mean activations for every single
    component, then for each level 2-4 combination compute the bonded
    activations and three controls (shuffled, length-matched, semantic-only)
    and score them per layer. Finally run a permutation test against each
    control type and the layer-trend ("fusion") regression.

    Args:
        model_name: Name of the model
        model_path: HuggingFace path
        prompts: Prompt dictionary, indexed as prompts[level][combo_name] with
            "bonded" and "components" entries
        config: Experiment configuration

    Returns:
        Dict containing all results. "overall_test" holds the test against the
        shuffled control; "control_tests" holds the same summary for all three
        control types. If the model cannot be loaded, ``{"error": message}``
        is returned instead.
    """
    print(f"\n{'=' * 70}")
    print(f"MODEL: {model_name}")
    print(f"{'=' * 70}")

    # Loading is the one step allowed to fail softly, so other models still run.
    try:
        model, tokenizer = load_model(model_path)
    except Exception as e:
        print(f"Failed to load {model_name}: {e}")
        return {"error": str(e)}

    try:
        layers = get_model_layers(model_path)
        print(f"Analyzing {len(layers)} layers")

        results = {
            "model_name": model_name,
            "model_path": model_path,
            "n_layers": len(layers),
            "level_results": {},
            "fusion_analysis": {}
        }

        # First, get activations for all single components
        print("\nGetting single AQ activations...")
        single_aq_acts = {}
        for comp in AQ_COMPONENTS.keys():
            comp_prompts = prompts[1][comp]["bonded"]
            single_aq_acts[comp] = get_category_activations(comp_prompts, model, tokenizer, layers)
            print(f"  {comp}: done")

        # Pooled values for the overall tests. Component similarities do not
        # depend on the control, so they are collected once.
        all_component_sims = []
        control_sims = {"vs_shuffled": [], "vs_length_matched": [], "vs_semantic_only": []}
        decomposition_by_layer = {l: [] for l in layers}

        for level in [2, 3, 4]:  # Skip level 1 (no composition)
            print(f"\nAnalyzing Level {level} bonds...")
            results["level_results"][level] = {}

            for combo_name, combo_data in prompts[level].items():
                bonded_prompts = combo_data["bonded"]
                components = combo_data["components"]

                # Get bonded activations
                bonded_acts = get_category_activations(bonded_prompts, model, tokenizer, layers)

                # 1. Shuffled control
                shuffled_prompts = [generate_shuffled_control(p) for p in bonded_prompts]
                # 2. Length-matched control
                length_matched_prompts = [generate_length_matched_control(p, tokenizer)
                                          for p in bonded_prompts]
                # 3. Semantic-only control: a single fixed prompt, so one forward
                #    pass is enough (averaging identical copies adds nothing).
                semantic_prompt = generate_semantic_only_control(components)

                control_acts = {
                    "vs_shuffled": get_category_activations(
                        shuffled_prompts, model, tokenizer, layers),
                    "vs_length_matched": get_category_activations(
                        length_matched_prompts, model, tokenizer, layers),
                    "vs_semantic_only": get_category_activations(
                        [semantic_prompt], model, tokenizer, layers),
                }

                # Analyze per layer
                combo_results = {"layers": {}}

                for layer in layers:
                    bonded_act = bonded_acts[layer]
                    component_acts = [single_aq_acts[c][layer] for c in components]

                    # Compute scores vs each control type
                    layer_scores = {
                        control_name: compute_decomposition_score(
                            bonded_act, component_acts, acts[layer])
                        for control_name, acts in control_acts.items()
                    }
                    combo_results["layers"][layer] = layer_scores

                    # Collect for overall analysis
                    all_component_sims.extend(layer_scores["vs_shuffled"]["component_sims"])
                    for control_name, score in layer_scores.items():
                        control_sims[control_name].append(score["control_sim"])
                    decomposition_by_layer[layer].append(
                        layer_scores["vs_shuffled"]["mean_component_sim"])

                results["level_results"][level][combo_name] = combo_results
                print(f"  {combo_name}: done")

        # Permutation test for overall effect, once per control type.
        # The shuffled control is tested first and stays the headline result.
        print("\nRunning permutation test...")
        results["control_tests"] = {
            control_name: _summarize_control_test(
                all_component_sims, sims, config.n_permutations)
            for control_name, sims in control_sims.items()
        }
        results["overall_test"] = dict(results["control_tests"]["vs_shuffled"])
        overall = results["overall_test"]
        print(f"  Component vs Control: ratio={overall['ratio']:.3f}, p={overall['p_value']:.6f}")
        for control_name in ("vs_length_matched", "vs_semantic_only"):
            summary = results["control_tests"][control_name]
            print(f"  Component {control_name}: ratio={summary['ratio']:.3f}, "
                  f"p={summary['p_value']:.6f}")

        # Fusion analysis
        print("\nAnalyzing layer fusion hypothesis...")
        mean_decomp_by_layer = {l: np.mean(scores) for l, scores in decomposition_by_layer.items()}
        fusion_results = analyze_layer_fusion(mean_decomp_by_layer)
        results["fusion_analysis"] = fusion_results
        print(f"  Slope: {fusion_results['slope']:.6f}")
        print(f"  Correlation: {fusion_results['correlation']:.3f} (p={fusion_results['p_value']:.6f})")
        print(f"  Fusion hypothesis supported: {fusion_results['fusion_supported']}")

        return results
    finally:
        # Cleanup runs even when the analysis raises, so a failed run does not
        # keep the model's memory occupied for the next model.
        del model
        del tokenizer
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()


print("Experiment runner ready")

In [None]:
# Run experiment for all models
# ALL_RESULTS maps each model's display name to the dict returned by
# run_experiment_for_model (either the full results or {"error": ...}).
# Results are stored one model at a time, so entries already computed remain
# available if a later model raises.
ALL_RESULTS = {}

for model_name, model_path in config.models.items():
    results = run_experiment_for_model(model_name, model_path, PROMPTS, config)
    ALL_RESULTS[model_name] = results

## 7. Visualization

In [None]:
# Plot decomposition by layer for fusion analysis
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Models whose run returned an error record have nothing to plot.
valid_results = {name: res for name, res in ALL_RESULTS.items() if "error" not in res}

# Plot 1: Decomposition score by layer
ax = axes[0]
for model_name, results in valid_results.items():
    # All level 2-4 combinations of this model, flattened into one list.
    combos = []
    for level in [2, 3, 4]:
        combos.extend(results["level_results"][level].values())

    # Every layer index that occurs in any combination, in ascending order.
    layers = sorted({layer for combo_results in combos for layer in combo_results["layers"]})

    # Mean (over combinations) of the mean component similarity at each layer.
    mean_scores = [
        np.mean([
            combo_results["layers"][layer]["vs_shuffled"]["mean_component_sim"]
            for combo_results in combos
        ])
        for layer in layers
    ]

    ax.plot(layers, mean_scores, 'o-', label=model_name, markersize=4)

ax.set_xlabel("Layer")
ax.set_ylabel("Mean Component Similarity")
ax.set_title("Decomposition Score by Layer\n(Higher = components more detectable)")
ax.legend()
ax.grid(True, alpha=0.3)

# Plot 2: Control comparison bar chart (component/control ratio vs shuffled control)
ax = axes[1]
x_labels = list(valid_results)
component_ratios = [res["overall_test"]["ratio"] for res in valid_results.values()]

x = np.arange(len(x_labels))
ax.bar(x, component_ratios, color='steelblue', alpha=0.7)
ax.axhline(y=1.0, color='r', linestyle='--', label='No difference (ratio=1)', alpha=0.7)
ax.axhline(y=1.1, color='orange', linestyle='--', label='Target (ratio=1.1)', alpha=0.7)
ax.set_xticks(x)
ax.set_xticklabels(x_labels, rotation=45, ha='right')
ax.set_ylabel("Component/Control Ratio")
ax.set_title("Overall Component vs Control Ratio")
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

plt.suptitle("035F: Compositional Controls Analysis", fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig("035F_compositional_controls.png", dpi=150, bbox_inches='tight')
plt.show()
print("Saved: 035F_compositional_controls.png")

## 8. Summary and Conclusions

In [None]:
print("\n" + "=" * 70)
print("EXPERIMENT 035F: COMPOSITIONAL CONTROLS SUMMARY")
print("=" * 70)

print("\nExperiment Configuration:")
print(f"  Models tested: {list(ALL_RESULTS.keys())}")
print(f"  Bond levels: {config.bond_levels}")
print(f"  Samples per level: {config.samples_per_level}")
print(f"  Permutations: {config.n_permutations}")

print("\nControl Types:")
print("  1. Shuffled: Same words, scrambled order")
print("  2. Length-matched: Non-action prompts of same length")
print("  3. Semantic-only: Action words without action context")

print("\nKey Results per Model:")
# Both counters are read again by the cell that saves the results.
models_passing = 0
fusion_supported_count = 0
# Only models that actually produced results count towards the conclusion.
n_valid_models = 0

for model_name, results in ALL_RESULTS.items():
    if "error" in results:
        print(f"  {model_name}: ERROR")
        continue

    n_valid_models += 1
    overall = results["overall_test"]
    fusion = results["fusion_analysis"]

    print(f"\n  {model_name}:")
    print(f"    Component/Control ratio: {overall['ratio']:.3f}")
    print(f"    Permutation test p-value: {overall['p_value']:.6f}")
    print(f"    Fusion slope: {fusion['slope']:.6f}")
    print(f"    Fusion supported: {fusion['fusion_supported']}")

    if overall['ratio'] > 1.1 and overall['p_value'] < 0.01:
        models_passing += 1
        print("    STATUS: PASS (ratio > 1.1, p < 0.01)")
    else:
        print("    STATUS: FAIL")

    if fusion['fusion_supported']:
        fusion_supported_count += 1

print("\n" + "=" * 70)
print("CONCLUSIONS:")
print(f"  Models passing threshold: {models_passing}/{n_valid_models}")
print(f"  Fusion hypothesis supported in: {fusion_supported_count} models")

# Allow 1 failure among the models that ran, but always require at least one
# pass: with a single model (or none) "all but one" would otherwise be met by
# zero passing models.
evidence_supports_bonding = (
    n_valid_models > 0 and models_passing >= max(1, n_valid_models - 1)
)

if evidence_supports_bonding:
    print("\n  CONCLUSION: Evidence SUPPORTS compositional bonding.")
    print("  Bonded states contain component signatures beyond word co-occurrence.")
else:
    print("\n  CONCLUSION: Evidence does NOT strongly support compositional bonding.")

In [None]:
# Save results
def make_serializable(obj):
    """Recursively convert NumPy values into plain Python types for JSON.

    Dict keys are converted to strings; lists and tuples become lists; arrays
    become nested lists; NumPy booleans, integers and floats become ``bool``,
    ``int`` and ``float`` respectively. Anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        return {str(k): make_serializable(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [make_serializable(v) for v in obj]
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # numpy.bool_ is not a Python bool and json refuses to encode it.
    if isinstance(obj, np.bool_):
        return bool(obj)
    # Keep integers integral instead of turning them into floats.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    return obj

# Only models that produced results count towards the conclusion. Allow 1
# failure among them, but always require at least one pass so that a run with
# a single model (or only errored models) cannot "support" with zero passes.
n_valid_models = sum(1 for r in ALL_RESULTS.values() if "error" not in r)
supports_bonding = n_valid_models > 0 and models_passing >= max(1, n_valid_models - 1)


def _json_fallback(value):
    """Encode leftover NumPy scalars/arrays that json cannot handle natively."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


results_output = {
    "config": {
        "models": list(config.models.keys()),
        "samples_per_level": config.samples_per_level,
        "n_permutations": config.n_permutations,
        "random_seed": config.random_seed
    },
    "results": make_serializable(ALL_RESULTS),
    "summary": {
        "models_passing": models_passing,
        "fusion_supported_count": fusion_supported_count,
        "conclusion": "SUPPORTS" if supports_bonding else "DOES NOT SUPPORT"
    }
}

with open("035F_results.json", "w", encoding="utf-8") as f:
    # default= catches any NumPy scalar that survived make_serializable.
    json.dump(results_output, f, indent=2, default=_json_fallback)

print("Results saved to 035F_results.json")