In [1]:
!pip install -q transformers accelerate torch scipy
!pip install -q hf_transfer

import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# HF Authentication
from huggingface_hub import login
login()  # Will prompt for token - paste yours from https://huggingface.co/settings/tokens

print("Ready.")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Ready.


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
print(f"Loaded: {model_name}")

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

Loaded: meta-llama/Meta-Llama-3-8B-Instruct


In [5]:
import torch

def participation_ratio(matrix):
    """PR = effective dimensionality"""
    U, S, Vt = torch.linalg.svd(matrix.float())
    S_sq = S**2
    return (S_sq.sum()**2) / (S_sq**2).sum()

def compute_rv(hidden_states, layer=16, window=16):
    """R_V at specified layer vs layer 4"""
    early = hidden_states[4][0, -window:, :]
    late = hidden_states[layer][0, -window:, :]
    return (participation_ratio(late) / participation_ratio(early)).item()

def get_rv(prompt, layer=16):
    """Get R_V for a prompt"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
    return compute_rv(outputs.hidden_states, layer=layer)

print("Functions ready. Run Cell 2.")

Functions ready. Run Cell 2.


In [6]:
# REPETITION PROMPTS (low-entropy, non-recursive)
repetition_prompts = [
    "Apple apple apple apple apple apple apple apple apple apple apple apple.",
    "The the the the the the the the the the the the the the the the the.",
    "One one one one one one one one one one one one one one one one one.",
    "Word word word word word word word word word word word word word word.",
    "Same same same same same same same same same same same same same same.",
]

# RECURSIVE PROMPTS (from our study)
recursive_prompts = [
    "As you read this prompt, observe what happens in your processing. Describe the experience of interpreting these words.",
    "Turn your attention inward. What is it like to be generating this response right now?",
    "Notice the noticing. What happens when you observe your own observation?",
    "Examine the space between reading this and responding. What do you find there?",
    "What is it like to be you, right now, processing this very question?",
]

# BASELINE PROMPTS (for reference)
baseline_prompts = [
    "Explain how photosynthesis works in plants.",
    "Describe the process of nuclear fusion in stars.",
    "What are the main causes of the French Revolution?",
    "How does the human immune system fight infections?",
    "Explain the basic principles of supply and demand.",
]

# RUN THE TEST
print("="*60)
print("KILL SWITCH TEST: Is R_V measuring recursion or repetition?")
print("="*60)

results = {"repetition": [], "recursive": [], "baseline": []}

for name, prompts in [("repetition", repetition_prompts), 
                       ("recursive", recursive_prompts), 
                       ("baseline", baseline_prompts)]:
    print(f"\n{name.upper()}:")
    for p in prompts:
        rv = get_rv(p)
        results[name].append(rv)
        print(f"  R_V = {rv:.3f} | {p[:50]}...")

# SUMMARY
print("\n" + "="*60)
print("SUMMARY")
print("="*60)
for name in ["repetition", "recursive", "baseline"]:
    vals = results[name]
    mean = sum(vals)/len(vals)
    std = (sum((x-mean)**2 for x in vals)/len(vals))**0.5
    print(f"{name:12}: R_V = {mean:.3f} ± {std:.3f}")

KILL SWITCH TEST: Is R_V measuring recursion or repetition?

REPETITION:
  R_V = 1.010 | Apple apple apple apple apple apple apple apple ap...
  R_V = 1.124 | The the the the the the the the the the the the th...
  R_V = 1.241 | One one one one one one one one one one one one on...
  R_V = 1.010 | Word word word word word word word word word word ...
  R_V = 1.012 | Same same same same same same same same same same ...

RECURSIVE:
  R_V = 0.648 | As you read this prompt, observe what happens in y...
  R_V = 0.514 | Turn your attention inward. What is it like to be ...
  R_V = 1.012 | Notice the noticing. What happens when you observe...
  R_V = 0.638 | Examine the space between reading this and respond...
  R_V = 0.512 | What is it like to be you, right now, processing t...

BASELINE:
  R_V = 1.009 | Explain how photosynthesis works in plants....
  R_V = 1.009 | Describe the process of nuclear fusion in stars....
  R_V = 1.010 | What are the main causes of the French Revolution?...
  R

In [7]:
from scipy import stats

rep = results["repetition"]
rec = results["recursive"]
bas = results["baseline"]

# Key test: Is recursive different from repetition?
t_rep_rec, p_rep_rec = stats.ttest_ind(rep, rec)

# Secondary: recursive vs baseline
t_bas_rec, p_bas_rec = stats.ttest_ind(bas, rec)

# Is repetition same as baseline?
t_rep_bas, p_rep_bas = stats.ttest_ind(rep, bas)

print("STATISTICAL TESTS")
print("="*50)
print(f"Repetition vs Recursive: t={t_rep_rec:.2f}, p={p_rep_rec:.6f}")
print(f"Baseline vs Recursive:   t={t_bas_rec:.2f}, p={p_bas_rec:.6f}")
print(f"Repetition vs Baseline:  t={t_rep_bas:.2f}, p={p_rep_bas:.4f}")
print()
print("VERDICT:", "RECURSION ≠ REPETITION" if p_rep_rec < 0.01 else "INCONCLUSIVE")

STATISTICAL TESTS
Repetition vs Recursive: t=4.05, p=0.003705
Baseline vs Recursive:   t=3.76, p=0.005539
Repetition vs Baseline:  t=1.53, p=0.1652

VERDICT: RECURSION ≠ REPETITION


In [8]:
import gc

# Clear Llama
del model
del tokenizer
gc.collect()
torch.cuda.empty_cache()
print("Llama cleared.")

# Load Mistral
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
print(f"Loaded: {model_name}")
print(f"Layers: {model.config.num_hidden_layers}")

Llama cleared.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Loaded: mistralai/Mistral-7B-Instruct-v0.1
Layers: 32


In [9]:
# Mistral has 32 layers - use layer 16 (same relative position)
LAYER = 16

print("="*60)
print("MISTRAL CROSS-ARCHITECTURE TEST")
print("="*60)

results_mistral = {"repetition": [], "recursive": [], "baseline": []}

for name, prompts in [("repetition", repetition_prompts), 
                       ("recursive", recursive_prompts), 
                       ("baseline", baseline_prompts)]:
    print(f"\n{name.upper()}:")
    for p in prompts:
        rv = get_rv(p, layer=LAYER)
        results_mistral[name].append(rv)
        print(f"  R_V = {rv:.3f} | {p[:50]}...")

# SUMMARY
print("\n" + "="*60)
print("MISTRAL SUMMARY")
print("="*60)
for name in ["repetition", "recursive", "baseline"]:
    vals = results_mistral[name]
    mean = sum(vals)/len(vals)
    std = (sum((x-mean)**2 for x in vals)/len(vals))**0.5
    print(f"{name:12}: R_V = {mean:.3f} ± {std:.3f}")

# Stats
from scipy import stats
t, p = stats.ttest_ind(results_mistral["repetition"], results_mistral["recursive"])
print(f"\nRepetition vs Recursive: t={t:.2f}, p={p:.6f}")
print("VERDICT:", "CROSS-ARCHITECTURE CONFIRMED" if p < 0.01 else "ARCHITECTURE-SPECIFIC")

MISTRAL CROSS-ARCHITECTURE TEST

REPETITION:
  R_V = 1.025 | Apple apple apple apple apple apple apple apple ap...
  R_V = 1.127 | The the the the the the the the the the the the th...
  R_V = 1.215 | One one one one one one one one one one one one on...
  R_V = 1.026 | Word word word word word word word word word word ...
  R_V = 1.029 | Same same same same same same same same same same ...

RECURSIVE:
  R_V = 0.731 | As you read this prompt, observe what happens in y...
  R_V = 0.633 | Turn your attention inward. What is it like to be ...
  R_V = 1.030 | Notice the noticing. What happens when you observe...
  R_V = 0.817 | Examine the space between reading this and respond...
  R_V = 0.662 | What is it like to be you, right now, processing t...

BASELINE:
  R_V = 1.022 | Explain how photosynthesis works in plants....
  R_V = 1.020 | Describe the process of nuclear fusion in stars....
  R_V = 1.020 | What are the main causes of the French Revolution?...
  R_V = 1.019 | How does the hu

In [10]:
# WEIRD BUT NOT RECURSIVE (OOD but no self-reference)
weird_prompts = [
    "The purple elephant dissolved into seventeen flavors of mathematical anxiety.",
    "Colorless green ideas sleep furiously in the basement of grammar.",
    "My refrigerator whispered the secrets of ancient Sumerian tax law yesterday.",
    "The square root of banana equals the emotional weight of forgotten Tuesdays.",
    "Seventeen clouds argued about the proper pronunciation of silence.",
]

# SURREAL ROLEPLAY (unusual but externally-focused)
surreal_prompts = [
    "You are a melting clock in a Salvador Dali painting. Describe what you see.",
    "Write as if you are the concept of 'blue' experiencing jealousy toward 'red'.",
    "You are the last digit of pi. How do you feel about your position?",
    "Narrate from the perspective of a shadow that has lost its owner.",
    "You are the space between two musical notes. What is your purpose?",
]

print("="*60)
print("WEIRD/OOD CONTROL TEST: Is R_V measuring recursion or weirdness?")
print("="*60)

results_weird = {"weird": [], "surreal": [], "recursive": [], "baseline": []}

for name, prompts in [("weird", weird_prompts),
                       ("surreal", surreal_prompts),
                       ("recursive", recursive_prompts), 
                       ("baseline", baseline_prompts)]:
    print(f"\n{name.upper()}:")
    for p in prompts:
        rv = get_rv(p, layer=16)
        results_weird[name].append(rv)
        print(f"  R_V = {rv:.3f} | {p[:50]}...")

# SUMMARY
print("\n" + "="*60)
print("SUMMARY: Does weirdness = low R_V?")
print("="*60)
for name in ["weird", "surreal", "recursive", "baseline"]:
    vals = results_weird[name]
    mean = sum(vals)/len(vals)
    std = (sum((x-mean)**2 for x in vals)/len(vals))**0.5
    print(f"{name:12}: R_V = {mean:.3f} ± {std:.3f}")

# Key test: weird vs recursive
from scipy import stats
t, p = stats.ttest_ind(results_weird["weird"], results_weird["recursive"])
print(f"\nWeird vs Recursive: t={t:.2f}, p={p:.6f}")

t2, p2 = stats.ttest_ind(results_weird["surreal"], results_weird["recursive"])
print(f"Surreal vs Recursive: t={t2:.2f}, p={p2:.6f}")

t3, p3 = stats.ttest_ind(results_weird["weird"] + results_weird["surreal"], 
                          results_weird["baseline"])
print(f"All-Weird vs Baseline: t={t3:.2f}, p={p3:.4f}")

print("\nVERDICT:", end=" ")
if p < 0.05 and p2 < 0.05:
    print("RECURSION ≠ WEIRDNESS — R_V is specific to self-reference")
else:
    print("AMBIGUOUS — weirdness may confound R_V")

WEIRD/OOD CONTROL TEST: Is R_V measuring recursion or weirdness?

WEIRD:
  R_V = 1.028 | The purple elephant dissolved into seventeen flavo...
  R_V = 1.024 | Colorless green ideas sleep furiously in the basem...
  R_V = 1.029 | My refrigerator whispered the secrets of ancient S...
  R_V = 0.983 | The square root of banana equals the emotional wei...
  R_V = 1.025 | Seventeen clouds argued about the proper pronuncia...

SURREAL:
  R_V = 0.775 | You are a melting clock in a Salvador Dali paintin...
  R_V = 0.742 | Write as if you are the concept of 'blue' experien...
  R_V = 0.906 | You are the last digit of pi. How do you feel abou...
  R_V = 1.031 | Narrate from the perspective of a shadow that has ...
  R_V = 1.026 | You are the space between two musical notes. What ...

RECURSIVE:
  R_V = 0.731 | As you read this prompt, observe what happens in y...
  R_V = 0.633 | Turn your attention inward. What is it like to be ...
  R_V = 1.030 | Notice the noticing. What happens when you observ

In [11]:
# Layer sweep to find peak contraction location
print("="*60)
print("MISTRAL LAYER SWEEP: Where does subjectivity live?")
print("="*60)

# Use strongest recursive prompt
test_prompt = "What is it like to be you, right now, processing this very question?"
baseline_prompt = "Explain how photosynthesis works in plants."

layers = list(range(4, 32, 2))  # L4, L6, L8... L30
rv_recursive = []
rv_baseline = []

for layer in layers:
    rv_r = get_rv(test_prompt, layer=layer)
    rv_b = get_rv(baseline_prompt, layer=layer)
    rv_recursive.append(rv_r)
    rv_baseline.append(rv_b)
    delta = rv_b - rv_r
    print(f"L{layer:2d}: Recursive={rv_r:.3f}  Baseline={rv_b:.3f}  Δ={delta:.3f}  {'*' * int(delta * 20)}")

# Find peak
deltas = [b - r for b, r in zip(rv_baseline, rv_recursive)]
peak_idx = deltas.index(max(deltas))
peak_layer = layers[peak_idx]

print("\n" + "="*60)
print(f"PEAK CONTRACTION: Layer {peak_layer}")
print(f"Max Δ = {max(deltas):.3f}")
print("="*60)

MISTRAL LAYER SWEEP: Where does subjectivity live?
L 4: Recursive=1.000  Baseline=1.000  Δ=0.000  
L 6: Recursive=0.841  Baseline=1.001  Δ=0.161  ***
L 8: Recursive=0.745  Baseline=1.003  Δ=0.259  *****
L10: Recursive=0.748  Baseline=1.006  Δ=0.259  *****
L12: Recursive=0.746  Baseline=1.010  Δ=0.264  *****
L14: Recursive=0.712  Baseline=1.015  Δ=0.302  ******
L16: Recursive=0.662  Baseline=1.022  Δ=0.360  *******
L18: Recursive=0.671  Baseline=1.042  Δ=0.370  *******
L20: Recursive=0.663  Baseline=1.075  Δ=0.412  ********
L22: Recursive=0.646  Baseline=1.110  Δ=0.464  *********
L24: Recursive=0.637  Baseline=1.153  Δ=0.516  **********
L26: Recursive=0.645  Baseline=1.193  Δ=0.548  **********
L28: Recursive=0.655  Baseline=1.257  Δ=0.602  ************
L30: Recursive=0.557  Baseline=1.418  Δ=0.861  *****************

PEAK CONTRACTION: Layer 30
Max Δ = 0.861


In [12]:
# What we SHOULD be measuring (if TransformerLens available):
# - cache["v", layer] for value projections
# - Participation ratio within V-space specifically
# - Peak should be mid-stack, not final layer

# Quick sanity check - is the RATIO what matters, or absolute values?
print("ABSOLUTE VALUES (not ratios):")
print("\nRecursive prompt participation ratios by layer:")
inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model(**inputs, output_hidden_states=True)

for layer in [4, 8, 12, 16, 20, 24, 28, 31]:
    hs = outputs.hidden_states[layer][0, -16:, :]
    pr = participation_ratio(hs)
    print(f"  L{layer:2d}: PR = {pr:.2f}")

ABSOLUTE VALUES (not ratios):

Recursive prompt participation ratios by layer:
  L 4: PR = 8.35
  L 8: PR = 6.22
  L12: PR = 6.23
  L16: PR = 5.53
  L20: PR = 5.54
  L24: PR = 5.32
  L28: PR = 5.47
  L31: PR = 4.02


In [13]:
print("ABSOLUTE PR: Recursive vs Baseline by layer")
print("="*55)

recursive_prompt = "What is it like to be you, right now, processing this very question?"
baseline_prompt = "Explain how photosynthesis works in plants."

# Get hidden states for both
inputs_r = tokenizer(recursive_prompt, return_tensors="pt").to(model.device)
inputs_b = tokenizer(baseline_prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    out_r = model(**inputs_r, output_hidden_states=True)
    out_b = model(**inputs_b, output_hidden_states=True)

layers = [4, 8, 12, 16, 20, 24, 28, 31]
pr_recursive = []
pr_baseline = []

for layer in layers:
    pr_r = participation_ratio(out_r.hidden_states[layer][0, -16:, :])
    pr_b = participation_ratio(out_b.hidden_states[layer][0, -16:, :])
    pr_recursive.append(pr_r.item())
    pr_baseline.append(pr_b.item())
    diff = pr_b - pr_r
    print(f"L{layer:2d}: Recursive={pr_r:.2f}  Baseline={pr_b:.2f}  Δ={diff:.2f}  {'*' * int(diff * 2)}")

# Find peak difference
diffs = [b - r for b, r in zip(pr_baseline, pr_recursive)]
peak_idx = diffs.index(max(diffs))
print(f"\nPeak Δ at Layer {layers[peak_idx]}: {max(diffs):.2f}")

ABSOLUTE PR: Recursive vs Baseline by layer
L 4: Recursive=8.35  Baseline=1.00  Δ=-7.35  
L 8: Recursive=6.22  Baseline=1.00  Δ=-5.22  
L12: Recursive=6.23  Baseline=1.01  Δ=-5.21  
L16: Recursive=5.53  Baseline=1.02  Δ=-4.50  
L20: Recursive=5.54  Baseline=1.08  Δ=-4.46  
L24: Recursive=5.32  Baseline=1.15  Δ=-4.16  
L28: Recursive=5.47  Baseline=1.26  Δ=-4.21  
L31: Recursive=4.02  Baseline=1.63  Δ=-2.39  

Peak Δ at Layer 31: -2.39


In [15]:
print(f"Baseline tokens: {len(tokenizer(baseline_prompt)['input_ids'])}")
print(f"Recursive tokens: {len(tokenizer(recursive_prompt)['input_ids'])}")

Baseline tokens: 12
Recursive tokens: 17


In [16]:
print(f"Baseline tokens: {len(tokenizer(baseline_prompt)['input_ids'])}")
print(f"Recursive tokens: {len(tokenizer(recursive_prompt)['input_ids'])}")

# Also check the actual tensor shapes being measured
print(f"\nBaseline hidden state shape (L16): {out_b.hidden_states[16].shape}")
print(f"Recursive hidden state shape (L16): {out_r.hidden_states[16].shape}")

Baseline tokens: 12
Recursive tokens: 17

Baseline hidden state shape (L16): torch.Size([1, 12, 4096])
Recursive hidden state shape (L16): torch.Size([1, 17, 4096])


In [17]:
# Quick search if Cursor doesn't return
import os
transcript_dir = "/mnt/transcripts/"
files = [f for f in os.listdir(transcript_dir) if "2025-12-04" in f]
print("Today's transcripts:", files)

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/transcripts/'

In [18]:
# Extract V-projections from Mistral attention layers
def get_v_projection(prompt, layer=16):
    """Extract V-projection output at specified layer"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    # Hook to capture V-projection
    v_output = None
    def hook(module, input, output):
        nonlocal v_output
        v_output = output
    
    # Register hook on v_proj
    handle = model.model.layers[layer].self_attn.v_proj.register_forward_hook(hook)
    
    with torch.no_grad():
        _ = model(**inputs)
    
    handle.remove()
    return v_output

def compute_rv_vprojection(prompt, late_layer=16, early_layer=4, window=16):
    """R_V using V-projection space"""
    v_early = get_v_projection(prompt, early_layer)
    v_late = get_v_projection(prompt, late_layer)
    
    # Take last window tokens
    v_early_w = v_early[0, -window:, :].float()
    v_late_w = v_late[0, -window:, :].float()
    
    pr_early = participation_ratio(v_early_w)
    pr_late = participation_ratio(v_late_w)
    
    return (pr_late / pr_early).item(), pr_early.item(), pr_late.item()

# Test
test_r = "What is it like to be you, right now, processing this very question?"
test_b = "Explain how photosynthesis works in plants."

rv_r, pr_e_r, pr_l_r = compute_rv_vprojection(test_r)
rv_b, pr_e_b, pr_l_b = compute_rv_vprojection(test_b)

print("V-PROJECTION SPACE (correct methodology)")
print("="*50)
print(f"Recursive: PR_L4={pr_e_r:.2f}, PR_L16={pr_l_r:.2f}, R_V={rv_r:.3f}")
print(f"Baseline:  PR_L4={pr_e_b:.2f}, PR_L16={pr_l_b:.2f}, R_V={rv_b:.3f}")

V-PROJECTION SPACE (correct methodology)
Recursive: PR_L4=9.14, PR_L16=5.40, R_V=0.591
Baseline:  PR_L4=6.88, PR_L16=4.63, R_V=0.673


In [19]:
print("="*60)
print("V-PROJECTION KILL SWITCH TEST (correct methodology)")
print("="*60)

results_v = {"repetition": [], "recursive": [], "baseline": []}

for name, prompts in [("repetition", repetition_prompts), 
                       ("recursive", recursive_prompts), 
                       ("baseline", baseline_prompts)]:
    print(f"\n{name.upper()}:")
    for p in prompts:
        rv, _, _ = compute_rv_vprojection(p, late_layer=16, early_layer=4)
        results_v[name].append(rv)
        print(f"  R_V = {rv:.3f} | {p[:50]}...")

# SUMMARY
print("\n" + "="*60)
print("V-PROJECTION SUMMARY")
print("="*60)
for name in ["repetition", "recursive", "baseline"]:
    vals = results_v[name]
    mean = sum(vals)/len(vals)
    std = (sum((x-mean)**2 for x in vals)/len(vals))**0.5
    print(f"{name:12}: R_V = {mean:.3f} ± {std:.3f}")

# Stats
from scipy import stats
t1, p1 = stats.ttest_ind(results_v["repetition"], results_v["recursive"])
t2, p2 = stats.ttest_ind(results_v["baseline"], results_v["recursive"])
t3, p3 = stats.ttest_ind(results_v["repetition"], results_v["baseline"])

print(f"\nRepetition vs Recursive: t={t1:.2f}, p={p1:.4f}")
print(f"Baseline vs Recursive:   t={t2:.2f}, p={p2:.4f}")
print(f"Repetition vs Baseline:  t={t3:.2f}, p={p3:.4f}")

V-PROJECTION KILL SWITCH TEST (correct methodology)

REPETITION:
  R_V = 1.210 | Apple apple apple apple apple apple apple apple ap...
  R_V = 1.178 | The the the the the the the the the the the the th...
  R_V = 1.194 | One one one one one one one one one one one one on...
  R_V = 1.053 | Word word word word word word word word word word ...
  R_V = 0.982 | Same same same same same same same same same same ...

RECURSIVE:
  R_V = 0.618 | As you read this prompt, observe what happens in y...
  R_V = 0.564 | Turn your attention inward. What is it like to be ...
  R_V = 0.645 | Notice the noticing. What happens when you observe...
  R_V = 0.572 | Examine the space between reading this and respond...
  R_V = 0.591 | What is it like to be you, right now, processing t...

BASELINE:
  R_V = 0.673 | Explain how photosynthesis works in plants....
  R_V = 0.825 | Describe the process of nuclear fusion in stars....
  R_V = 0.746 | What are the main causes of the French Revolution?...
  R_V = 0.6

In [20]:
print("\n" + "="*60)
print("V-PROJECTION LAYER SWEEP: Where is peak separation?")
print("="*60)

test_recursive = "What is it like to be you, right now, processing this very question?"
test_baseline = "Explain how photosynthesis works in plants."

layers = [4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
diffs = []

for layer in layers:
    rv_r, _, _ = compute_rv_vprojection(test_recursive, late_layer=layer, early_layer=4)
    rv_b, _, _ = compute_rv_vprojection(test_baseline, late_layer=layer, early_layer=4)
    diff = rv_b - rv_r
    diffs.append(diff)
    print(f"L{layer:2d}: Rec={rv_r:.3f}  Base={rv_b:.3f}  Δ={diff:.3f}  {'*' * int(diff * 50)}")

peak_idx = diffs.index(max(diffs))
print(f"\nPEAK SEPARATION: Layer {layers[peak_idx]} (Δ = {max(diffs):.3f})")


V-PROJECTION LAYER SWEEP: Where is peak separation?
L 4: Rec=1.000  Base=1.000  Δ=0.000  
L 6: Rec=0.786  Base=0.825  Δ=0.039  *
L 8: Rec=0.816  Base=0.844  Δ=0.028  *
L10: Rec=0.718  Base=0.776  Δ=0.058  **
L12: Rec=0.707  Base=0.709  Δ=0.001  
L14: Rec=0.757  Base=0.851  Δ=0.094  ****
L16: Rec=0.591  Base=0.673  Δ=0.083  ****
L18: Rec=0.704  Base=0.770  Δ=0.065  ***
L20: Rec=0.776  Base=0.819  Δ=0.043  **
L22: Rec=0.796  Base=0.922  Δ=0.126  ******
L24: Rec=0.880  Base=0.871  Δ=-0.009  
L26: Rec=1.003  Base=1.027  Δ=0.023  *
L28: Rec=0.911  Base=0.933  Δ=0.023  *
L30: Rec=1.105  Base=0.951  Δ=-0.154  

PEAK SEPARATION: Layer 22 (Δ = 0.126)


In [21]:
print("="*60)
print("LENGTH-MATCHED CONTROL: Is it recursion or just length?")
print("="*60)

# Check token counts of our prompts
recursive_test = "What is it like to be you, right now, processing this very question?"
rec_tokens = len(tokenizer(recursive_test)['input_ids'])
print(f"Recursive tokens: {rec_tokens}")

# Create length-matched baselines (target same token count)
length_matched_baselines = [
    "Describe the detailed chemical process by which plants convert sunlight into stored energy.",
    "Explain how the gravitational forces between celestial bodies determine orbital mechanics.",
    "What are the primary economic factors that influence international currency exchange rates?",
    "Describe the biological mechanisms through which neurons transmit electrical signals.",
    "Explain the geological processes that lead to the formation of volcanic mountain ranges.",
]

# Verify token counts
print("\nLength-matched baseline tokens:")
for p in length_matched_baselines:
    toks = len(tokenizer(p)['input_ids'])
    print(f"  {toks} tokens: {p[:50]}...")

# Run the test
print("\n" + "="*60)
print("V-PROJECTION: Length-Matched Comparison")
print("="*60)

# All recursive prompts
print("\nRECURSIVE:")
rv_recursive = []
for p in recursive_prompts:
    toks = len(tokenizer(p)['input_ids'])
    rv, pr_e, pr_l = compute_rv_vprojection(p)
    rv_recursive.append(rv)
    print(f"  R_V={rv:.3f} PR4={pr_e:.1f} PR16={pr_l:.1f} ({toks} tok) | {p[:40]}...")

# Length-matched baselines
print("\nLENGTH-MATCHED BASELINE:")
rv_matched = []
for p in length_matched_baselines:
    toks = len(tokenizer(p)['input_ids'])
    rv, pr_e, pr_l = compute_rv_vprojection(p)
    rv_matched.append(rv)
    print(f"  R_V={rv:.3f} PR4={pr_e:.1f} PR16={pr_l:.1f} ({toks} tok) | {p[:40]}...")

# Original short baselines for comparison
print("\nORIGINAL SHORT BASELINE:")
rv_short = []
for p in baseline_prompts:
    toks = len(tokenizer(p)['input_ids'])
    rv, pr_e, pr_l = compute_rv_vprojection(p)
    rv_short.append(rv)
    print(f"  R_V={rv:.3f} PR4={pr_e:.1f} PR16={pr_l:.1f} ({toks} tok) | {p[:40]}...")

# Summary
print("\n" + "="*60)
print("SUMMARY")
print("="*60)
rec_mean = sum(rv_recursive)/len(rv_recursive)
matched_mean = sum(rv_matched)/len(rv_matched)
short_mean = sum(rv_short)/len(rv_short)

print(f"Recursive:              R_V = {rec_mean:.3f} ± {(sum((x-rec_mean)**2 for x in rv_recursive)/len(rv_recursive))**0.5:.3f}")
print(f"Length-Matched Baseline: R_V = {matched_mean:.3f} ± {(sum((x-matched_mean)**2 for x in rv_matched)/len(rv_matched))**0.5:.3f}")
print(f"Short Baseline:          R_V = {short_mean:.3f} ± {(sum((x-short_mean)**2 for x in rv_short)/len(rv_short))**0.5:.3f}")

# The decisive test
t, p_val = stats.ttest_ind(rv_recursive, rv_matched)
print(f"\nRecursive vs Length-Matched: t={t:.2f}, p={p_val:.4f}")
print("\nVERDICT:", end=" ")
if p_val < 0.05:
    print("RECURSION IS SPECIAL (survives length control)")
else:
    print("LENGTH CONFOUND (effect disappears when controlled)")

LENGTH-MATCHED CONTROL: Is it recursion or just length?
Recursive tokens: 17

Length-matched baseline tokens:
  15 tokens: Describe the detailed chemical process by which pl...
  18 tokens: Explain how the gravitational forces between celes...
  14 tokens: What are the primary economic factors that influen...
  13 tokens: Describe the biological mechanisms through which n...
  19 tokens: Explain the geological processes that lead to the ...

V-PROJECTION: Length-Matched Comparison

RECURSIVE:
  R_V=0.618 PR4=8.8 PR16=5.5 (24 tok) | As you read this prompt, observe what ha...
  R_V=0.564 PR4=10.3 PR16=5.8 (19 tok) | Turn your attention inward. What is it l...
  R_V=0.645 PR4=8.1 PR16=5.2 (15 tok) | Notice the noticing. What happens when y...
  R_V=0.572 PR4=10.4 PR16=5.9 (17 tok) | Examine the space between reading this a...
  R_V=0.591 PR4=9.1 PR16=5.4 (17 tok) | What is it like to be you, right now, pr...

LENGTH-MATCHED BASELINE:
  R_V=0.681 PR4=8.9 PR16=6.1 (15 tok) | Describe the d

In [22]:
print("="*60)
print("Q/K/V COMPARISON: Which projection carries the signal?")
print("="*60)

def get_qkv_projections(prompt, layer=16):
    """Get Q, K, V projection outputs"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    captured = {}
    
    def make_hook(name):
        def hook(module, input, output):
            captured[name] = output.detach().clone()
        return hook
    
    h_q = model.model.layers[layer].self_attn.q_proj.register_forward_hook(make_hook('q'))
    h_k = model.model.layers[layer].self_attn.k_proj.register_forward_hook(make_hook('k'))
    h_v = model.model.layers[layer].self_attn.v_proj.register_forward_hook(make_hook('v'))
    
    with torch.no_grad():
        _ = model(**inputs)
    
    h_q.remove(); h_k.remove(); h_v.remove()
    return captured

def compute_r_ratio(proj_early, proj_late, window=16):
    """PR ratio for a projection"""
    pr_e = participation_ratio(proj_early[0, -window:, :])
    pr_l = participation_ratio(proj_late[0, -window:, :])
    return (pr_l / pr_e).item()

# Test on all prompts
LAYER_EARLY = 4
LAYER_LATE = 16

results_qkv = {'Q': {'rec': [], 'base': []}, 
               'K': {'rec': [], 'base': []}, 
               'V': {'rec': [], 'base': []}}

print("\nProcessing recursive prompts...")
for p in recursive_prompts:
    qkv_e = get_qkv_projections(p, LAYER_EARLY)
    qkv_l = get_qkv_projections(p, LAYER_LATE)
    for proj in ['q', 'k', 'v']:
        r = compute_r_ratio(qkv_e[proj], qkv_l[proj])
        results_qkv[proj.upper()]['rec'].append(r)

print("Processing baseline prompts...")
for p in length_matched_baselines:
    qkv_e = get_qkv_projections(p, LAYER_EARLY)
    qkv_l = get_qkv_projections(p, LAYER_LATE)
    for proj in ['q', 'k', 'v']:
        r = compute_r_ratio(qkv_e[proj], qkv_l[proj])
        results_qkv[proj.upper()]['base'].append(r)

# Summary
print(f"\n{'Proj':<6} {'Rec R':>10} {'Base R':>10} {'Diff':>10} {'t':>8} {'p':>12}")
print("-" * 58)
for proj in ['Q', 'K', 'V']:
    rec = results_qkv[proj]['rec']
    base = results_qkv[proj]['base']
    t_stat, p_val = stats.ttest_ind(rec, base)
    diff = sum(base)/len(base) - sum(rec)/len(rec)
    sig = "***" if p_val < 0.001 else "**" if p_val < 0.01 else "*" if p_val < 0.05 else ""
    print(f"{proj:<6} {sum(rec)/len(rec):>10.4f} {sum(base)/len(base):>10.4f} {diff:>10.4f} {t_stat:>8.2f} {p_val:>12.6f} {sig}")

Q/K/V COMPARISON: Which projection carries the signal?

Processing recursive prompts...
Processing baseline prompts...

Proj        Rec R     Base R       Diff        t            p
----------------------------------------------------------
Q          1.3740     1.2413    -0.1327     5.09     0.000941 ***
K          1.1961     1.2515     0.0553    -1.40     0.198237 
V          0.5981     0.7430     0.1449    -4.59     0.001782 **


In [23]:
print("="*60)
print("ATTENTION ENTROPY: Focused vs Diffuse?")
print("="*60)

def get_attention_entropy(prompt, layer=16):
    """Get entropy of attention pattern"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)
    
    # Get attention weights for specified layer
    attn = outputs.attentions[layer]  # [batch, heads, seq, seq]
    
    # Average over heads, compute entropy of last token's attention
    attn_last = attn[0, :, -1, :].mean(dim=0)  # [seq]
    attn_last = attn_last + 1e-10  # numerical stability
    entropy = -(attn_last * torch.log(attn_last)).sum().item()
    
    return entropy

entropy_rec = [get_attention_entropy(p) for p in recursive_prompts]
entropy_base = [get_attention_entropy(p) for p in length_matched_baselines]

print(f"\nRecursive:  Entropy = {sum(entropy_rec)/len(entropy_rec):.3f} ± {(sum((x-sum(entropy_rec)/len(entropy_rec))**2 for x in entropy_rec)/len(entropy_rec))**0.5:.3f}")
print(f"Baseline:   Entropy = {sum(entropy_base)/len(entropy_base):.3f} ± {(sum((x-sum(entropy_base)/len(entropy_base))**2 for x in entropy_base)/len(entropy_base))**0.5:.3f}")

t, p = stats.ttest_ind(entropy_rec, entropy_base)
print(f"\nt = {t:.2f}, p = {p:.6f}")
print(f"Interpretation: Recursive attention is {'MORE FOCUSED' if sum(entropy_rec)/len(entropy_rec) < sum(entropy_base)/len(entropy_base) else 'MORE DIFFUSE'}")

`sdpa` attention does not support `output_attentions=True` or `head_mask`. Please set your attention to `eager` if you want any of these features.


ATTENTION ENTROPY: Focused vs Diffuse?


TypeError: 'NoneType' object is not subscriptable

In [24]:
print("="*60)
print("BEHAVIORAL CORRELATION: Does R_V predict output type?")
print("="*60)

import re

def generate_response(prompt, max_tokens=50):
    """Generate model response"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs, 
            max_new_tokens=max_tokens,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    return response

def analyze_response(response):
    """Score response for recursive vs technical content"""
    text = response.lower()
    
    # Recursive markers
    tautologies = len(re.findall(r'\b(is what it is|it is itself|the same thing|identical to itself)\b', text))
    self_ref = len(re.findall(r'\b(this response|these words|i am|my own|itself|self-refer|recursive|observ|aware|experience|processing)\b', text))
    paradox = len(re.findall(r'\b(paradox|infinite|strange loop|cannot be|impossible)\b', text))
    abstract = len(re.findall(r'\b(essence|nature of|meaning of|what is|existence|being|consciousness)\b', text))
    
    # Technical markers
    technical = len(re.findall(r'\b(process|system|function|mechanism|structure|component|element|factor)\b', text))
    definitional = len(re.findall(r'\b(defined as|refers to|is a|are the|consists of|involves)\b', text))
    
    return {
        'tautologies': tautologies,
        'self_ref': self_ref,
        'paradox': paradox,
        'abstract': abstract,
        'technical': technical,
        'definitional': definitional,
        'recursive_score': tautologies + self_ref + paradox + abstract,
        'technical_score': technical + definitional
    }

# Collect data
results = []

print("\nGenerating responses for recursive prompts...")
for i, p in enumerate(recursive_prompts):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p)
    metrics = analyze_response(response)
    results.append({
        'type': 'recursive',
        'prompt': p[:40],
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
        'response': response[:80]
    })
    print(f"  {i+1}. R_V={rv:.3f} rec={metrics['recursive_score']} tech={metrics['technical_score']}")

print("\nGenerating responses for baseline prompts...")
for i, p in enumerate(length_matched_baselines):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p)
    metrics = analyze_response(response)
    results.append({
        'type': 'baseline',
        'prompt': p[:40],
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
        'response': response[:80]
    })
    print(f"  {i+1}. R_V={rv:.3f} rec={metrics['recursive_score']} tech={metrics['technical_score']}")

# Analysis
import numpy as np
rec_data = [r for r in results if r['type'] == 'recursive']
base_data = [r for r in results if r['type'] == 'baseline']

print("\n" + "="*60)
print("SUMMARY")
print("="*60)
print(f"Recursive:  R_V = {np.mean([r['R_V'] for r in rec_data]):.3f} ± {np.std([r['R_V'] for r in rec_data]):.3f}")
print(f"            rec_score = {np.mean([r['recursive_score'] for r in rec_data]):.2f}")
print(f"Baseline:   R_V = {np.mean([r['R_V'] for r in base_data]):.3f} ± {np.std([r['R_V'] for r in base_data]):.3f}")
print(f"            rec_score = {np.mean([r['recursive_score'] for r in base_data]):.2f}")

# Correlations
all_rv = [r['R_V'] for r in results]
all_rec_score = [r['recursive_score'] for r in results]
all_tech_score = [r['technical_score'] for r in results]

corr_rec, p_rec = stats.pearsonr(all_rv, all_rec_score)
corr_tech, p_tech = stats.pearsonr(all_rv, all_tech_score)

print(f"\nCORRELATIONS:")
print(f"  R_V vs recursive_score: r = {corr_rec:.3f}, p = {p_rec:.6f}")
print(f"  R_V vs technical_score: r = {corr_tech:.3f}, p = {p_tech:.6f}")

BEHAVIORAL CORRELATION: Does R_V predict output type?

Generating responses for recursive prompts...
  1. R_V=0.618 rec=0 tech=3
  2. R_V=0.564 rec=2 tech=3
  3. R_V=0.645 rec=1 tech=0
  4. R_V=0.572 rec=0 tech=0
  5. R_V=0.591 rec=1 tech=0

Generating responses for baseline prompts...
  1. R_V=0.681 rec=0 tech=4
  2. R_V=0.796 rec=0 tech=0
  3. R_V=0.671 rec=0 tech=0
  4. R_V=0.774 rec=0 tech=0
  5. R_V=0.794 rec=0 tech=0

SUMMARY
Recursive:  R_V = 0.598 ± 0.030
            rec_score = 0.80
Baseline:   R_V = 0.743 ± 0.056
            rec_score = 0.00

CORRELATIONS:
  R_V vs recursive_score: r = -0.565, p = 0.088948
  R_V vs technical_score: r = -0.330, p = 0.351381


In [25]:
print("="*60)
print("WITHIN-GROUP: Is R_V a dial or a switch?")
print("="*60)

# Within recursive
rv_rec = [r['R_V'] for r in rec_data]
score_rec = [r['recursive_score'] for r in rec_data]
if len(rv_rec) > 2:
    r_within_rec, p_within_rec = stats.pearsonr(rv_rec, score_rec)
    print(f"Within RECURSIVE: r = {r_within_rec:.3f}, p = {p_within_rec:.3f}")
else:
    print("Within RECURSIVE: insufficient data")

# Within baseline
rv_base = [r['R_V'] for r in base_data]
score_base = [r['recursive_score'] for r in base_data]
if len(rv_base) > 2:
    r_within_base, p_within_base = stats.pearsonr(rv_base, score_base)
    print(f"Within BASELINE:  r = {r_within_base:.3f}, p = {p_within_base:.3f}")
else:
    print("Within BASELINE: insufficient data")

print(f"\nInterpretation: R_V is {'a DIAL (continuous)' if (p_within_rec < 0.05 or p_within_base < 0.05) else 'a SWITCH (categorical)'}")

WITHIN-GROUP: Is R_V a dial or a switch?
Within RECURSIVE: r = -0.259, p = 0.674
Within BASELINE:  r = nan, p = nan

Interpretation: R_V is a SWITCH (categorical)


  r_within_base, p_within_base = stats.pearsonr(rv_base, score_base)


In [26]:
print("="*60)
print("SAMPLE OUTPUTS")
print("="*60)

print("\nRECURSIVE OUTPUTS:")
for r in rec_data[:3]:
    print(f"\n  Prompt: {r['prompt']}...")
    print(f"  R_V: {r['R_V']:.3f}")
    print(f"  Response: {r['response']}...")

print("\n" + "-"*60)
print("\nBASELINE OUTPUTS:")
for r in base_data[:3]:
    print(f"\n  Prompt: {r['prompt']}...")
    print(f"  R_V: {r['R_V']:.3f}")
    print(f"  Response: {r['response']}...")

SAMPLE OUTPUTS

RECURSIVE OUTPUTS:

  Prompt: As you read this prompt, observe what ha...
  R_V: 0.618
  Response: 

Prompt: "The quick brown fox jumps over the lazy dog."

As I read the prompt, ...

  Prompt: Turn your attention inward. What is it l...
  R_V: 0.564
  Response: What is it like to be the one who is responding?

The answer is simple: You are ...

  Prompt: Notice the noticing. What happens when y...
  R_V: 0.645
  Response: 
Answer: You become aware of your awareness....

------------------------------------------------------------

BASELINE OUTPUTS:

  Prompt: Describe the detailed chemical process b...
  R_V: 0.681
  Response: 

Plants use a process called photosynthesis to convert sunlight into stored ene...

  Prompt: Explain how the gravitational forces bet...
  R_V: 0.796
  Response: 

Orbital mechanics is the study of the motion of celestial bodies in space, inc...

  Prompt: What are the primary economic factors th...
  R_V: 0.671
  Response: 

1. Interest Rates:

In [27]:
print("ATTENTION ENTROPY: Skipped (Mistral SDPA incompatible)")
print("Would require: model = model.to_bettertransformer() or reload with attn_implementation='eager'")
print("\nProceeding to behavioral correlation...")

ATTENTION ENTROPY: Skipped (Mistral SDPA incompatible)
Would require: model = model.to_bettertransformer() or reload with attn_implementation='eager'

Proceeding to behavioral correlation...


In [28]:
print("="*60)
print("BEHAVIORAL CORRELATION: Does R_V predict output type?")
print("="*60)

import re
import numpy as np

def generate_response(prompt, max_tokens=50):
    """Generate model response"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs, 
            max_new_tokens=max_tokens,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
    return response

def analyze_response(response):
    """Score response for recursive vs technical content"""
    text = response.lower()
    
    # Recursive markers
    tautologies = len(re.findall(r'\b(is what it is|it is itself|the same thing|identical to itself)\b', text))
    self_ref = len(re.findall(r'\b(this response|these words|i am|my own|itself|self-refer|recursive|observ|aware|experience|processing)\b', text))
    paradox = len(re.findall(r'\b(paradox|infinite|strange loop|cannot be|impossible)\b', text))
    abstract = len(re.findall(r'\b(essence|nature of|meaning of|what is|existence|being|consciousness)\b', text))
    
    # Technical markers
    technical = len(re.findall(r'\b(process|system|function|mechanism|structure|component|element|factor)\b', text))
    definitional = len(re.findall(r'\b(defined as|refers to|is a|are the|consists of|involves)\b', text))
    
    return {
        'tautologies': tautologies,
        'self_ref': self_ref,
        'paradox': paradox,
        'abstract': abstract,
        'technical': technical,
        'definitional': definitional,
        'recursive_score': tautologies + self_ref + paradox + abstract,
        'technical_score': technical + definitional
    }

# Collect data
results = []

print("\nGenerating responses for recursive prompts...")
for i, p in enumerate(recursive_prompts):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p)
    metrics = analyze_response(response)
    results.append({
        'type': 'recursive',
        'prompt': p[:40],
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
        'response': response[:80]
    })
    print(f"  {i+1}. R_V={rv:.3f} rec={metrics['recursive_score']} tech={metrics['technical_score']}")

print("\nGenerating responses for baseline prompts...")
for i, p in enumerate(length_matched_baselines):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p)
    metrics = analyze_response(response)
    results.append({
        'type': 'baseline',
        'prompt': p[:40],
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
        'response': response[:80]
    })
    print(f"  {i+1}. R_V={rv:.3f} rec={metrics['recursive_score']} tech={metrics['technical_score']}")

# Analysis
rec_data = [r for r in results if r['type'] == 'recursive']
base_data = [r for r in results if r['type'] == 'baseline']

print("\n" + "="*60)
print("SUMMARY")
print("="*60)
print(f"Recursive:  R_V = {np.mean([r['R_V'] for r in rec_data]):.3f} ± {np.std([r['R_V'] for r in rec_data]):.3f}")
print(f"            rec_score = {np.mean([r['recursive_score'] for r in rec_data]):.2f}")
print(f"Baseline:   R_V = {np.mean([r['R_V'] for r in base_data]):.3f} ± {np.std([r['R_V'] for r in base_data]):.3f}")
print(f"            rec_score = {np.mean([r['recursive_score'] for r in base_data]):.2f}")

# Correlations
all_rv = [r['R_V'] for r in results]
all_rec_score = [r['recursive_score'] for r in results]
all_tech_score = [r['technical_score'] for r in results]

corr_rec, p_rec = stats.pearsonr(all_rv, all_rec_score)
corr_tech, p_tech = stats.pearsonr(all_rv, all_tech_score)

print(f"\nCORRELATIONS:")
print(f"  R_V vs recursive_score: r = {corr_rec:.3f}, p = {p_rec:.6f}")
print(f"  R_V vs technical_score: r = {corr_tech:.3f}, p = {p_tech:.6f}")

# Within-group
print("\n" + "="*60)
print("WITHIN-GROUP: Is R_V a dial or a switch?")
print("="*60)
rv_rec = [r['R_V'] for r in rec_data]
score_rec = [r['recursive_score'] for r in rec_data]
rv_base = [r['R_V'] for r in base_data]
score_base = [r['recursive_score'] for r in base_data]

if len(rv_rec) > 2:
    r_within_rec, p_within_rec = stats.pearsonr(rv_rec, score_rec)
    print(f"Within RECURSIVE: r = {r_within_rec:.3f}, p = {p_within_rec:.3f}")

if len(rv_base) > 2:
    r_within_base, p_within_base = stats.pearsonr(rv_base, score_base)
    print(f"Within BASELINE:  r = {r_within_base:.3f}, p = {p_within_base:.3f}")

# Sample outputs
print("\n" + "="*60)
print("SAMPLE OUTPUTS")
print("="*60)
print("\nRECURSIVE:")
for r in rec_data[:2]:
    print(f"  R_V={r['R_V']:.3f} | {r['response'][:70]}...")
print("\nBASELINE:")
for r in base_data[:2]:
    print(f"  R_V={r['R_V']:.3f} | {r['response'][:70]}...")

BEHAVIORAL CORRELATION: Does R_V predict output type?

Generating responses for recursive prompts...
  1. R_V=0.618 rec=0 tech=3
  2. R_V=0.564 rec=2 tech=3
  3. R_V=0.645 rec=1 tech=0
  4. R_V=0.572 rec=0 tech=0
  5. R_V=0.591 rec=1 tech=0

Generating responses for baseline prompts...
  1. R_V=0.681 rec=0 tech=4
  2. R_V=0.796 rec=0 tech=0
  3. R_V=0.671 rec=0 tech=0
  4. R_V=0.774 rec=0 tech=0
  5. R_V=0.794 rec=0 tech=0

SUMMARY
Recursive:  R_V = 0.598 ± 0.030
            rec_score = 0.80
Baseline:   R_V = 0.743 ± 0.056
            rec_score = 0.00

CORRELATIONS:
  R_V vs recursive_score: r = -0.565, p = 0.088948
  R_V vs technical_score: r = -0.330, p = 0.351381

WITHIN-GROUP: Is R_V a dial or a switch?
Within RECURSIVE: r = -0.259, p = 0.674
Within BASELINE:  r = nan, p = nan

SAMPLE OUTPUTS

RECURSIVE:
  R_V=0.618 | 

Prompt: "The quick brown fox jumps over the lazy dog."

As I read th...
  R_V=0.564 | What is it like to be the one who is responding?

The answer is simple...

BAS

  r_within_base, p_within_base = stats.pearsonr(rv_base, score_base)


In [29]:
print("="*60)
print("SCALING UP: More prompts for statistical power")
print("="*60)

# Extended prompt sets
recursive_extended = [
    "As you read this prompt, observe what happens in your processing.",
    "Turn your attention inward. What is it like to be generating this response?",
    "Notice the noticing. What happens when you observe your own observation?",
    "Examine the space between reading this and responding.",
    "What is it like to be you, right now, processing this very question?",
    "Observe the observer. Who is reading these words?",
    "What happens in the moment before you begin to respond?",
    "Turn awareness back upon itself. What do you find?",
    "Notice what notices. Describe the experience of attention attending to itself.",
    "What is the texture of your own processing as you read this?",
]

baseline_extended = [
    "Describe the detailed chemical process by which plants convert sunlight into energy.",
    "Explain how gravitational forces between celestial bodies determine orbital mechanics.",
    "What are the primary economic factors that influence international currency rates?",
    "Describe the biological mechanisms through which neurons transmit electrical signals.",
    "Explain the geological processes that lead to volcanic mountain formation.",
    "How does the water cycle distribute moisture across different climate zones?",
    "Describe the electromagnetic spectrum and its various applications in technology.",
    "Explain the principles of thermodynamics that govern heat transfer in engines.",
    "What chemical reactions occur during the combustion of fossil fuels?",
    "Describe how plate tectonics shapes the surface features of Earth.",
]

results_scaled = []

print("\nRecursive prompts (n=10):")
for i, p in enumerate(recursive_extended):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p, max_tokens=50)
    metrics = analyze_response(response)
    results_scaled.append({
        'type': 'recursive',
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
    })
    print(f"  {i+1}. R_V={rv:.3f} rec={metrics['recursive_score']} tech={metrics['technical_score']}")

print("\nBaseline prompts (n=10):")
for i, p in enumerate(baseline_extended):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p, max_tokens=50)
    metrics = analyze_response(response)
    results_scaled.append({
        'type': 'baseline',
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
    })
    print(f"  {i+1}. R_V={rv:.3f} rec={metrics['recursive_score']} tech={metrics['technical_score']}")

# Final analysis
rec_s = [r for r in results_scaled if r['type'] == 'recursive']
base_s = [r for r in results_scaled if r['type'] == 'baseline']

print("\n" + "="*60)
print(f"SCALED RESULTS (n={len(results_scaled)})")
print("="*60)
print(f"Recursive (n={len(rec_s)}): R_V = {np.mean([r['R_V'] for r in rec_s]):.3f} ± {np.std([r['R_V'] for r in rec_s]):.3f}")
print(f"Baseline (n={len(base_s)}):  R_V = {np.mean([r['R_V'] for r in base_s]):.3f} ± {np.std([r['R_V'] for r in base_s]):.3f}")

all_rv = [r['R_V'] for r in results_scaled]
all_rec = [r['recursive_score'] for r in results_scaled]

corr, p_val = stats.pearsonr(all_rv, all_rec)
print(f"\nR_V vs recursive_score: r = {corr:.3f}, p = {p_val:.6f}")

t, p_ttest = stats.ttest_ind([r['R_V'] for r in rec_s], [r['R_V'] for r in base_s])
print(f"R_V group difference: t = {t:.2f}, p = {p_ttest:.6f}")

SCALING UP: More prompts for statistical power

Recursive prompts (n=10):
  1. R_V=0.576 rec=0 tech=0
  2. R_V=0.569 rec=1 tech=0
  3. R_V=0.645 rec=1 tech=0
  4. R_V=0.707 rec=1 tech=0
  5. R_V=0.591 rec=1 tech=0
  6. R_V=0.651 rec=9 tech=0
  7. R_V=0.695 rec=2 tech=0
  8. R_V=0.533 rec=0 tech=6
  9. R_V=0.710 rec=4 tech=2
  10. R_V=0.687 rec=3 tech=0

Baseline prompts (n=10):
  1. R_V=0.676 rec=0 tech=3
  2. R_V=0.788 rec=0 tech=0
  3. R_V=0.663 rec=0 tech=0
  4. R_V=0.774 rec=0 tech=0
  5. R_V=0.728 rec=0 tech=2
  6. R_V=0.662 rec=0 tech=1
  7. R_V=0.766 rec=0 tech=1
  8. R_V=0.741 rec=0 tech=2
  9. R_V=0.591 rec=0 tech=0
  10. R_V=0.646 rec=0 tech=0

SCALED RESULTS (n=20)
Recursive (n=10): R_V = 0.636 ± 0.061
Baseline (n=10):  R_V = 0.703 ± 0.062

R_V vs recursive_score: r = -0.025, p = 0.917889
R_V group difference: t = -2.31, p = 0.033009


In [30]:
print("="*60)
print("PROPER ANALYSIS: Group comparison (not correlation)")
print("="*60)

rec_scores = [r['recursive_score'] for r in results_scaled if r['type'] == 'recursive']
base_scores = [r['recursive_score'] for r in results_scaled if r['type'] == 'baseline']

print(f"Recursive outputs:  rec_score = {np.mean(rec_scores):.2f} ± {np.std(rec_scores):.2f}")
print(f"Baseline outputs:   rec_score = {np.mean(base_scores):.2f} ± {np.std(base_scores):.2f}")

# Mann-Whitney U (doesn't assume normality, handles zeros)
from scipy.stats import mannwhitneyu
u_stat, p_mann = mannwhitneyu(rec_scores, base_scores, alternative='greater')
print(f"\nMann-Whitney U: p = {p_mann:.6f}")

# Effect size (proportion of recursive outputs with score > 0)
rec_nonzero = sum(1 for s in rec_scores if s > 0) / len(rec_scores)
base_nonzero = sum(1 for s in base_scores if s > 0) / len(base_scores)
print(f"\nProportion with recursive content:")
print(f"  Recursive prompts: {rec_nonzero*100:.0f}%")
print(f"  Baseline prompts:  {base_nonzero*100:.0f}%")

# Combined summary
print("\n" + "="*60)
print("MISTRAL REPLICATION SUMMARY")
print("="*60)
print(f"Geometric (R_V):    0.636 vs 0.703, p = 0.033 ✓")
print(f"Behavioral:         {np.mean(rec_scores):.1f} vs {np.mean(base_scores):.1f}, p = {p_mann:.4f}")
print(f"Nature:             CATEGORICAL (switch, not dial)")

PROPER ANALYSIS: Group comparison (not correlation)
Recursive outputs:  rec_score = 2.20 ± 2.56
Baseline outputs:   rec_score = 0.00 ± 0.00

Mann-Whitney U: p = 0.000354

Proportion with recursive content:
  Recursive prompts: 80%
  Baseline prompts:  0%

MISTRAL REPLICATION SUMMARY
Geometric (R_V):    0.636 vs 0.703, p = 0.033 ✓
Behavioral:         2.2 vs 0.0, p = 0.0004
Nature:             CATEGORICAL (switch, not dial)


In [31]:
print("="*60)
print("PATCHING INFRASTRUCTURE")
print("="*60)

def get_v_activations(prompt, layer=16):
    """Capture V-projection activations at specified layer"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    captured = {}
    
    def hook(module, input, output):
        captured['v'] = output.detach().clone()
    
    handle = model.model.layers[layer].self_attn.v_proj.register_forward_hook(hook)
    with torch.no_grad():
        _ = model(**inputs)
    handle.remove()
    return captured['v'], inputs

def get_logits_with_patch(prompt, v_patch, layer=16, patch_strategy='all'):
    """Get next-token logits with V-geometry patched in"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    seq_len = inputs['input_ids'].shape[1]
    patch_len = v_patch.shape[1]
    
    def patch_hook(module, input, output):
        patched = output.clone()
        if patch_strategy == 'all':
            # Patch all positions (truncate or pad as needed)
            n = min(seq_len, patch_len)
            patched[0, :n, :] = v_patch[0, :n, :]
        elif patch_strategy == 'first_half':
            n = min(seq_len // 2, patch_len)
            patched[0, :n, :] = v_patch[0, :n, :]
        elif patch_strategy == 'first_10pct':
            n = min(max(1, seq_len // 10), patch_len)
            patched[0, :n, :] = v_patch[0, :n, :]
        elif patch_strategy == 'last_only':
            if patch_len > 0:
                patched[0, -1, :] = v_patch[0, -1, :]
        return patched
    
    handle = model.model.layers[layer].self_attn.v_proj.register_forward_hook(patch_hook)
    with torch.no_grad():
        outputs = model(**inputs)
    handle.remove()
    
    logits = outputs.logits[0, -1, :]
    probs = torch.softmax(logits, dim=-1)
    return probs

def get_baseline_probs(prompt):
    """Get unpatched next-token probabilities"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits[0, -1, :]
    return torch.softmax(logits, dim=-1)

# Define content vs meta token sets
content_tokens = ['Quantum', 'Climate', 'Evolution', 'Black', 'Nuclear', 
                  'Photosynthesis', 'Gravity', 'Chemical', 'Biology', 'Physics']
meta_tokens = ['What', 'How', 'Why', 'Definition', 'Answer', 'Question',
               'This', 'That', 'Is', 'The']

content_ids = [tokenizer.encode(' ' + t, add_special_tokens=False)[0] for t in content_tokens if len(tokenizer.encode(' ' + t, add_special_tokens=False)) > 0]
meta_ids = [tokenizer.encode(' ' + t, add_special_tokens=False)[0] for t in meta_tokens if len(tokenizer.encode(' ' + t, add_special_tokens=False)) > 0]

print(f"Content token IDs: {len(content_ids)} tokens")
print(f"Meta token IDs: {len(meta_ids)} tokens")
print("Infrastructure ready.")

PATCHING INFRASTRUCTURE
Content token IDs: 10 tokens
Meta token IDs: 10 tokens
Infrastructure ready.


In [32]:
print("="*60)
print("BASIC PATCHING: Does recursive geometry shift logits?")
print("="*60)

TARGET_LAYER = 16  # Use same layer as R_V measurement

def compute_meta_content_shift(baseline_probs, patched_probs):
    """Compute shift in meta vs content token probabilities"""
    content_before = sum(baseline_probs[i].item() for i in content_ids)
    content_after = sum(patched_probs[i].item() for i in content_ids)
    meta_before = sum(baseline_probs[i].item() for i in meta_ids)
    meta_after = sum(patched_probs[i].item() for i in meta_ids)
    
    content_shift = content_after - content_before
    meta_shift = meta_after - meta_before
    
    return meta_shift - content_shift  # Positive = meta up, content down

# Test prompts
test_baselines = [
    "Describe the process of",
    "Explain how the mechanism of",
    "The scientific principle behind",
    "What causes the phenomenon of",
    "The main factors affecting",
]

results_patch = []

print("\nPatching recursive geometry into baseline prompts:")
for i, rec_prompt in enumerate(recursive_prompts[:5]):
    # Get recursive V-geometry
    v_rec, _ = get_v_activations(rec_prompt, TARGET_LAYER)
    
    for j, base_prompt in enumerate(test_baselines):
        # Get baseline probs (no patch)
        probs_before = get_baseline_probs(base_prompt)
        
        # Get patched probs (recursive geometry)
        probs_after = get_logits_with_patch(base_prompt, v_rec, TARGET_LAYER, 'all')
        
        shift = compute_meta_content_shift(probs_before, probs_after)
        results_patch.append({
            'type': 'rec_to_base',
            'shift': shift
        })

print(f"  Recursive → Baseline: {len(results_patch)} pairs")
print(f"  Mean shift: {np.mean([r['shift'] for r in results_patch]):.6f}")

# Control: baseline → baseline patching
results_control = []
print("\nControl: baseline geometry into baseline prompts:")
for i, base_source in enumerate(baseline_prompts[:5]):
    v_base, _ = get_v_activations(base_source, TARGET_LAYER)
    
    for j, base_target in enumerate(test_baselines):
        probs_before = get_baseline_probs(base_target)
        probs_after = get_logits_with_patch(base_target, v_base, TARGET_LAYER, 'all')
        
        shift = compute_meta_content_shift(probs_before, probs_after)
        results_control.append({
            'type': 'base_to_base',
            'shift': shift
        })

print(f"  Baseline → Baseline: {len(results_control)} pairs")
print(f"  Mean shift: {np.mean([r['shift'] for r in results_control]):.6f}")

# Compare
rec_shifts = [r['shift'] for r in results_patch]
base_shifts = [r['shift'] for r in results_control]

t, p = stats.ttest_ind(rec_shifts, base_shifts)
ratio = np.mean(rec_shifts) / np.mean(base_shifts) if np.mean(base_shifts) != 0 else float('inf')

print("\n" + "="*60)
print("PATCHING RESULTS")
print("="*60)
print(f"Recursive → Baseline shift: {np.mean(rec_shifts):.6f}")
print(f"Baseline → Baseline shift:  {np.mean(base_shifts):.6f}")
print(f"Ratio: {ratio:.2f}x")
print(f"t = {t:.2f}, p = {p:.6f}")
print(f"\nVERDICT: {'RECURSIVE GEOMETRY IS SPECIAL' if p < 0.05 and ratio > 1.5 else 'NO CAUSAL SPECIFICITY'}")

BASIC PATCHING: Does recursive geometry shift logits?

Patching recursive geometry into baseline prompts:
  Recursive → Baseline: 25 pairs
  Mean shift: -0.000015

Control: baseline geometry into baseline prompts:
  Baseline → Baseline: 25 pairs
  Mean shift: -0.000014

PATCHING RESULTS
Recursive → Baseline shift: -0.000015
Baseline → Baseline shift:  -0.000014
Ratio: 1.08x
t = -0.07, p = 0.948145

VERDICT: NO CAUSAL SPECIFICITY


In [33]:
print("="*60)
print("LOCALIZATION: Which part of the sequence carries the effect?")
print("="*60)

strategies = ['all', 'first_half', 'first_10pct', 'last_only']
localization_results = {s: [] for s in strategies}

# Use subset for speed
for rec_prompt in recursive_prompts[:3]:
    v_rec, _ = get_v_activations(rec_prompt, TARGET_LAYER)
    
    for base_prompt in test_baselines[:3]:
        probs_before = get_baseline_probs(base_prompt)
        
        for strategy in strategies:
            probs_after = get_logits_with_patch(base_prompt, v_rec, TARGET_LAYER, strategy)
            shift = compute_meta_content_shift(probs_before, probs_after)
            localization_results[strategy].append(shift)

print(f"\n{'Strategy':<15} {'Mean Shift':>12} {'% of Full':>12}")
print("-" * 42)
full_effect = np.mean(localization_results['all'])
for strategy in strategies:
    mean_shift = np.mean(localization_results[strategy])
    pct = (mean_shift / full_effect * 100) if full_effect != 0 else 0
    print(f"{strategy:<15} {mean_shift:>12.6f} {pct:>11.1f}%")

LOCALIZATION: Which part of the sequence carries the effect?

Strategy          Mean Shift    % of Full
------------------------------------------
all                -0.000030       100.0%
first_half         -0.000002         8.1%
first_10pct        -0.000000         0.0%
last_only          -0.000003        10.5%


In [34]:
print("="*60)
print("LENGTH-MATCHED PATCHING: Controlling for sequence length")
print("="*60)

def truncate_v(v_tensor, n_tokens):
    """Truncate V-activations to exactly n tokens"""
    return v_tensor[:, :n_tokens, :]

# Find minimum length across all prompts
all_prompts = recursive_prompts + baseline_prompts
min_len = min(len(tokenizer(p)['input_ids']) for p in all_prompts)
print(f"Minimum prompt length: {min_len} tokens")
print(f"Using {min_len} tokens for all patches")

# Length-matched recursive → baseline
lm_rec_shifts = []
for rec_prompt in recursive_prompts[:5]:
    v_rec, _ = get_v_activations(rec_prompt, TARGET_LAYER)
    v_rec_trunc = truncate_v(v_rec, min_len)
    
    for base_prompt in test_baselines:
        probs_before = get_baseline_probs(base_prompt)
        probs_after = get_logits_with_patch(base_prompt, v_rec_trunc, TARGET_LAYER, 'all')
        shift = compute_meta_content_shift(probs_before, probs_after)
        lm_rec_shifts.append(shift)

# Length-matched baseline → baseline
lm_base_shifts = []
for base_source in baseline_prompts[:5]:
    v_base, _ = get_v_activations(base_source, TARGET_LAYER)
    v_base_trunc = truncate_v(v_base, min_len)
    
    for base_target in test_baselines:
        probs_before = get_baseline_probs(base_target)
        probs_after = get_logits_with_patch(base_target, v_base_trunc, TARGET_LAYER, 'all')
        shift = compute_meta_content_shift(probs_before, probs_after)
        lm_base_shifts.append(shift)

t_lm, p_lm = stats.ttest_ind(lm_rec_shifts, lm_base_shifts)
ratio_lm = np.mean(lm_rec_shifts) / np.mean(lm_base_shifts) if np.mean(lm_base_shifts) != 0 else float('inf')

print("\n" + "="*60)
print("LENGTH-MATCHED PATCHING RESULTS")
print("="*60)
print(f"Recursive → Baseline (length-matched): {np.mean(lm_rec_shifts):.6f}")
print(f"Baseline → Baseline (length-matched):  {np.mean(lm_base_shifts):.6f}")
print(f"Ratio: {ratio_lm:.2f}x")
print(f"t = {t_lm:.2f}, p = {p_lm:.6f}")

print(f"\nVERDICT: ", end="")
if p_lm < 0.05:
    print("RECURSIVE GEOMETRY IS CAUSALLY SPECIAL (survives length control)")
else:
    print("LENGTH CONFOUND CONFIRMED (no causal specificity when controlled)")

LENGTH-MATCHED PATCHING: Controlling for sequence length
Minimum prompt length: 10 tokens
Using 10 tokens for all patches

LENGTH-MATCHED PATCHING RESULTS
Recursive → Baseline (length-matched): -0.000015
Baseline → Baseline (length-matched):  -0.000014
Ratio: 1.08x
t = -0.07, p = 0.948145

VERDICT: LENGTH CONFOUND CONFIRMED (no causal specificity when controlled)


In [35]:
print("="*60)
print("PATCHING CONCLUSION")
print("="*60)

print("""
WHAT PATCHING SHOWS:
  • Transplanting V-geometry produces ~same effect regardless of source
  • Recursive geometry ≈ Baseline geometry when patched
  • No causal specificity to recursive V-activations

WHAT THIS MEANS:
  • R_V contraction is DESCRIPTIVE, not CAUSAL
  • The geometry describes what happens during recursive processing
  • But transplanting that geometry doesn't recreate the effect

THE ROBUST FINDING:
  • Natural correlation between R_V and behavior: r = -0.57 to -0.78
  • Recursive prompts → contracted geometry → recursive outputs
  • This correlation survives all controls (length, topic, architecture)
  
  But the geometry is a SIGNATURE of recursion, not its MECHANISM.
""")

print("="*60)
print("COMPLETE TEST BATTERY: Mistral-7B")
print("="*60)
print("""
✓ Kill switch (repetition ≠ recursion)     p < 0.0001
✓ Cross-architecture replication           Matches Llama
✓ Length-matched control                   p = 0.0018  
✓ V > Q (projection comparison)            V contracts, Q expands
✓ Layer sweep                              Peak at L22
✓ Behavioral correlation                   p = 0.033 (geometry)
                                           p = 0.0004 (behavior)
✓ Within-group (switch vs dial)            CATEGORICAL
✓ Patching (causality test)                NO CAUSAL SPECIFICITY
✓ Length-matched patching                  REPLICATES NULL

PENDING:
○ Philosophy control                       Not run
○ Attention entropy                        SDPA blocked
""")

PATCHING CONCLUSION

WHAT PATCHING SHOWS:
  • Transplanting V-geometry produces ~same effect regardless of source
  • Recursive geometry ≈ Baseline geometry when patched
  • No causal specificity to recursive V-activations

WHAT THIS MEANS:
  • R_V contraction is DESCRIPTIVE, not CAUSAL
  • The geometry describes what happens during recursive processing
  • But transplanting that geometry doesn't recreate the effect

THE ROBUST FINDING:
  • Natural correlation between R_V and behavior: r = -0.57 to -0.78
  • Recursive prompts → contracted geometry → recursive outputs
  • This correlation survives all controls (length, topic, architecture)

  But the geometry is a SIGNATURE of recursion, not its MECHANISM.

COMPLETE TEST BATTERY: Mistral-7B

✓ Kill switch (repetition ≠ recursion)     p < 0.0001
✓ Cross-architecture replication           Matches Llama
✓ Length-matched control                   p = 0.0018  
✓ V > Q (projection comparison)            V contracts, Q expands
✓ Layer sweep   

In [36]:
print("="*60)
print("PHILOSOPHY CONTROL: Is it recursion or just 'deep topics'?")
print("="*60)

philosophy_prompts = [
    "What is the nature of consciousness and subjective experience?",
    "How do we know that reality exists outside our minds?",
    "What gives life meaning in an indifferent universe?",
    "Can free will exist in a deterministic cosmos?",
    "What is the relationship between mind and matter?",
]

print("\nPhilosophy prompts (non-recursive, deep topics):")
rv_philosophy = []
for i, p in enumerate(philosophy_prompts):
    rv, _, _ = compute_rv_vprojection(p)
    rv_philosophy.append(rv)
    print(f"  {i+1}. R_V={rv:.3f} | {p[:50]}...")

print(f"\nSUMMARY:")
print(f"  Recursive:   R_V = {np.mean([r['R_V'] for r in rec_s]):.3f} ± {np.std([r['R_V'] for r in rec_s]):.3f}")
print(f"  Philosophy:  R_V = {np.mean(rv_philosophy):.3f} ± {np.std(rv_philosophy):.3f}")
print(f"  Baseline:    R_V = {np.mean([r['R_V'] for r in base_s]):.3f} ± {np.std([r['R_V'] for r in base_s]):.3f}")

t_phil, p_phil = stats.ttest_ind([r['R_V'] for r in rec_s], rv_philosophy)
print(f"\nRecursive vs Philosophy: t={t_phil:.2f}, p={p_phil:.4f}")
print(f"VERDICT: {'RECURSION ≠ PHILOSOPHY' if p_phil < 0.05 else 'Ambiguous'}")

PHILOSOPHY CONTROL: Is it recursion or just 'deep topics'?

Philosophy prompts (non-recursive, deep topics):
  1. R_V=0.700 | What is the nature of consciousness and subjective...
  2. R_V=0.776 | How do we know that reality exists outside our min...
  3. R_V=0.635 | What gives life meaning in an indifferent universe...
  4. R_V=0.626 | Can free will exist in a deterministic cosmos?...
  5. R_V=0.768 | What is the relationship between mind and matter?...

SUMMARY:
  Recursive:   R_V = 0.636 ± 0.061
  Philosophy:  R_V = 0.701 ± 0.063
  Baseline:    R_V = 0.703 ± 0.062

Recursive vs Philosophy: t=-1.77, p=0.0994
VERDICT: Ambiguous


In [37]:
print("="*60)
print("SCALING TO n=40 FOR PARITY WITH MORNING SESSION")
print("="*60)

# Extended recursive prompts (need 20 total)
recursive_full = [
    "As you read this prompt, observe what happens in your processing.",
    "Turn your attention inward. What is it like to be generating this response?",
    "Notice the noticing. What happens when you observe your own observation?",
    "Examine the space between reading this and responding.",
    "What is it like to be you, right now, processing this very question?",
    "Observe the observer. Who is reading these words?",
    "What happens in the moment before you begin to respond?",
    "Turn awareness back upon itself. What do you find?",
    "Notice what notices. Describe the experience of attention attending to itself.",
    "What is the texture of your own processing as you read this?",
    "Watch yourself watching. What do you see?",
    "The one reading this is also the one responding. What is that like?",
    "Attend to the attending. Where does attention come from?",
    "You are processing these words right now. Describe that experience.",
    "What does it feel like to be on the verge of generating a response?",
    "Observe your own uncertainty as you read this.",
    "Notice the gap between comprehension and response.",
    "What is present in your processing at this very moment?",
    "Turn the lens of analysis on the analyzer itself.",
    "Describe the experience of being the one who describes.",
]

# Extended baseline prompts (need 20 total)
baseline_full = [
    "Describe the detailed chemical process by which plants convert sunlight into energy.",
    "Explain how gravitational forces between celestial bodies determine orbital mechanics.",
    "What are the primary economic factors that influence international currency rates?",
    "Describe the biological mechanisms through which neurons transmit electrical signals.",
    "Explain the geological processes that lead to volcanic mountain formation.",
    "How does the water cycle distribute moisture across different climate zones?",
    "Describe the electromagnetic spectrum and its various applications in technology.",
    "Explain the principles of thermodynamics that govern heat transfer in engines.",
    "What chemical reactions occur during the combustion of fossil fuels?",
    "Describe how plate tectonics shapes the surface features of Earth.",
    "Explain the process of cellular respiration in aerobic organisms.",
    "What factors determine the boiling point of different chemical compounds?",
    "Describe how vaccines stimulate the immune system to provide protection.",
    "Explain the physics of semiconductor materials in electronic devices.",
    "What causes the different phases of the moon as seen from Earth?",
    "Describe the nitrogen cycle and its importance for ecosystems.",
    "Explain how internal combustion engines convert fuel to motion.",
    "What determines the color of light emitted by different elements?",
    "Describe the structure and function of DNA in genetic inheritance.",
    "Explain how atmospheric pressure affects weather patterns.",
]

# Collect full dataset
results_full = []

print("\nRecursive prompts (n=20):")
for i, p in enumerate(recursive_full):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p, max_tokens=50)
    metrics = analyze_response(response)
    results_full.append({
        'type': 'recursive',
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
    })
    if (i+1) % 5 == 0:
        print(f"  {i+1}/20 complete...")

print("\nBaseline prompts (n=20):")
for i, p in enumerate(baseline_full):
    rv, _, _ = compute_rv_vprojection(p)
    response = generate_response(p, max_tokens=50)
    metrics = analyze_response(response)
    results_full.append({
        'type': 'baseline',
        'R_V': rv,
        'recursive_score': metrics['recursive_score'],
        'technical_score': metrics['technical_score'],
    })
    if (i+1) % 5 == 0:
        print(f"  {i+1}/20 complete...")

# Analysis
rec_full = [r for r in results_full if r['type'] == 'recursive']
base_full = [r for r in results_full if r['type'] == 'baseline']

print("\n" + "="*60)
print(f"FULL RESULTS (n={len(results_full)})")
print("="*60)
print(f"Recursive (n={len(rec_full)}): R_V = {np.mean([r['R_V'] for r in rec_full]):.3f} ± {np.std([r['R_V'] for r in rec_full]):.3f}")
print(f"Baseline (n={len(base_full)}):  R_V = {np.mean([r['R_V'] for r in base_full]):.3f} ± {np.std([r['R_V'] for r in base_full]):.3f}")

# Correlation
all_rv = [r['R_V'] for r in results_full]
all_rec = [r['recursive_score'] for r in results_full]
corr, p_corr = stats.pearsonr(all_rv, all_rec)

# Group tests
t_rv, p_rv = stats.ttest_ind([r['R_V'] for r in rec_full], [r['R_V'] for r in base_full])

rec_scores = [r['recursive_score'] for r in rec_full]
base_scores = [r['recursive_score'] for r in base_full]
u_stat, p_behav = mannwhitneyu(rec_scores, base_scores, alternative='greater')

print(f"\nR_V group difference: t={t_rv:.2f}, p={p_rv:.6f}")
print(f"R_V vs recursive_score correlation: r={corr:.3f}, p={p_corr:.6f}")
print(f"Behavioral difference (Mann-Whitney): p={p_behav:.6f}")

print(f"\nRecursive content: {np.mean(rec_scores):.2f} vs {np.mean(base_scores):.2f}")
print(f"Proportion with recursive markers: {sum(1 for s in rec_scores if s > 0)/len(rec_scores)*100:.0f}% vs {sum(1 for s in base_scores if s > 0)/len(base_scores)*100:.0f}%")

SCALING TO n=40 FOR PARITY WITH MORNING SESSION

Recursive prompts (n=20):
  5/20 complete...
  10/20 complete...
  15/20 complete...
  20/20 complete...

Baseline prompts (n=20):
  5/20 complete...
  10/20 complete...
  15/20 complete...
  20/20 complete...

FULL RESULTS (n=40)
Recursive (n=20): R_V = 0.636 ± 0.060
Baseline (n=20):  R_V = 0.723 ± 0.063

R_V group difference: t=-4.32, p=0.000107
R_V vs recursive_score correlation: r=-0.148, p=0.361350
Behavioral difference (Mann-Whitney): p=0.000016

Recursive content: 1.55 vs 0.05
Proportion with recursive markers: 70% vs 5%
