# Wave Phase Dynamics v2: Fixed Coherence Measures

## Key fixes from v1:

1. **INVERTED frequency mapping**: Common words = HIGH frequency (more attended)
2. **Multiple coherence measures** that respond to attention changes
3. **Spectral concentration** metric for measuring wave interference

## The insight

LLMs attend more to common/structural tokens ("the", "is", "a").
These should be HIGH frequency carriers that attention amplifies.
Rare words carry specific information but are attended less.


In [None]:
# Imports and setup
import math
import json
from pathlib import Path
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass
from collections import Counter

import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib import cm
from scipy.signal import hilbert
from scipy.fft import fft, fftfreq

from transformers import GPT2LMHeadModel, GPT2Tokenizer

try:
    from datasets import load_dataset
    HAS_DATASETS = True
except ImportError:
    HAS_DATASETS = False
    print("datasets not available, using fallback word frequencies")

# Figures are written to a "figs_wave_v2" folder under the current working directory.
NOTEBOOK_DIR = Path.cwd()
FIG_DIR = NOTEBOOK_DIR.joinpath("figs_wave_v2")
FIG_DIR.mkdir(exist_ok=True)

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


In [None]:
# Load the GPT-2 tokenizer and language model
model_name = "gpt2"
print(f"Loading {model_name}...")
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Place the model on the selected device and put it in eval mode in one chain.
model = GPT2LMHeadModel.from_pretrained(model_name, output_attentions=True).to(device).eval()

# Architecture sizes read from the model config; later cells use them for
# array shapes and plot axes.
n_layers, n_heads, d_model = (
    model.config.n_layer,
    model.config.n_head,
    model.config.n_embd,
)
print(f"Model: {n_layers} layers, {n_heads} heads, d_model={d_model}")


In [None]:
# Build word frequency table
def build_word_frequency_table(tokenizer, max_words: int = 50000) -> Dict[str, int]:
    """Build a ``{word: rank}`` table in which rank 1 is the most frequent entry.

    Two sources are tried in order:

    1. If the optional ``datasets`` package was imported (``HAS_DATASETS``),
       the whitespace-split, lower-cased words of the wikitext-2 training
       split are counted and the ``max_words`` most common ones are ranked.
    2. Otherwise, or if loading wikitext-2 fails, the tokenizer's vocabulary
       order is used as a frequency proxy: each distinct stripped,
       lower-cased decoded token receives the next rank in token-id order.

    Args:
        tokenizer: Object exposing ``vocab_size`` and ``decode(list_of_ids)``.
            Only used on the fallback path.
        max_words: Upper bound on the number of entries in the returned
            table. Applied on both paths.

    Returns:
        Mapping from word (or decoded token) to its 1-based rank.
    """
    if HAS_DATASETS:
        print("Loading wikitext-2 for frequency estimation...")
        try:
            dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
            word_counts = Counter()
            for example in dataset:
                # split() on a blank line yields [], so blank lines add nothing.
                word_counts.update(example["text"].lower().split())
            ranked = word_counts.most_common(max_words)
            word_rank = {word: rank for rank, (word, _) in enumerate(ranked, start=1)}
            print(f"Built frequency table with {len(word_rank)} words from wikitext-2")
            return word_rank
        except Exception as e:
            # Deliberately broad: any download/parsing problem should fall
            # through to the vocabulary-order proxy rather than abort.
            print(f"Failed to load wikitext-2: {e}")

    print("Using GPT-2 vocab order as frequency proxy...")
    word_rank = {}
    for token_id in range(tokenizer.vocab_size):
        # Honour the same size cap that the wikitext path applies.
        if len(word_rank) >= max_words:
            break
        token = tokenizer.decode([token_id]).strip().lower()
        if token and token not in word_rank:
            word_rank[token] = len(word_rank) + 1
    print(f"Built frequency table with {len(word_rank)} tokens")
    return word_rank

word_freq_table = build_word_frequency_table(tokenizer)

# Rank 1 is the most frequent entry, so an ascending sort by rank lists the
# common words first and the rare words last.
_entries_by_rank = sorted(word_freq_table.items(), key=lambda entry: entry[1])
common = _entries_by_rank[:10]
rare = _entries_by_rank[-10:]
print(f"\nMost common: {common}")
print(f"Most rare: {rare}")


In [None]:
# Wave encoder with INVERTED frequency mapping
# Common words = HIGH frequency (more attended)
# Rare words = LOW frequency (less attended)

@dataclass
class WaveConfig:
    """Parameters that control how ZipfWaveEncoder renders tokens as waves."""
    # Lower end of the mapped range; tokens at the maximum rank (and tokens
    # missing from the frequency table) land exactly here.
    freq_min: float = 0.5      # Minimum frequency (RARE words)
    # Upper end of the mapped range, approached by the lowest-rank tokens.
    freq_max: float = 10.0     # Maximum frequency (COMMON words)
    # Number of harmonics summed per token wave; harmonic h is scaled by 1/h.
    n_harmonics: int = 4
    # sample_rate * duration gives the number of points on the time axis.
    sample_rate: int = 100
    # Length of the time axis, which runs from 0 to duration.
    duration: float = 2.0

class ZipfWaveEncoder:
    """Encode tokens as complex waves using an INVERTED Zipf frequency mapping.

    A token's rank in ``word_freq_table`` (1 = most frequent) is mapped on a
    log scale into ``[config.freq_min, config.freq_max]`` so that common words
    get HIGH base frequencies and rare or unknown words get LOW ones.
    """

    def __init__(self, tokenizer, word_freq_table: Dict[str, int], config: "WaveConfig"):
        """Store the collaborators and precompute the shared time axis.

        Args:
            tokenizer: Object exposing ``encode(text)`` and ``decode(ids)``.
            word_freq_table: Mapping from lower-cased word to 1-based rank.
            config: Frequency range, harmonic count and sampling settings.
        """
        self.tokenizer = tokenizer
        self.word_freq_table = word_freq_table
        self.config = config
        # An empty table falls back to a nominal vocabulary size so the
        # log-normalisation below stays well defined.
        self.max_rank = max(word_freq_table.values()) if word_freq_table else 50000
        n_samples = int(config.sample_rate * config.duration)
        self.t = np.linspace(0, config.duration, n_samples)

    def token_to_frequency(self, token: str) -> float:
        """INVERTED: Common words -> HIGH freq, Rare words -> LOW freq.

        The token is stripped and lower-cased before lookup; tokens missing
        from the table are treated as having the maximum rank and therefore
        map to ``freq_min``. Because the scale uses ``log(rank + 1)``, rank 1
        maps slightly below ``freq_max`` rather than exactly onto it.
        """
        token_clean = token.strip().lower()
        rank = self.word_freq_table.get(token_clean, self.max_rank)

        log_rank = np.log(rank + 1)
        log_max = np.log(self.max_rank + 1)
        normalized = 1.0 - (log_rank / log_max)  # INVERTED

        freq = self.config.freq_min + (self.config.freq_max - self.config.freq_min) * normalized
        return freq

    def token_to_wave(self, token: str, phase_offset: float = 0.0) -> np.ndarray:
        """Return the token's complex wave sampled on ``self.t``.

        The wave is a sum of ``n_harmonics`` complex exponentials: harmonic
        ``h`` has amplitude ``1/h``, frequency ``h * base_freq`` and phase
        ``h * phase_offset``.
        """
        freq = self.token_to_frequency(token)
        wave = np.zeros_like(self.t, dtype=np.complex128)
        for h in range(1, self.config.n_harmonics + 1):
            amplitude = 1.0 / h
            harmonic_freq = freq * h
            wave += amplitude * np.exp(1j * 2 * np.pi * harmonic_freq * self.t + 1j * phase_offset * h)
        return wave

    def encode_sequence(self, text: str) -> Tuple[List[str], np.ndarray, np.ndarray]:
        """Tokenize ``text`` and encode every token as a wave.

        Returns:
            ``(tokens, frequencies, waves)`` where ``tokens`` are the decoded
            token strings, ``frequencies`` holds one base frequency per token
            and ``waves`` is a complex array of shape
            ``(len(tokens), len(self.t))``. Token ``i`` receives a phase
            offset of ``2*pi*i / len(tokens)``.
        """
        # Use the tokenizer handed to the constructor, not a module-level
        # global, so the encoder works with whatever tokenizer it was given.
        token_ids = self.tokenizer.encode(text)
        tokens = [self.tokenizer.decode([tid]) for tid in token_ids]
        frequencies = np.array([self.token_to_frequency(tok) for tok in tokens])
        n_tokens = len(tokens)
        waves = np.zeros((n_tokens, len(self.t)), dtype=np.complex128)
        for i, token in enumerate(tokens):
            phase_offset = 2 * np.pi * i / n_tokens
            waves[i] = self.token_to_wave(token, phase_offset)
        return tokens, frequencies, waves

# Build the encoder with the default WaveConfig and the frequency table from above.
wave_config = WaveConfig()
wave_encoder = ZipfWaveEncoder(tokenizer, word_freq_table, wave_config)

# Report the configured bounds of the frequency range.
print(f"Wave encoder (INVERTED): freq range [{wave_config.freq_min}, {wave_config.freq_max}] Hz")
print(f"  Common words -> {wave_config.freq_max} Hz (HIGH)")
print(f"  Rare words   -> {wave_config.freq_min} Hz (LOW)")


In [None]:
# FIXED coherence measures that respond to attention

def compute_wave_coherence_v1(waves: np.ndarray) -> float:
    """Original coherence (BROKEN - doesn't respond to attention).

    Every sample is reduced to a unit phasor by dividing out its magnitude
    (floored at 1e-10), the phasors are averaged across waves (axis 0), and
    the magnitude of that average is then averaged over the remaining axis.
    Dividing out the magnitude is what discards any amplitude scaling.
    """
    magnitude = np.maximum(np.abs(waves), 1e-10)
    unit_phasors = waves / magnitude
    return np.abs(unit_phasors.mean(axis=0)).mean()

def compute_wave_coherence_weighted(waves: np.ndarray, weights: np.ndarray) -> float:
    """FIX 1: Weighted coherence that preserves attention influence.

    At each sample the magnitude of the weighted sum of waves is divided by
    the weighted sum of the individual magnitudes (plus 1e-10 to avoid a
    zero denominator); the per-sample ratios are then averaged.
    """
    column_weights = weights[:, None]
    resultant = np.abs((column_weights * waves).sum(axis=0))
    upper_bound = (column_weights * np.abs(waves)).sum(axis=0)
    return (resultant / (upper_bound + 1e-10)).mean()

def compute_spectral_concentration(waves: np.ndarray, weights: np.ndarray, sample_rate: int = 100) -> float:
    """FIX 2: Spectral concentration of superposed wave.

    The weighted waves are summed, the FFT of the real part is taken, and the
    power over the strictly positive frequency bins is normalised to sum to
    one. The result is one minus that distribution's entropy divided by the
    log of the number of bins, so a single dominant bin gives a value near 1.
    """
    mixture = (weights[:, None] * waves).sum(axis=0)
    bin_freqs = fftfreq(len(mixture), d=1/sample_rate)
    spectrum = np.abs(fft(mixture.real))[bin_freqs > 0]
    power = spectrum ** 2
    power = power / (power.sum() + 1e-10)
    spectral_entropy = -np.sum(power * np.log(power + 1e-10))
    return 1.0 - (spectral_entropy / np.log(len(power)))

def compute_interference_strength(waves: np.ndarray, weights: np.ndarray) -> float:
    """FIX 3: Interference strength (>1 = constructive).

    Ratio of the mean power of the weighted superposition to the mean of the
    summed powers of the individually weighted waves (1e-10 is added to the
    denominator to avoid division by zero).
    """
    column_weights = weights[:, None]
    coherent_power = np.abs((column_weights * waves).sum(axis=0)) ** 2
    incoherent_power = ((column_weights ** 2) * (np.abs(waves) ** 2)).sum(axis=0)
    return coherent_power.mean() / (incoherent_power.mean() + 1e-10)

def run_inference(text: str) -> Dict:
    """Run one forward pass of the global model on ``text``.

    Returns a dict with the encoded ``input_ids`` (left on ``device``), the
    per-token decoded strings, and CPU copies of the logits, the attention
    tensors and the hidden states, plus ``n_layers`` (number of attention
    tensors), ``n_heads`` (size of dim 1 of the first attention tensor) and
    ``seq_len`` (size of dim 1 of ``input_ids``).
    """
    input_ids = tokenizer.encode(text, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(input_ids, output_attentions=True, output_hidden_states=True)

    decoded_tokens = [tokenizer.decode([tid]) for tid in input_ids[0]]
    attentions_cpu = [layer_attn.cpu() for layer_attn in outputs.attentions]
    hidden_cpu = [layer_hidden.cpu() for layer_hidden in outputs.hidden_states]

    return {
        "input_ids": input_ids,
        "tokens": decoded_tokens,
        "logits": outputs.logits.cpu(),
        "attentions": attentions_cpu,
        "hidden_states": hidden_cpu,
        "n_layers": len(outputs.attentions),
        "n_heads": outputs.attentions[0].size(1),
        "seq_len": input_ids.size(1),
    }

# Status banner listing the measures defined in this cell; v1 is kept only as
# the baseline that the three fixes are compared against.
print("Coherence functions defined:")
print("  - compute_wave_coherence_v1() [BROKEN]")
print("  - compute_wave_coherence_weighted() [FIX 1]")
print("  - compute_spectral_concentration() [FIX 2]")
print("  - compute_interference_strength() [FIX 3]")


## Experiment 1: Verify inverted frequency mapping


In [None]:
# Visualize inverted frequency mapping
test_text = "The quantum mechanical wave function describes probability"
tokens, frequencies, waves = wave_encoder.encode_sequence(test_text)

print(f"Text: '{test_text}'")
print("\nToken frequencies (INVERTED):")
# List tokens from highest to lowest mapped frequency.
for token, freq in sorted(zip(tokens, frequencies), key=lambda x: -x[1]):
    rank = word_freq_table.get(token.strip().lower(), wave_encoder.max_rank)
    print(f"  '{token:15s}' rank={rank:6d} -> {freq:.2f} Hz")

fig, axes = plt.subplots(len(tokens), 1, figsize=(14, 2 * len(tokens)), sharex=True)
# plt.subplots returns a bare Axes (not an array) when there is a single row;
# normalise it so the loop and axes[-1] below also work for a one-token text.
axes = np.atleast_1d(axes)
for ax, token, freq, wave in zip(axes, tokens, frequencies, waves):
    # Colour encodes the mapped frequency relative to the configured maximum.
    color = cm.viridis(freq / wave_config.freq_max)
    ax.plot(wave_encoder.t, wave.real, color=color, linewidth=1)
    ax.set_ylabel(f"{token.strip()}\n{freq:.1f}Hz", fontsize=9, rotation=0, ha='right', va='center')
    ax.set_ylim(-2, 2)
    ax.grid(True, alpha=0.3)
    # Only the first half second is shown.
    ax.set_xlim(0, 0.5)
axes[-1].set_xlabel("Time (s)")
plt.suptitle("INVERTED: Common words = HIGH freq (bright), Rare = LOW freq (dark)", fontsize=12)
plt.tight_layout()
plt.savefig(FIG_DIR / "01_inverted_token_waves.png", dpi=150)
plt.show()


## Experiment 2: Compare coherence measures through layers


In [None]:
# Compare all coherence measures through layers
prompt = "The ancient library contained books about quantum mechanics and philosophy"
print(f"Prompt: '{prompt}'")

tokens, frequencies, waves = wave_encoder.encode_sequence(prompt)
result = run_inference(prompt)
print(f"Tokens: {len(tokens)}")

# One list per measure; each receives one value per layer.
coherence_v1, coherence_weighted, spectral_conc, interference = [], [], [], []

for layer_idx in range(result['n_layers']):
    # Head-averaged attention for this layer (first batch element), then the
    # row belonging to the last query position.
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    query_attn = attn[-1, :]
    
    # v1 has no weights argument, so it is fed pre-scaled waves; the other
    # three measures take the raw waves and the attention weights separately.
    coherence_v1.append(compute_wave_coherence_v1(waves * query_attn[:, None]))
    coherence_weighted.append(compute_wave_coherence_weighted(waves, query_attn))
    spectral_conc.append(compute_spectral_concentration(waves, query_attn, wave_config.sample_rate))
    interference.append(compute_interference_strength(waves, query_attn))

# 2x2 grid: one panel per measure.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

axes[0, 0].plot(range(n_layers), coherence_v1, 'o--', color='gray', linewidth=2, label='v1 (BROKEN)', alpha=0.5)
axes[0, 0].set_xlabel("Layer"); axes[0, 0].set_ylabel("Coherence")
axes[0, 0].set_title("Original Coherence (doesn't change)"); axes[0, 0].grid(True, alpha=0.3); axes[0, 0].legend()

axes[0, 1].plot(range(n_layers), coherence_weighted, 'o-', color='teal', linewidth=2, label='Weighted')
axes[0, 1].set_xlabel("Layer"); axes[0, 1].set_ylabel("Coherence")
axes[0, 1].set_title("FIX 1: Weighted Coherence"); axes[0, 1].grid(True, alpha=0.3); axes[0, 1].legend()

axes[1, 0].plot(range(n_layers), spectral_conc, 's-', color='purple', linewidth=2, label='Spectral')
axes[1, 0].set_xlabel("Layer"); axes[1, 0].set_ylabel("Concentration")
axes[1, 0].set_title("FIX 2: Spectral Concentration"); axes[1, 0].grid(True, alpha=0.3); axes[1, 0].legend()

axes[1, 1].plot(range(n_layers), interference, '^-', color='coral', linewidth=2, label='Interference')
# Reference line at ratio 1.0; drawn before legend() so its label is included.
axes[1, 1].axhline(y=1.0, color='black', linestyle='--', alpha=0.5, label='No interference')
axes[1, 1].set_xlabel("Layer"); axes[1, 1].set_ylabel("Interference Ratio")
axes[1, 1].set_title("FIX 3: Interference (>1=constructive)"); axes[1, 1].grid(True, alpha=0.3); axes[1, 1].legend()

plt.suptitle(f"Coherence Measures Comparison", fontsize=12)
plt.tight_layout()
plt.savefig(FIG_DIR / "02_coherence_comparison.png", dpi=150)
plt.show()

# First-layer value -> last-layer value, and their difference, for each measure.
print(f"\nv1 (BROKEN):  {coherence_v1[0]:.4f} -> {coherence_v1[-1]:.4f} (change: {coherence_v1[-1]-coherence_v1[0]:.4f})")
print(f"Weighted:     {coherence_weighted[0]:.4f} -> {coherence_weighted[-1]:.4f} (change: {coherence_weighted[-1]-coherence_weighted[0]:.4f})")
print(f"Spectral:     {spectral_conc[0]:.4f} -> {spectral_conc[-1]:.4f} (change: {spectral_conc[-1]-spectral_conc[0]:.4f})")
print(f"Interference: {interference[0]:.4f} -> {interference[-1]:.4f} (change: {interference[-1]-interference[0]:.4f})")


## Experiment 3: Multi-prompt analysis with fixed measures


In [None]:
# Define test prompts
# Keys follow "<category>_<n>"; the comparison figure later groups prompts by
# matching these key prefixes (factual / narrative / technical / philosophical).
test_prompts = {
    "factual_1": "The capital of France is",
    "factual_2": "Water boils at one hundred degrees",
    "factual_3": "The chemical symbol for gold is",
    "factual_4": "The Earth orbits around the",
    "factual_5": "The speed of light in vacuum is approximately",
    
    "narrative_1": "She opened the door and saw",
    "narrative_2": "The old man walked slowly towards the",
    "narrative_3": "In the darkness of the forest, something moved",
    "narrative_4": "After years of searching, he finally found the",
    
    "technical_1": "The quantum mechanical wave function describes probability amplitudes",
    "technical_2": "In machine learning, gradient descent optimizes the loss function by",
    "technical_3": "The transformer architecture uses self-attention to process sequences",
    "technical_4": "Photosynthesis converts carbon dioxide and water into glucose using",
    
    "philosophical_1": "The meaning of existence is",
    "philosophical_2": "When considering the nature of consciousness,",
    "philosophical_3": "The relationship between mind and matter suggests that",
}
print(f"Testing {len(test_prompts)} prompts...")


In [None]:
# Run every test prompt through the model and collect per-layer wave measures
all_results = {}

for name, prompt in test_prompts.items():
    print(f"Processing: {name}...")

    result = run_inference(prompt)
    tokens, frequencies, waves = wave_encoder.encode_sequence(prompt)

    # One list per measure, filled with one value per layer.
    per_layer = {
        "coherence_v1": [],
        "coherence_weighted": [],
        "spectral_concentration": [],
        "interference": [],
    }
    for layer_idx in range(result['n_layers']):
        # Head-averaged attention of the last query position over all tokens.
        query_attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()[-1, :]
        per_layer["coherence_v1"].append(compute_wave_coherence_v1(waves * query_attn[:, None]))
        per_layer["coherence_weighted"].append(compute_wave_coherence_weighted(waves, query_attn))
        per_layer["spectral_concentration"].append(
            compute_spectral_concentration(waves, query_attn, wave_config.sample_rate)
        )
        per_layer["interference"].append(compute_interference_strength(waves, query_attn))

    # Next-token distribution at the final position and its entropy (natural log).
    probs = F.softmax(result["logits"][0, -1, :], dim=-1)
    entropy = -(probs * probs.clamp(min=1e-10).log()).sum().item()
    top_token = tokenizer.decode([probs.argmax().item()])

    all_results[name] = {
        "prompt": prompt,
        "tokens": tokens,
        "frequencies": frequencies,
        "mean_freq": float(np.mean(frequencies)),
        **per_layer,
        "output_entropy": entropy,
        "top_token": top_token,
        "n_tokens": len(tokens),
    }

print("\nAll prompts processed.")


In [None]:
# Summary table: one row per prompt, showing the final-layer value of each measure
_rule = "=" * 120
print("\n" + _rule)
print(f"{'Prompt':<55} {'Tok':>4} {'MeanF':>6} {'Top':>10} {'Ent':>6} {'Coh_w':>7} {'Spec':>7} {'Inter':>7}")
print(_rule)

for name, r in all_results.items():
    # Last entry of each per-layer list is the final layer's value.
    final_coh_w, final_spec, final_inter = (
        r[measure][-1]
        for measure in ("coherence_weighted", "spectral_concentration", "interference")
    )
    print(f"{r['prompt'][:53]:<55} {r['n_tokens']:>4} {r['mean_freq']:>6.2f} {r['top_token']:>10} {r['output_entropy']:>6.2f} {final_coh_w:>7.4f} {final_spec:>7.4f} {final_inter:>7.4f}")

print(_rule)


In [None]:
# Compare prompt types with fixed measures
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Prompts are grouped by the prefix of their key in all_results.
groups = {
    "Factual": [k for k in all_results.keys() if k.startswith("factual")],
    "Narrative": [k for k in all_results.keys() if k.startswith("narrative")],
    "Technical": [k for k in all_results.keys() if k.startswith("technical")],
    "Philosophical": [k for k in all_results.keys() if k.startswith("philosophical")]
}
colors = {"Factual": "blue", "Narrative": "green", "Technical": "purple", "Philosophical": "red"}

# Weighted coherence by group
ax1 = axes[0, 0]
for group_name, prompt_names in groups.items():
    # Thin, faint line per prompt; thick labelled line for the group mean.
    for pname in prompt_names:
        ax1.plot(range(n_layers), all_results[pname]["coherence_weighted"], 
                 color=colors[group_name], alpha=0.3, linewidth=1)
    mean_coh = np.mean([all_results[pname]["coherence_weighted"] for pname in prompt_names], axis=0)
    ax1.plot(range(n_layers), mean_coh, color=colors[group_name], linewidth=3, label=group_name)
ax1.set_xlabel("Layer"); ax1.set_ylabel("Weighted Coherence")
ax1.set_title("Weighted Coherence by Prompt Type"); ax1.legend(); ax1.grid(True, alpha=0.3)

# Spectral concentration by group
ax2 = axes[0, 1]
for group_name, prompt_names in groups.items():
    # Only the per-group mean across prompts is drawn here.
    mean_spec = np.mean([all_results[pname]["spectral_concentration"] for pname in prompt_names], axis=0)
    ax2.plot(range(n_layers), mean_spec, 'o-', color=colors[group_name], linewidth=2, label=group_name)
ax2.set_xlabel("Layer"); ax2.set_ylabel("Spectral Concentration")
ax2.set_title("Spectral Concentration by Type"); ax2.legend(); ax2.grid(True, alpha=0.3)

# Mean frequency vs final coherence
ax3 = axes[1, 0]
for group_name, prompt_names in groups.items():
    mean_freqs = [all_results[pname]["mean_freq"] for pname in prompt_names]
    final_cohs = [all_results[pname]["coherence_weighted"][-1] for pname in prompt_names]
    ax3.scatter(mean_freqs, final_cohs, s=100, c=colors[group_name], alpha=0.7, label=group_name, edgecolors='black')
# Correlation coefficient over all prompts (not per group), shown in the panel corner.
corr = np.corrcoef([r["mean_freq"] for r in all_results.values()],
                   [r["coherence_weighted"][-1] for r in all_results.values()])[0, 1]
ax3.annotate(f"r = {corr:.3f}", xy=(0.05, 0.95), xycoords="axes fraction", fontsize=11)
ax3.set_xlabel("Mean Zipf-Wave Frequency (Hz)"); ax3.set_ylabel("Final Weighted Coherence")
ax3.set_title("Token Frequency vs Coherence"); ax3.legend(); ax3.grid(True, alpha=0.3)

# Entropy vs interference
ax4 = axes[1, 1]
for group_name, prompt_names in groups.items():
    entropies = [all_results[pname]["output_entropy"] for pname in prompt_names]
    interferences = [all_results[pname]["interference"][-1] for pname in prompt_names]
    ax4.scatter(entropies, interferences, s=100, c=colors[group_name], alpha=0.7, label=group_name, edgecolors='black')
# Reference line at interference ratio 1.0.
ax4.axhline(y=1.0, color='black', linestyle='--', alpha=0.5)
ax4.set_xlabel("Output Entropy"); ax4.set_ylabel("Interference Strength")
ax4.set_title("Entropy vs Wave Interference"); ax4.legend(); ax4.grid(True, alpha=0.3)

plt.suptitle("Phase Dynamics with Fixed Measures (INVERTED Zipf)", fontsize=14)
plt.tight_layout()
plt.savefig(FIG_DIR / "03_prompt_comparison_v2.png", dpi=150)
plt.show()


## Experiment 4: Detailed wave interference visualization


In [None]:
# Detailed wave interference for technical prompt
prompt = "The quantum mechanical wave function describes probability amplitudes"
tokens, frequencies, waves = wave_encoder.encode_sequence(prompt)
result = run_inference(prompt)

print(f"Prompt: '{prompt}'")
print(f"\nToken frequencies (INVERTED):")
for token, freq in zip(tokens, frequencies):
    print(f"  '{token.strip():15s}' -> {freq:.2f} Hz")

# Five stacked rows: token waves, attention maps, superpositions, FFT spectra,
# and all measures across layers.
fig = plt.figure(figsize=(18, 16))
gs = GridSpec(5, 3, figure=fig)

# Token waves
ax1 = fig.add_subplot(gs[0, :])
# Only the first 50 samples of the time axis are drawn.
t_show = wave_encoder.t[:50]
for i, (token, wave, freq) in enumerate(zip(tokens, waves, frequencies)):
    # Each token's trace is scaled down and shifted vertically so traces stack.
    offset = i * 0.3
    color = cm.plasma(freq / wave_config.freq_max)
    ax1.plot(t_show, wave.real[:50] * 0.1 + offset, color=color, linewidth=1)
    ax1.text(-0.02, offset, f"{token.strip()[:10]} ({freq:.1f})", ha='right', fontsize=8, va='center')
ax1.set_xlabel("Time (s)")
ax1.set_title("Token Waves (INVERTED: common=bright/fast, rare=dark/slow)")
ax1.set_xlim(-0.15, t_show[-1])

# Attention patterns
# NOTE(review): layer indices 0, 5, 11 are hard-coded here and in the two loops
# below; they look chosen for a 12-layer model — confirm if model_name changes.
for col, layer_idx in enumerate([0, 5, 11]):
    ax = fig.add_subplot(gs[1, col])
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    im = ax.imshow(attn, cmap='Blues', aspect='auto')
    ax.set_title(f"Layer {layer_idx} Attention")
    ax.set_xlabel("Key"); ax.set_ylabel("Query")
    plt.colorbar(im, ax=ax, shrink=0.7)

# Superposed waves
for col, layer_idx in enumerate([0, 5, 11]):
    ax = fig.add_subplot(gs[2, col])
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    query_attn = attn[-1, :]
    # Attention-weighted sum of the token waves for the last query position.
    superposed = (query_attn[:, None] * waves).sum(axis=0)
    coh_w = compute_wave_coherence_weighted(waves, query_attn)
    ax.plot(t_show, superposed.real[:50], 'b-', linewidth=1.5)
    # Shaded band spans +/- the magnitude of the superposed wave.
    ax.fill_between(t_show, -np.abs(superposed[:50]), np.abs(superposed[:50]), alpha=0.2, color='blue')
    ax.set_title(f"Layer {layer_idx} Superposition (Coh={coh_w:.3f})")
    ax.set_xlabel("Time (s)"); ax.grid(True, alpha=0.3)

# FFT spectra
for col, layer_idx in enumerate([0, 5, 11]):
    ax = fig.add_subplot(gs[3, col])
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    query_attn = attn[-1, :]
    superposed = (query_attn[:, None] * waves).sum(axis=0)
    # Same spectrum construction as compute_spectral_concentration: FFT of the
    # real part, strictly positive frequencies only.
    fft_vals = np.abs(fft(superposed.real))
    freqs = fftfreq(len(superposed), d=1/wave_config.sample_rate)
    pos_mask = freqs > 0
    spec_conc = compute_spectral_concentration(waves, query_attn, wave_config.sample_rate)
    # Only the first 50 positive-frequency bins are drawn.
    ax.plot(freqs[pos_mask][:50], fft_vals[pos_mask][:50], 'g-', linewidth=1.5)
    ax.set_xlabel("Frequency (Hz)"); ax.set_ylabel("Magnitude")
    ax.set_title(f"Layer {layer_idx} FFT (Conc={spec_conc:.3f})"); ax.grid(True, alpha=0.3)

# All measures through layers
ax = fig.add_subplot(gs[4, :])
coh_w_all, spec_all, inter_all = [], [], []
for layer_idx in range(n_layers):
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    query_attn = attn[-1, :]
    coh_w_all.append(compute_wave_coherence_weighted(waves, query_attn))
    spec_all.append(compute_spectral_concentration(waves, query_attn, wave_config.sample_rate))
    inter_all.append(compute_interference_strength(waves, query_attn))

ax.plot(range(n_layers), coh_w_all, 'o-', color='teal', linewidth=2, label='Weighted Coherence')
ax.plot(range(n_layers), spec_all, 's-', color='purple', linewidth=2, label='Spectral Conc.')
ax.plot(range(n_layers), inter_all, '^-', color='coral', linewidth=2, label='Interference')
ax.set_xlabel("Layer"); ax.set_ylabel("Value")
ax.set_title("All Measures Through Layers"); ax.legend(); ax.grid(True, alpha=0.3)

plt.suptitle(f"Wave Interference Analysis: '{prompt[:50]}...'", fontsize=14)
plt.tight_layout()
plt.savefig(FIG_DIR / "04_wave_interference_detailed_v2.png", dpi=150)
plt.show()


## Experiment 5: Per-head wave interference


In [None]:
# Per-head wave interference analysis
prompt = "The ancient library contained books about quantum mechanics and philosophy"
tokens, frequencies, waves = wave_encoder.encode_sequence(prompt)
result = run_inference(prompt)

print(f"Analyzing per-head interference for: '{prompt[:50]}...'")
print(f"Model has {n_heads} heads per layer")

# One (layer, head) grid per measure
head_coherences = np.zeros((n_layers, n_heads))
head_interferences = np.zeros((n_layers, n_heads))

for layer_idx in range(n_layers):
    # Keep only the last query position: one row of attention weights per head.
    last_query_attn = result["attentions"][layer_idx][0].numpy()[:, -1, :]
    for head_idx in range(n_heads):
        head_attn = last_query_attn[head_idx]
        head_coherences[layer_idx, head_idx] = compute_wave_coherence_weighted(waves, head_attn)
        head_interferences[layer_idx, head_idx] = compute_interference_strength(waves, head_attn)

# Side-by-side heatmaps (rows = layers, columns = heads)
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
coherence_ax, interference_ax = axes

im1 = coherence_ax.imshow(head_coherences, aspect='auto', cmap='viridis')
coherence_ax.set_xlabel("Head")
coherence_ax.set_ylabel("Layer")
coherence_ax.set_title("Weighted Coherence per Head")
plt.colorbar(im1, ax=coherence_ax)

# Colour scale is fixed to [0.5, 1.5], i.e. centred on the ratio 1.0.
im2 = interference_ax.imshow(head_interferences, aspect='auto', cmap='coolwarm', vmin=0.5, vmax=1.5)
interference_ax.set_xlabel("Head")
interference_ax.set_ylabel("Layer")
interference_ax.set_title("Interference Strength per Head (1.0 = no interference)")
plt.colorbar(im2, ax=interference_ax)

plt.suptitle("Per-Head Wave Dynamics", fontsize=14)
plt.tight_layout()
plt.savefig(FIG_DIR / "05_per_head_coherence.png", dpi=150)
plt.show()

# Locate the (layer, head) cells with the highest and lowest weighted coherence
max_idx = np.unravel_index(np.argmax(head_coherences), head_coherences.shape)
min_idx = np.unravel_index(np.argmin(head_coherences), head_coherences.shape)
print(f"\nMost coherent: Layer {max_idx[0]}, Head {max_idx[1]} (coh={head_coherences[max_idx]:.4f})")
print(f"Least coherent: Layer {min_idx[0]}, Head {min_idx[1]} (coh={head_coherences[min_idx]:.4f})")


## Experiment 6: Wave evolution through all 12 layers


In [None]:
# Wave evolution through every layer of the model (factual prompt)
prompt = "The capital of France is"  # Factual prompt
tokens, frequencies, waves = wave_encoder.encode_sequence(prompt)
result = run_inference(prompt)

# Decode the arg-max next token once; it is reused for the printout and the title.
top_prediction = tokenizer.decode([result['logits'][0, -1].argmax().item()])
print(f"Prompt: '{prompt}'")
print(f"Top prediction: {top_prediction}")

# Four panels per row; the row count follows the number of attention layers
# returned by the model instead of assuming 12 (12 layers still gives 3x4).
n_panels = result['n_layers']
n_cols = 4
n_rows = math.ceil(n_panels / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 4 * n_rows), squeeze=False)
t_show = wave_encoder.t[:100]

for layer_idx in range(n_panels):
    row, col = divmod(layer_idx, n_cols)
    ax = axes[row, col]

    # Head-averaged attention of the last query position over all tokens.
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    query_attn = attn[-1, :]
    # Attention-weighted sum of the token waves.
    superposed = (query_attn[:, None] * waves).sum(axis=0)

    coh_w = compute_wave_coherence_weighted(waves, query_attn)
    inter = compute_interference_strength(waves, query_attn)

    ax.plot(t_show, superposed.real[:100], 'b-', linewidth=1.2, alpha=0.8)
    # Shaded band spans +/- the magnitude of the superposed wave.
    ax.fill_between(t_show, -np.abs(superposed[:100]), np.abs(superposed[:100]), alpha=0.15, color='blue')
    ax.set_title(f"Layer {layer_idx}: Coh={coh_w:.3f}, Int={inter:.2f}", fontsize=10)
    ax.set_xlim(0, t_show[-1])
    ax.grid(True, alpha=0.3)
    # Axis labels only on the outer edge of the grid.
    if row == n_rows - 1:
        ax.set_xlabel("Time (s)")
    if col == 0:
        ax.set_ylabel("Amplitude")

# Hide unused panels when the layer count is not a multiple of n_cols.
for unused_ax in axes.ravel()[n_panels:]:
    unused_ax.set_visible(False)

plt.suptitle(f"Wave Evolution: '{prompt}' -> Top: {top_prediction}", fontsize=14)
plt.tight_layout()
plt.savefig(FIG_DIR / "06_wave_evolution_factual.png", dpi=150)
plt.show()


In [None]:
# Wave evolution through every layer of the model (open-ended prompt)
prompt = "The meaning of existence is"  # Philosophical prompt
tokens, frequencies, waves = wave_encoder.encode_sequence(prompt)
result = run_inference(prompt)

# Decode the arg-max next token once; it is reused for the printout and the title.
top_prediction = tokenizer.decode([result['logits'][0, -1].argmax().item()])
print(f"Prompt: '{prompt}'")
print(f"Top prediction: {top_prediction}")

# Four panels per row; the row count follows the number of attention layers
# returned by the model instead of assuming 12 (12 layers still gives 3x4).
n_panels = result['n_layers']
n_cols = 4
n_rows = math.ceil(n_panels / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 4 * n_rows), squeeze=False)
t_show = wave_encoder.t[:100]

for layer_idx in range(n_panels):
    row, col = divmod(layer_idx, n_cols)
    ax = axes[row, col]

    # Head-averaged attention of the last query position over all tokens.
    attn = result["attentions"][layer_idx][0].mean(dim=0).numpy()
    query_attn = attn[-1, :]
    # Attention-weighted sum of the token waves.
    superposed = (query_attn[:, None] * waves).sum(axis=0)

    coh_w = compute_wave_coherence_weighted(waves, query_attn)
    inter = compute_interference_strength(waves, query_attn)

    ax.plot(t_show, superposed.real[:100], 'r-', linewidth=1.2, alpha=0.8)
    # Shaded band spans +/- the magnitude of the superposed wave.
    ax.fill_between(t_show, -np.abs(superposed[:100]), np.abs(superposed[:100]), alpha=0.15, color='red')
    ax.set_title(f"Layer {layer_idx}: Coh={coh_w:.3f}, Int={inter:.2f}", fontsize=10)
    ax.set_xlim(0, t_show[-1])
    ax.grid(True, alpha=0.3)
    # Axis labels only on the outer edge of the grid.
    if row == n_rows - 1:
        ax.set_xlabel("Time (s)")
    if col == 0:
        ax.set_ylabel("Amplitude")

# Hide unused panels when the layer count is not a multiple of n_cols.
for unused_ax in axes.ravel()[n_panels:]:
    unused_ax.set_visible(False)

plt.suptitle(f"Wave Evolution: '{prompt}' -> Top: {top_prediction}", fontsize=14)
plt.tight_layout()
plt.savefig(FIG_DIR / "07_wave_evolution_philosophical.png", dpi=150)
plt.show()


In [None]:
# Save results: model/encoder settings plus one record per analysed prompt
summary = {
    "model": model_name,
    "n_layers": n_layers,
    "n_heads": n_heads,
    "wave_config": {
        "freq_min": wave_config.freq_min,
        "freq_max": wave_config.freq_max,
        "n_harmonics": wave_config.n_harmonics,
        "mapping": "INVERTED: common words = HIGH freq, rare words = LOW freq"
    },
    # Each record stores the first-layer and final-layer value of every
    # measure, followed by the full per-layer series.
    "results": [
        {
            "name": name,
            "prompt": r["prompt"],
            "n_tokens": r["n_tokens"],
            "mean_frequency": r["mean_freq"],
            "output_entropy": r["output_entropy"],
            "top_token": r["top_token"],
            "coherence_weighted_layer_0": r["coherence_weighted"][0],
            "coherence_weighted_layer_final": r["coherence_weighted"][-1],
            "spectral_concentration_layer_0": r["spectral_concentration"][0],
            "spectral_concentration_layer_final": r["spectral_concentration"][-1],
            "interference_layer_0": r["interference"][0],
            "interference_layer_final": r["interference"][-1],
            "coherence_weighted_all": r["coherence_weighted"],
            "spectral_concentration_all": r["spectral_concentration"],
            "interference_all": r["interference"],
        }
        for name, r in all_results.items()
    ],
}

with open(FIG_DIR / "wave_phase_results_v2.json", "w") as f:
    json.dump(summary, f, indent=2)

print(f"Results saved to {FIG_DIR / 'wave_phase_results_v2.json'}")


## Summary

### Key fixes in v2

1. **INVERTED frequency mapping**:
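   - Common words ("the", "is") -> HIGH frequency (approaching the 10 Hz ceiling; the log(rank + 1) scale puts even rank 1 slightly below it)
   - Rare words ("quantum", "crystallization") -> LOW frequency (approaching the 0.5 Hz floor; words missing from the frequency table map to exactly 0.5 Hz)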

2. **Fixed coherence measures**:
   - `compute_wave_coherence_weighted()`: Preserves attention influence
   - `compute_spectral_concentration()`: Measures FFT spectrum concentration
   - `compute_interference_strength()`: Measures constructive/destructive interference

3. **Why the original was broken**:
   - Attention scaled amplitude, but we normalized amplitude away
   - Phase relationships were fixed at encoding, not affected by attention
   - Result: coherence was constant across all layers (no signal)

### Figures generated

1. `01_inverted_token_waves.png` - Token wave visualization
2. `02_coherence_comparison.png` - Compare v1 (broken) vs fixed measures
3. `03_prompt_comparison_v2.png` - Multi-prompt analysis
4. `04_wave_interference_detailed_v2.png` - Detailed interference visualization
5. `05_per_head_coherence.png` - Per-head coherence heatmaps
6. `06_wave_evolution_factual.png` - 12-layer evolution (factual prompt)
7. `07_wave_evolution_philosophical.png` - 12-layer evolution (philosophical prompt)

### Connection to AKIRA theory

- **RADAR_ARRAY.md**: Spectral decomposition of signals, frequency bands
- **HARMONY_AND_COHERENCE.md**: Phase locking, belief collapse as phase transition
- **ACTION_QUANTA.md**: Minimum actionable patterns, crystallization
