# Exp L: Causal Intervention - Value Alignment Controls Performance

## 目的
「価値整合度（clean方向へのcosθ）が性能を因果的に支配する」ことを証明する。

## 核アイデア
clean labels の勾配 g_clean を「真の価値方向」として定義し、
この方向への整合度（cosθ）を直接制御した人工的な価値勾配を構成する。

```
g_value(c) = c·ĝ_clean + √(1-c²)·u
```

ここで：
- ĝ_clean = clean labels の勾配（正規化）
- c = cos_target（-0.9 〜 +0.9）
- u = ĝ_clean に直交する単位ベクトル

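ĝ_clean と u は互いに直交する単位ベクトルなので ‖g_value‖ = 1 となり、cos(g_value, g_clean) = c が保証される。
ただし実装では、基準となる g_clean は K ステップごとに更新されるキャッシュ値であり、c は ±0.999 にクランプされる。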

## exp_I との違い（重要）
- exp_I: g_struct（noisy）を基準にcosθを制御 → 解釈が逆転
- exp_L: g_clean（clean）を基準にcosθを制御 → 正しい因果介入

## 実験設計
- **ノイズ率**: η = 0.4
- **λ**: 0.30, 0.35, 0.40（ordered領域）
- **cos_target**: -0.9, -0.6, -0.3, 0.0, +0.3, +0.6, +0.9（7点）
- **Seeds**: 0, 1, 2

## Runs計算
3 λ × 7 c × 3 seeds = **63 runs**

## 期待される結果
- c > 0（clean方向に整合）→ **性能改善**
- c < 0（clean方向と逆）→ **性能悪化**
- c = 0（cleanに直交）→ 中間

→ **価値整合度が性能を直接支配する因果的証拠**

In [None]:
# ===== セットアップ =====
from google.colab import drive
drive.mount('/content/drive')

import os
from datetime import datetime

EXP_NAME = 'exp_L_causal_intervention'
TIMESTAMP = datetime.now().strftime('%Y%m%d_%H%M%S')
BASE_DIR = '/content/drive/MyDrive/dual-gradient-learning/Paper-A'

# By default every execution gets a fresh, timestamped output directory.
# That also means a checkpoint written by an earlier session lives in a
# different directory and would never be found again after a runtime restart.
# To continue an interrupted sweep, set the environment variable
# EXP_L_RESUME_DIR to the earlier run's directory before running this cell.
RESUME_DIR = os.environ.get('EXP_L_RESUME_DIR', '').strip()
if RESUME_DIR and os.path.isdir(RESUME_DIR):
    SAVE_DIR = RESUME_DIR.rstrip('/')
    print(f'Resuming into existing directory: {SAVE_DIR}')
else:
    if RESUME_DIR:
        # Requested directory is missing: fall back to a fresh run rather than fail.
        print(f'EXP_L_RESUME_DIR not found, starting a new run: {RESUME_DIR}')
    SAVE_DIR = f'{BASE_DIR}/{EXP_NAME}_{TIMESTAMP}'
os.makedirs(SAVE_DIR, exist_ok=True)
os.makedirs(f'{SAVE_DIR}/figures', exist_ok=True)

print(f'Experiment: {EXP_NAME}')
print(f'Timestamp: {TIMESTAMP}')
print(f'Save directory: {SAVE_DIR}')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils import parameters_to_vector
import torchvision
import torchvision.transforms as transforms
import numpy as np
import json
import time

# Use the GPU when one is visible to PyTorch, otherwise run on the CPU.
_cuda_available = torch.cuda.is_available()
device = torch.device('cuda') if _cuda_available else torch.device('cpu')
if _cuda_available:
    # Enable cuDNN's benchmark mode and report which GPU was picked up.
    torch.backends.cudnn.benchmark = True
    print(f'GPU: {torch.cuda.get_device_name(0)}')
print(f'Device: {device}')

In [None]:
# ===== モデル定義 =====
class IndexedDataset(Dataset):
    """Wrap a dataset so that each sample also reports its own index.

    The wrapped dataset must yield ``(img, label)`` pairs; this wrapper
    returns ``(img, label, idx)`` so callers can look up per-sample side
    information (such as replacement labels) by index.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample, target = self.dataset[idx]
        return sample, target, idx

class BasicBlock(nn.Module):
    """Residual block: conv3x3-BN-ReLU-conv3x3-BN, added to a skip path, then ReLU.

    The skip path is the identity unless the block changes the spatial
    stride or the channel count, in which case it is a 1x1 conv followed by
    batch norm.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        needs_projection = stride != 1 or in_planes != planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes),
            )
        else:
            # An empty Sequential passes its input through unchanged.
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.shortcut(x)
        out = self.conv1(x)
        out = F.relu(self.bn1(out))
        out = self.bn2(self.conv2(out))
        return F.relu(out + residual)

class ResNet18(nn.Module):
    """ResNet-18 variant with a 3x3 stride-1 stem and no initial max-pool.

    Four stages of two ``BasicBlock`` each (64, 128, 256, 512 channels),
    global average pooling, then a linear classifier with ``num_classes``
    outputs.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(64, 2, 1)
        self.layer2 = self._make_layer(128, 2, 2)
        self.layer3 = self._make_layer(256, 2, 2)
        self.layer4 = self._make_layer(512, 2, 2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        block_strides = [stride]
        block_strides.extend(1 for _ in range(num_blocks - 1))
        blocks = []
        for block_stride in block_strides:
            blocks.append(BasicBlock(self.in_planes, planes, block_stride))
            # Later blocks in this stage (and the next stage) see the new width.
            self.in_planes = planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        return self.linear(out)

In [None]:
# ===== 実験パラメータ =====
BATCH_SIZE = 256
NUM_WORKERS = 4
EPOCHS = 100
LR = 0.1
K = 16  # refresh interval, in training steps, of the cached g_clean

NOISE_RATE = 0.4
LAMBDAS = [0.30, 0.35, 0.40]
COSINES = [-0.9, -0.6, -0.3, 0.0, 0.3, 0.6, 0.9]
SEEDS = [0, 1, 2]

# Full factorial grid of run conditions, ordered by lambda, then cos_target,
# then seed.
experiments = [
    {'lambda': lam, 'cos_target': cos_target, 'seed': seed}
    for lam in LAMBDAS
    for cos_target in COSINES
    for seed in SEEDS
]

total_runs = len(experiments)
print(f'Total runs: {total_runs}')
# NOTE(review): 9.5 is presumably the expected minutes per run - confirm.
print(f'Estimated time: {total_runs * 9.5 / 60:.1f} hours')

# config保存
# Snapshot of the sweep settings, written next to the results.
config = {
    'experiment': EXP_NAME,
    'timestamp': TIMESTAMP,
    'description': 'Causal intervention: control alignment to g_clean (NOT g_struct)',
    'parameters': dict(
        lambdas=LAMBDAS,
        cosines=COSINES,
        seeds=SEEDS,
        noise_rate=NOISE_RATE,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        lr=LR,
        K=K,
    ),
    'total_runs': total_runs,
}
with open(f'{SAVE_DIR}/exp_L_config.json', 'w') as f:
    json.dump(config, f, indent=2)
print('Config saved')

In [None]:
# ===== ユーティリティ関数 =====
def set_seed(seed):
    """Seed NumPy's global RNG and PyTorch's CPU and CUDA generators with ``seed``."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def load_cifar10():
    """Download (if needed) and return the CIFAR-10 ``(trainset, testset)`` pair.

    The training set uses random 32x32 crops with 4-pixel padding and random
    horizontal flips; both sets are converted to tensors and normalised with
    the same per-channel mean and standard deviation.
    """
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)

    augment_and_normalize = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    normalize_only = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=augment_and_normalize)
    testset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=normalize_only)
    return trainset, testset

def get_data_loaders(trainset, testset):
    """Build the ``(train_loader, test_loader)`` pair.

    The training set is wrapped in ``IndexedDataset`` so each batch also
    carries sample indices; it is shuffled and drops the last incomplete
    batch. The test loader keeps dataset order and every sample.
    """
    shared_kwargs = dict(
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=True,
        persistent_workers=True,
    )
    train_loader = DataLoader(IndexedDataset(trainset), shuffle=True,
                              drop_last=True, **shared_kwargs)
    test_loader = DataLoader(testset, shuffle=False, **shared_kwargs)
    return train_loader, test_loader

def inject_noise(labels, noise_rate, seed, num_classes=10):
    """Return a copy of ``labels`` with a fixed fraction flipped to another class.

    Exactly ``int(noise_rate * len(labels))`` distinct positions are chosen
    and each is replaced by a class drawn uniformly from the other
    ``num_classes - 1`` classes, so every chosen position ends up mislabeled.

    Randomness comes from a private ``RandomState(seed + 1000)``. It produces
    the same stream as seeding NumPy's global generator with that value, but
    leaves the global generator untouched, so calling this function no longer
    perturbs other code that relies on ``np.random``.

    Args:
        labels: sequence of integer class labels supporting ``.copy()``,
            ``len()`` and item assignment (e.g. a NumPy array).
        noise_rate: fraction of labels to corrupt, in ``[0, 1]``.
        seed: base seed for the noise pattern.
        num_classes: number of classes; labels are assumed to lie in
            ``range(num_classes)``.

    Returns:
        A new label container; ``labels`` itself is not modified.

    Raises:
        ValueError: if ``noise_rate`` is outside ``[0, 1]``.
    """
    if not 0 <= noise_rate <= 1:
        raise ValueError(f'noise_rate must be in [0, 1], got {noise_rate}')

    rng = np.random.RandomState(seed + 1000)
    noisy_labels = labels.copy()
    n_noisy = int(noise_rate * len(labels))
    noisy_indices = rng.choice(len(labels), n_noisy, replace=False)
    for idx in noisy_indices:
        # Draw only from the wrong classes so the flip always changes the label.
        choices = [c for c in range(num_classes) if c != labels[idx]]
        noisy_labels[idx] = rng.choice(choices)
    return noisy_labels

def evaluate(model, test_loader):
    """Return top-1 accuracy of ``model`` over ``test_loader`` as a fraction.

    Switches the model to eval mode (it is not switched back here) and runs
    without gradient tracking. ``test_loader`` must yield
    ``(inputs, targets)`` batches.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_inputs = batch_inputs.to(device, non_blocking=True)
            batch_targets = batch_targets.to(device, non_blocking=True)
            # Index of the largest logit per row is the predicted class.
            predicted = model(batch_inputs).max(1)[1]
            n_seen += batch_targets.size(0)
            n_correct += predicted.eq(batch_targets).sum().item()
    return n_correct / n_seen

In [None]:
# ===== 核心: g_clean を基準に cosθ を制御する価値勾配の構成 =====
def construct_value_gradient_aligned_to_clean(g_clean, cos_target):
    """Build a unit "value gradient" at a prescribed cosine to ``g_clean``.

    The result is ``c * g_hat + sqrt(1 - c^2) * u`` where ``g_hat`` is
    ``g_clean`` normalised, ``u`` is a random unit vector orthogonal to
    ``g_hat`` (a fresh Gaussian draw with its ``g_hat`` component removed),
    and ``c`` is ``cos_target`` clamped to ``[-0.999, 0.999]``.

    Args:
        g_clean: flat gradient vector that defines the reference direction.
        cos_target: desired cosine between the result and ``g_clean``.

    Returns:
        ``(g_value, actual_cos)``: the constructed vector and the cosine
        actually measured between it and the normalised ``g_clean``
        (a Python float).
    """
    eps = 1e-12
    unit_clean = g_clean / (g_clean.norm() + eps)

    # Gram-Schmidt step: subtract the part of a random draw that lies along
    # unit_clean, then renormalise what is left.
    draw = torch.randn_like(g_clean)
    along_clean = (draw @ unit_clean) * unit_clean
    residual = draw - along_clean
    u = residual / (residual.norm() + eps)

    # Clamp so the orthogonal coefficient never collapses to exactly zero.
    c = max(-0.999, min(0.999, cos_target))
    s = np.sqrt(1 - c ** 2)
    g_value = c * unit_clean + s * u

    # Sanity value: cosine actually achieved against the reference direction.
    achieved_cos = (unit_clean @ g_value).item()
    return g_value, achieved_cos

In [None]:
# ===== 学習関数: 正しい因果介入版 =====
def _flat_grad(model):
    """Concatenate every parameter's ``.grad`` into one flat vector (copied)."""
    return parameters_to_vector([p.grad for p in model.parameters()]).clone()


def _assign_flat_grad(model, flat_grad):
    """Write slices of ``flat_grad`` into ``p.grad``, in ``model.parameters()`` order."""
    offset = 0
    for p in model.parameters():
        numel = p.numel()
        p.grad = flat_grad[offset:offset + numel].view(p.shape).clone()
        offset += numel


def train_with_causal_intervention(model, train_loader, test_loader,
                                    clean_labels, noisy_labels, lam, cos_target):
    """Train with a mixed gradient whose value part has a set cosine to g_clean.

    Per training step:
      1. ``g_struct``: gradient of the cross-entropy on the noisy labels.
      2. ``g_clean``: gradient of the cross-entropy on the clean labels,
         recomputed every ``K`` steps and cached in between.
      3. ``g_value``: unit vector built by
         ``construct_value_gradient_aligned_to_clean`` at cosine
         ``cos_target`` to the cached ``g_clean``.
      4. The update uses ``(1 - lam) * g_struct_hat + lam * g_value_hat``
         (both unit-normalised), applied through SGD with momentum and
         weight decay.

    Both losses are computed from the same forward pass. The autograd graph
    is retained only on steps that refresh ``g_clean``, so BatchNorm running
    statistics are updated once per step and no graph is kept alive without
    need.

    Note that between refreshes the cached ``g_clean`` comes from an earlier
    batch and earlier weights; the recorded cosines are measured against that
    cached vector.

    Args:
        model: network to train in place.
        train_loader: yields ``(inputs, labels, indices)``; the labels it
            yields are ignored in favour of the label arrays below.
        test_loader: yields ``(inputs, targets)`` for evaluation.
        clean_labels: clean label per training index.
        noisy_labels: noisy label per training index.
        lam: mixing weight of the value gradient.
        cos_target: target cosine between ``g_value`` and ``g_clean``.

    Returns:
        ``(final_acc, best_acc, avg_cos_to_clean, avg_cos_struct_clean,
        history)`` where ``best_acc`` is the maximum over the evaluations
        taken every 10 epochs and the final one, the two averages are means
        of per-epoch means, and ``history`` holds those periodic evaluations.
    """
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 75], gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    clean_labels_tensor = torch.tensor(clean_labels, device=device)
    noisy_labels_tensor = torch.tensor(noisy_labels, device=device)

    cached_g_clean = None
    global_step = 0

    best_acc = 0
    cos_to_clean_history = []       # per-epoch mean cosine of g_value vs cached g_clean
    cos_struct_clean_history = []   # per-epoch mean cosine of g_struct vs cached g_clean
    history = {'epoch': [], 'test_acc': [], 'test_error': []}

    for epoch in range(EPOCHS):
        model.train()
        epoch_cos_to_clean = []
        epoch_cos_struct_clean = []

        for inputs, _, indices in train_loader:
            inputs = inputs.to(device, non_blocking=True)
            indices = indices.to(device, non_blocking=True)
            batch_noisy = noisy_labels_tensor[indices]
            batch_clean = clean_labels_tensor[indices]

            refresh_clean = cached_g_clean is None or global_step % K == 0

            # ----- Step 1: structure gradient (noisy labels) -----
            optimizer.zero_grad()
            outputs = model(inputs)
            loss_struct = criterion(outputs, batch_noisy)
            # Keep the graph only when the clean loss will be back-propagated
            # through the same forward pass below.
            loss_struct.backward(retain_graph=refresh_clean)
            g_struct = _flat_grad(model)

            # ----- Step 2: clean gradient (reference direction), every K steps -----
            if refresh_clean:
                optimizer.zero_grad()
                loss_clean = criterion(outputs, batch_clean)
                loss_clean.backward()
                cached_g_clean = _flat_grad(model)

            # ----- Step 3: value gradient at the target cosine to g_clean -----
            g_value, actual_cos_to_clean = construct_value_gradient_aligned_to_clean(
                cached_g_clean, cos_target
            )
            epoch_cos_to_clean.append(actual_cos_to_clean)

            # For reference: how aligned the noisy gradient is with g_clean.
            g_struct_norm = g_struct / (g_struct.norm() + 1e-12)
            g_clean_norm = cached_g_clean / (cached_g_clean.norm() + 1e-12)
            cos_struct_clean = (g_struct_norm @ g_clean_norm).item()
            epoch_cos_struct_clean.append(cos_struct_clean)

            # ----- Step 4: mix the unit-normalised directions and step -----
            g_value_norm = g_value / (g_value.norm() + 1e-12)
            g_mix = (1 - lam) * g_struct_norm + lam * g_value_norm

            optimizer.zero_grad()
            _assign_flat_grad(model, g_mix)
            optimizer.step()
            global_step += 1

        scheduler.step()
        cos_to_clean_history.append(np.mean(epoch_cos_to_clean))
        cos_struct_clean_history.append(np.mean(epoch_cos_struct_clean))

        # Evaluate every 10 epochs.
        if (epoch + 1) % 10 == 0:
            acc = evaluate(model, test_loader)
            best_acc = max(best_acc, acc)
            history['epoch'].append(epoch + 1)
            history['test_acc'].append(acc)
            history['test_error'].append(1 - acc)

    final_acc = evaluate(model, test_loader)
    avg_cos_to_clean = np.mean(cos_to_clean_history)
    avg_cos_struct_clean = np.mean(cos_struct_clean_history)

    return final_acc, max(best_acc, final_acc), avg_cos_to_clean, avg_cos_struct_clean, history

In [None]:
# ===== データ準備 =====
trainset, testset = load_cifar10()
clean_labels = np.array(trainset.targets)
train_loader, test_loader = get_data_loaders(trainset, testset)

# Pull one batch to confirm the loader yields (images, labels, indices).
print('Verifying IndexedDataset...')
sample_batch = next(iter(train_loader))
print(f'  ✓ Batch: images {list(sample_batch[0].shape)}, labels {list(sample_batch[1].shape)}, indices {list(sample_batch[2].shape)}')

# GPU warmup.
# Run under no_grad and discard the outputs: an output tensor kept in a
# variable would carry an autograd graph referencing the warmup activations
# and weights, so deleting the model and emptying the cache could not
# release that memory.
warmup_model = ResNet18().to(device)
with torch.no_grad():
    for _ in range(20):
        warmup_model(torch.randn(BATCH_SIZE, 3, 32, 32, device=device))
del warmup_model
torch.cuda.empty_cache()
print('Warmup complete.')

In [None]:
# ===== メイン実験ループ =====
results = []
checkpoint_file = f'{SAVE_DIR}/exp_L_checkpoint.json'
completed = set()

# Restore finished runs from an existing checkpoint, if any.
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, 'r') as f:
        results = json.load(f)
    for r in results:
        completed.add((r['lambda'], r['cos_target'], r['seed']))
    print(f'Checkpoint loaded: {len(completed)} runs completed')

# Noisy labels are generated once with seed=0, so every run sees the same
# noise pattern.
noisy_labels = inject_noise(clean_labels, NOISE_RATE, seed=0)
print(f'Noise injected: {np.mean(noisy_labels != clean_labels)*100:.1f}% corrupted')

run_counter = 0
exp_start = time.time()

for exp in experiments:
    # Counted before the skip check so experiment ids stay stable on resume.
    run_counter += 1
    lam = exp['lambda']
    cos_target = exp['cos_target']
    seed = exp['seed']

    key = (lam, cos_target, seed)
    if key in completed:
        continue

    print(f'\n[{run_counter}/{total_runs}] λ={lam:.2f} cos_target={cos_target:+.1f} seed={seed}')
    t0 = time.time()

    set_seed(seed)
    model = ResNet18().to(device)

    final_acc, best_acc, avg_cos_to_clean, avg_cos_struct_clean, history = train_with_causal_intervention(
        model, train_loader, test_loader,
        clean_labels, noisy_labels, lam, cos_target
    )
    elapsed = time.time() - t0

    result = {
        'experiment_id': f'L-{run_counter:03d}',
        'experiment': EXP_NAME,
        'lambda': lam,
        'cos_target': cos_target,
        'cos_actual_to_clean': avg_cos_to_clean,
        'cos_struct_clean': avg_cos_struct_clean,
        'seed': seed,
        'noise_rate': NOISE_RATE,
        'test_acc': final_acc,
        'test_error': 1 - final_acc,
        'best_acc': best_acc,
        'best_error': 1 - best_acc,
        'time_seconds': elapsed,
        'history': history
    }
    results.append(result)

    # Save the checkpoint atomically: write a sibling temp file, then swap it
    # in. An interruption mid-write then cannot leave a truncated checkpoint
    # that would fail to parse on the next restore.
    checkpoint_tmp = f'{checkpoint_file}.tmp'
    with open(checkpoint_tmp, 'w') as f:
        json.dump(results, f, indent=2)
    os.replace(checkpoint_tmp, checkpoint_file)

    # Status tag for runs that land on the expected side of the thresholds.
    status = ''
    if cos_target > 0 and (1 - final_acc) < 0.15:
        status = ' ✅ IMPROVED (expected for cos>0)'
    elif cos_target < 0 and (1 - final_acc) > 0.25:
        status = ' ⚠️ DEGRADED (expected for cos<0)'

    print(f'  Error: {1-final_acc:.4f} | cos_to_clean: {avg_cos_to_clean:.4f} | Time: {elapsed/60:.1f} min{status}')

    # Progress estimate from the mean duration of all recorded runs.
    completed_count = len(results)
    avg_time = sum([r['time_seconds'] for r in results]) / completed_count
    remaining = total_runs - completed_count
    eta_hours = (remaining * avg_time) / 3600
    print(f'  Progress: {completed_count}/{total_runs} | ETA: {eta_hours:.1f} hours')

print(f'\n{"="*60}')
print(f'EXPERIMENT COMPLETE')
print(f'Total time: {(time.time()-exp_start)/3600:.2f} hours')
print(f'{"="*60}')

In [None]:
# ===== 結果保存 =====
import pandas as pd

# Full results, including per-run history, go to JSON.
with open(f'{SAVE_DIR}/exp_L_results.json', 'w') as f:
    json.dump(results, f, indent=2)

# The CSV gets one flat row per run, without the nested 'history' entry.
results_flat = []
for r in results:
    row = dict(r)
    row.pop('history', None)
    results_flat.append(row)
df = pd.DataFrame(results_flat)
df.to_csv(f'{SAVE_DIR}/exp_L_results.csv', index=False)

print(f'Results saved to {SAVE_DIR}/')

In [None]:
# ===== 因果分析 =====
import pandas as pd
import numpy as np

# One flat row per run; the nested 'history' entry is dropped.
df = pd.DataFrame([
    {key: value for key, value in r.items() if key != 'history'}
    for r in results
])

_rule = '=' * 70
print(_rule)
print('CAUSAL ANALYSIS: cos(g_value, g_clean) → Performance')
print(_rule)
print('\n【重要】cos_target > 0 は「価値（clean方向）に整合」を意味する')
print('        → 期待: cos > 0 で改善、cos < 0 で悪化\n')

# Per-lambda breakdown.
for lam in LAMBDAS:
    print(f'\n--- λ = {lam} ---')
    df_l = df[df['lambda'] == lam]

    # Mean/std of test error and mean achieved cosine for each target cosine.
    stats = df_l.groupby('cos_target').agg({
        'test_error': ['mean', 'std'],
        'cos_actual_to_clean': 'mean'
    }).round(4)
    print(stats)

    # Correlation between target cosine and test error within this lambda.
    corr = df_l['cos_target'].corr(df_l['test_error'])
    print(f'\n  Correlation(cos_target, error): {corr:.4f}')
    if corr < -0.7:
        print('  → ✅ STRONG NEGATIVE: Higher alignment → Lower error (CAUSAL SUPPORT)')
    elif corr > 0.7:
        print('  → ⚠️ UNEXPECTED POSITIVE: Check design!')

# Correlation pooled over every lambda.
print('\n' + _rule)
print('OVERALL CORRELATION')
print(_rule)
overall_corr = df['cos_target'].corr(df['test_error'])
print(f'Correlation(cos_target, test_error): {overall_corr:.4f}')

if overall_corr < -0.5:
    for _line in (
        '\n✅ CAUSAL PROOF ESTABLISHED:',
        '   Value alignment (cos to g_clean) directly controls performance.',
        '   Higher alignment → Better generalization.',
    ):
        print(_line)

In [None]:
# ===== 可視化: cos_target vs Performance =====
import matplotlib.pyplot as plt

# One panel per lambda. The panel count and figure width follow LAMBDAS so
# the loop below cannot index past the axes when the sweep is changed; with
# three lambdas this is the same 15x5 figure as a fixed 1x3 grid.
n_panels = len(LAMBDAS)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)
axes = axes[0]  # squeeze=False always returns a 2-D array; take its single row

for i, lam in enumerate(LAMBDAS):
    ax = axes[i]
    df_l = df[df['lambda'] == lam]

    # Mean and standard deviation of test error across seeds per target cosine.
    stats = df_l.groupby('cos_target')['test_error'].agg(['mean', 'std'])

    ax.errorbar(stats.index, stats['mean'], yerr=stats['std'],
                marker='o', capsize=4, linewidth=2, markersize=8, color='C0')

    # Reference line at cos = 0.
    ax.axvline(x=0, color='gray', linestyle=':', alpha=0.5, label='Orthogonal')

    # Arrow from the highest to the lowest mean error: the expected trend.
    ax.annotate('', xy=(0.8, stats['mean'].min()), xytext=(0.8, stats['mean'].max()),
                arrowprops=dict(arrowstyle='->', color='green', lw=2))
    ax.text(0.85, (stats['mean'].min() + stats['mean'].max())/2, 'Expected\nimprovement',
            fontsize=9, color='green', va='center')

    ax.set_xlabel('cos(g_value, g_clean) - Target', fontsize=12)
    ax.set_ylabel('Test Error', fontsize=12)
    ax.set_title(f'Causal Intervention: λ = {lam}', fontsize=13, fontweight='bold')
    ax.legend()
    ax.grid(alpha=0.3)
    ax.set_xlim([-1, 1])

plt.suptitle('Value Alignment (to g_clean) Controls Performance', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/figures/exp_L_causal_intervention.png', dpi=300, bbox_inches='tight')
plt.show()
print(f'Figure saved: {SAVE_DIR}/figures/exp_L_causal_intervention.png')

In [None]:
# ===== 可視化: λ × cos_target ヒートマップ =====
import matplotlib.pyplot as plt
import numpy as np

# ピボットテーブル作成
# Rows are cos_target and columns are lambda, both in ascending order.
pivot = df.pivot_table(values='test_error', index='cos_target', columns='lambda', aggfunc='mean')

fig, ax = plt.subplots(figsize=(8, 6))
# origin='lower' puts the first row (smallest cos_target) at the bottom, which
# is where `extent` places min(COSINES). With imshow's default origin the rows
# would be drawn top-to-bottom and the image would be flipped against the
# y-axis and the annotations below.
im = ax.imshow(pivot.values, cmap='RdYlGn_r', aspect='auto', origin='lower',
               extent=[min(LAMBDAS)-0.025, max(LAMBDAS)+0.025, min(COSINES)-0.15, max(COSINES)+0.15])

ax.set_xlabel('λ', fontsize=12)
ax.set_ylabel('cos(g_value, g_clean)', fontsize=12)
ax.set_title('Phase Diagram: λ × Value Alignment → Test Error', fontsize=13, fontweight='bold')

cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Test Error', fontsize=11)

# Annotations: separate the aligned (cos > 0) and anti-aligned (cos < 0) halves.
ax.axhline(y=0, color='white', linestyle='--', linewidth=1, alpha=0.7)
ax.text(max(LAMBDAS)+0.03, 0.6, 'Aligned\n(expected: good)', fontsize=9, va='center')
ax.text(max(LAMBDAS)+0.03, -0.6, 'Anti-aligned\n(expected: bad)', fontsize=9, va='center')

plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/figures/exp_L_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()
print(f'Figure saved: {SAVE_DIR}/figures/exp_L_heatmap.png')

In [None]:
# ===== exp_I との比較（もし両方の結果がある場合） =====
# Fixed explanatory text contrasting the reference direction used by exp_I
# (g_struct) with the one used by exp_L (g_clean).
_banner = '=' * 70
for _line in (
    '\n' + _banner,
    'COMPARISON: exp_I vs exp_L',
    _banner,
    '\nexp_I: cos_target は g_struct（noisy方向）との角度',
    '       → cos > 0 は「ノイズ方向に従う」→ 悪化が期待される',
    '\nexp_L: cos_target は g_clean（価値方向）との角度',
    '       → cos > 0 は「価値方向に整合」→ 改善が期待される',
    '\n【結論】因果介入実験では「何を基準にcosを定義するか」が決定的に重要',
):
    print(_line)

In [None]:
# ===== サマリー =====
_summary_rule = '=' * 70
print('\n' + _summary_rule)
print('EXPERIMENT L: CAUSAL INTERVENTION - SUMMARY')
print(_summary_rule)

# Mean test error over all runs in each alignment region. Despite the df_
# prefix these three names hold scalars, not DataFrames.
_cos = df['cos_target']
df_positive = df.loc[_cos > 0, 'test_error'].mean()
df_negative = df.loc[_cos < 0, 'test_error'].mean()
df_zero = df.loc[_cos == 0, 'test_error'].mean()

print('\nMean Test Error by alignment region:')
print(f'  cos > 0 (aligned to clean):     {df_positive:.4f}')
print(f'  cos = 0 (orthogonal to clean):  {df_zero:.4f}')
print(f'  cos < 0 (anti-aligned to clean): {df_negative:.4f}')

print('\nConclusion:')
# The expected ordering is aligned < orthogonal < anti-aligned error.
_expected_ordering = df_positive < df_zero and df_zero < df_negative
if not _expected_ordering:
    print('  ⚠️ Results require further analysis')
else:
    print('  ✅ CAUSAL PROOF ESTABLISHED')
    print('  ✅ Value alignment (cos to g_clean) directly controls performance')
    print('  ✅ Higher alignment → Better generalization')
    print('  ✅ This is the expected result for a correct causal intervention')

print(f'\nSave directory: {SAVE_DIR}')