# Paper C - Fig C9: A_naive Confounding Demonstration

## 目的
交絡（confounding）の実証：
- **A_naive = cos(g_mix, g_ref)** がλに依存することを示す
- ρを固定しても、A_naiveはλで変動してしまう
- これが「方向と使用量の交絡」の本質

## 設計
- 較正済み設定（center_scale=2.5, noise_std=1.0）
- 軽量版：180 runs（約5分）

In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

import os
# Output directory on the mounted drive; the figure, CSV and JSON files
# produced later in this notebook are written below it.
SAVE_DIR = '/content/drive/MyDrive/paper-C-results/E3_A_naive'
os.makedirs(SAVE_DIR, exist_ok=True)  # no error if the directory already exists
print(f'Save directory: {SAVE_DIR}')

In [None]:
import itertools
import json
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')
if torch.cuda.is_available():
    # Report the name of CUDA device index 0.
    print(f'GPU: {torch.cuda.get_device_name(0)}')

def set_seed(seed):
    """Seed NumPy and PyTorch (CPU and every CUDA device) with ``seed``.

    Also switches cuDNN to deterministic algorithm selection.

    Args:
        seed: Integer seed passed unchanged to each generator.
    """
    torch.backends.cudnn.deterministic = True
    # The three generators are independent, so the seeding order is irrelevant.
    for seeder in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

In [None]:
# Calibrated data settings
CENTER_SCALE = 2.5  # scale passed to generate_orthogonal_centers
NOISE_STD_DATA = 1.0  # std of the Gaussian noise added to each view
N_TRAIN = 20000  # number of training samples
N_TEST = 10000  # number of test samples
N_CLASSES = 10
DIM_PER_VIEW = 8  # each sample is the concatenation of two views of this size
CENTER_SEED = 0  # seed used when generating the class centers

# Lightweight grid
NOISE_RATES = [0.0, 0.3, 0.5]  # fraction of training labels that get corrupted
RHO_VALUES = [0.0, 0.4, 0.7, 1.0]  # restricted to 4 points
LAMBDA_VALUES = [0.0, 0.25, 0.5, 0.75, 1.0]  # weight of g_struct in g_mix
SEEDS = [0, 1, 2]  # 3 seeds

EPOCHS = 100
LR = 0.1

# Full factorial sweep: one run per (noise rate, rho, lambda, seed) combination.
TOTAL_RUNS = len(NOISE_RATES) * len(RHO_VALUES) * len(LAMBDA_VALUES) * len(SEEDS)
print(f'Total runs: {TOTAL_RUNS}')

In [None]:
# データ生成関数（E3と同じ）

def generate_orthogonal_centers(n_classes, dim_per_view, center_scale, seed=0):
    """Create mutually orthogonal class centers and split them into two views.

    A random Gaussian matrix of shape (2 * dim_per_view, n_classes) is
    orthonormalized with a reduced QR decomposition. Each column of Q, scaled
    by ``center_scale``, is the joint center of one class; its first
    ``dim_per_view`` coordinates form the view-A center and the remaining
    coordinates the view-B center.

    Args:
        n_classes: Number of class centers to generate.
        dim_per_view: Dimensionality of each of the two views.
        center_scale: Norm given to every joint (A and B concatenated) center.
        seed: Seed of the private ``RandomState`` that draws the random matrix.

    Returns:
        Tuple ``(centers_A, centers_B)`` of float32 arrays, each of shape
        (n_classes, dim_per_view).

    Raises:
        ValueError: If ``n_classes`` exceeds ``2 * dim_per_view``. That many
            orthogonal vectors do not exist in the joint space, and the QR
            factor would silently contain fewer rows than classes.
    """
    total_dim = dim_per_view * 2
    if n_classes > total_dim:
        raise ValueError(
            f'n_classes={n_classes} orthogonal centers do not fit into '
            f'2 * dim_per_view={total_dim} dimensions'
        )

    rng = np.random.RandomState(seed)
    random_matrix = rng.randn(total_dim, n_classes)
    # Reduced QR: Q has shape (total_dim, n_classes) with orthonormal columns.
    Q, _ = np.linalg.qr(random_matrix)
    centers_joint = Q.T * center_scale
    centers_A = centers_joint[:, :dim_per_view]
    centers_B = centers_joint[:, dim_per_view:]
    return centers_A.astype(np.float32), centers_B.astype(np.float32)

def generate_two_view_data(n_samples, centers_A, centers_B, noise_std, sample_seed):
    """Sample a labeled two-view dataset around the given class centers.

    Labels are drawn uniformly from ``range(n_classes)``. Each view of a
    sample is its class center plus isotropic Gaussian noise. The noise is
    drawn in two vectorized calls (all of view A, then all of view B); this
    consumes the ``RandomState`` stream in the same order as drawing one
    sample at a time, so results match a per-sample loop.

    Args:
        n_samples: Number of samples to generate (0 yields empty arrays).
        centers_A: Array of shape (n_classes, dim_per_view) with view-A centers.
        centers_B: Array with the view-B centers, same shape as ``centers_A``.
        noise_std: Standard deviation of the additive Gaussian noise.
        sample_seed: Seed of the private ``RandomState`` used for sampling.

    Returns:
        Tuple ``(X, labels)``: ``X`` is float32 of shape
        (n_samples, 2 * dim_per_view) with view A in the leading columns and
        view B in the trailing ones; ``labels`` is int64 of shape (n_samples,).
    """
    n_classes, dim_per_view = centers_A.shape
    rng = np.random.RandomState(sample_seed)
    labels = rng.randint(0, n_classes, n_samples)
    # Fancy indexing picks each sample's center; noise is filled row by row.
    view_A = centers_A[labels] + rng.randn(n_samples, dim_per_view) * noise_std
    view_B = centers_B[labels] + rng.randn(n_samples, dim_per_view) * noise_std
    X = np.concatenate([view_A, view_B], axis=1)
    return X.astype(np.float32), labels.astype(np.int64)

def inject_label_noise(labels, noise_rate, n_classes=10, seed=42):
    """Return a copy of ``labels`` in which a fixed fraction is replaced.

    ``int(noise_rate * len(labels))`` distinct positions are chosen without
    replacement, and each receives a label drawn uniformly from the classes
    other than its original one. The input array is never modified.

    Args:
        labels: 1-D integer array of clean labels.
        noise_rate: Fraction of labels to corrupt; 0 returns a plain copy.
        n_classes: Number of classes; replacements come from ``range(n_classes)``.
        seed: Base seed; the private ``RandomState`` is seeded with ``seed + 1000``.

    Returns:
        New array with the same shape and dtype as ``labels``.
    """
    corrupted = labels.copy()
    if noise_rate == 0:
        return corrupted

    rng = np.random.RandomState(seed + 1000)
    flip_count = int(noise_rate * len(labels))
    for pos in rng.choice(len(labels), flip_count, replace=False):
        true_label = labels[pos]
        alternatives = [c for c in range(n_classes) if c != true_label]
        corrupted[pos] = rng.choice(alternatives)
    return corrupted

# Cell-completion marker printed once the data-generation functions are defined.
print('データ生成関数定義完了')

In [None]:
# モデル定義

class TwoViewMLP(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Layers are stored in ``self.net`` as Linear-ReLU-Linear-ReLU-Linear.

    Args:
        input_dim: Size of the input feature vector.
        hidden_dim: Width of both hidden layers.
        n_classes: Number of output logits.
    """

    def __init__(self, input_dim=16, hidden_dim=64, n_classes=10):
        super().__init__()
        widths = [input_dim, hidden_dim, hidden_dim, n_classes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.pop()  # the output layer produces raw logits, no activation
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the class logits for a batch ``x``."""
        logits = self.net(x)
        return logits

# Cell-completion marker printed once the model class is defined.
print('モデル定義完了')

In [None]:
# 勾配操作関数

def get_gradient_vector(model):
    """Flatten the gradients of all parameters of ``model`` into one 1-D tensor.

    Parameters are visited in ``model.parameters()`` order. A parameter whose
    ``grad`` is ``None`` contributes a block of zeros of its own size, so the
    layout of the returned vector always matches the per-parameter slicing
    done by ``set_gradient_vector`` (which walks every parameter).

    Args:
        model: Module whose parameter gradients are collected.

    Returns:
        1-D tensor whose length is the total number of parameter elements.
    """
    pieces = []
    for p in model.parameters():
        if p.grad is None:
            # Keep the slot so later offsets stay aligned with the parameters.
            pieces.append(torch.zeros(p.numel(), dtype=p.dtype, device=p.device))
        else:
            # reshape (not view) also handles non-contiguous gradient tensors.
            pieces.append(p.grad.reshape(-1))
    return torch.cat(pieces)

def set_gradient_vector(model, grad_vec):
    """Write a flat gradient vector back into the ``.grad`` of each parameter.

    Consecutive slices of ``grad_vec`` are assigned in ``model.parameters()``
    order. Every slice is reshaped to its parameter's shape and cloned, so
    the stored gradients do not alias ``grad_vec``.

    Args:
        model: Module whose parameter gradients are overwritten.
        grad_vec: 1-D tensor holding the concatenated gradients.
    """
    offset = 0
    for param in model.parameters():
        end = offset + param.numel()
        chunk = grad_vec[offset:end]
        param.grad = chunk.view(param.shape).clone()
        offset = end

def orthogonalize(g_ref_norm, v):
    """Return the unit vector of ``v`` with its ``g_ref_norm`` component removed.

    If the remainder is numerically zero (norm below 1e-10), a fresh random
    vector is drawn with ``torch.randn_like`` and projected once instead.

    Args:
        g_ref_norm: 1-D reference direction; the projection formula treats it
            as unit length.
        v: 1-D tensor to orthogonalize against ``g_ref_norm``.

    Returns:
        1-D tensor: the orthogonal remainder divided by its norm.
    """
    def _reject(vec):
        # Subtract the projection of vec onto the reference direction.
        return vec - torch.dot(vec, g_ref_norm) * g_ref_norm

    residual = _reject(v)
    length = torch.norm(residual)
    if length < 1e-10:
        residual = _reject(torch.randn_like(v))
        length = torch.norm(residual)
    return residual / length

def construct_g_value(g_ref_norm, rho, device):
    """Build a unit vector whose cosine with ``g_ref_norm`` is set by ``rho``.

    For ``|rho| < 1`` the result is
    ``rho * g_ref_norm + sqrt(1 - rho**2) * g_perp`` (normalized), where
    ``g_perp`` is a random unit vector orthogonal to ``g_ref_norm``. For
    ``|rho| >= 1`` only the (normalized) ``rho * g_ref_norm`` term is used.

    Args:
        g_ref_norm: 1-D reference direction, expected to be unit length.
        rho: Target cosine between the result and ``g_ref_norm``.
        device: Not used by this function; kept for call compatibility.

    Returns:
        Tuple ``(g_value, g_perp)`` of 1-D tensors.
    """
    # The random direction is drawn on every call, even when |rho| >= 1,
    # so the random stream advances identically for all rho values.
    g_perp = orthogonalize(g_ref_norm, torch.randn_like(g_ref_norm))

    g_value = rho * g_ref_norm
    if abs(rho) < 1.0:
        g_value = g_value + np.sqrt(1 - rho**2) * g_perp

    return g_value / torch.norm(g_value), g_perp

# Cell-completion marker printed once the gradient helper functions are defined.
print('勾配操作関数定義完了')

In [None]:
# Experiment function extended with the A_naive measurement

def run_experiment_with_A_naive(noise_rate, rho, lam, model_seed):
    """
    Train one model with a mixed gradient and record A_naive = cos(g_mix, g_ref).

    Every epoch computes two full-batch gradients (clean labels -> g_ref,
    noisy labels -> g_struct), builds a unit vector g_value with cosine rho
    to g_ref, mixes g_mix = lam * g_struct + (1 - lam) * g_value on unit
    vectors, and takes one SGD step along g_mix rescaled to the norm of
    g_struct.

    Args:
        noise_rate: Fraction of training labels corrupted for g_struct.
        rho: Target cosine between g_value and g_ref.
        lam: Mixing weight of g_struct; (1 - lam) is the weight of g_value.
        model_seed: Seed for the label-noise draw and, via set_seed, for
            model initialization and the random directions.

    Returns:
        Dict with the run configuration ('noise_rate', 'rho', 'lambda',
        'seed'), the final 'test_acc', the mean / std / last value of A_naive
        over the epochs, and the mean of cos(g_value, g_ref) over the epochs.
    """
    # Class centers and both data splits use fixed seeds (CENTER_SEED, 42, 43),
    # so the features and clean labels do not depend on the arguments.
    centers_A, centers_B = generate_orthogonal_centers(
        N_CLASSES, DIM_PER_VIEW, CENTER_SCALE, seed=CENTER_SEED
    )

    X_train, y_train_clean = generate_two_view_data(
        N_TRAIN, centers_A, centers_B, NOISE_STD_DATA, sample_seed=42
    )
    X_test, y_test = generate_two_view_data(
        N_TEST, centers_A, centers_B, NOISE_STD_DATA, sample_seed=43
    )

    # Only the label corruption varies with model_seed.
    y_train_noisy = inject_label_noise(y_train_clean, noise_rate, N_CLASSES, seed=model_seed)

    X_train_t = torch.tensor(X_train, device=device)
    y_clean_t = torch.tensor(y_train_clean, device=device)
    y_noisy_t = torch.tensor(y_train_noisy, device=device)
    X_test_t = torch.tensor(X_test, device=device)
    y_test_t = torch.tensor(y_test, device=device)

    # Seed right before model construction so initialization (and the random
    # draws made later through torch) is reproducible per model_seed.
    set_seed(model_seed)
    model = TwoViewMLP().to(device)
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Per-epoch records of A_naive
    A_naive_values = []
    cos_gval_gref_values = []  # cos(g_value, g_ref) is recorded as well

    # Full-batch training: each epoch performs exactly one optimizer step.
    for epoch in range(EPOCHS):
        model.train()

        # g_ref: gradient of the loss on the clean labels
        optimizer.zero_grad()
        loss_ref = criterion(model(X_train_t), y_clean_t)
        loss_ref.backward()
        g_ref = get_gradient_vector(model).clone()
        g_ref_norm = g_ref / torch.norm(g_ref)
        # NOTE(review): g_ref_scale is assigned but never read in this function.
        g_ref_scale = torch.norm(g_ref)

        # g_struct: gradient of the loss on the noisy labels
        optimizer.zero_grad()
        loss_struct = criterion(model(X_train_t), y_noisy_t)
        loss_struct.backward()
        g_struct = get_gradient_vector(model).clone()
        g_struct_norm = g_struct / torch.norm(g_struct)
        g_struct_scale = torch.norm(g_struct)

        # g_value (rho-design): unit vector built from g_ref and a freshly
        # drawn random orthogonal direction
        g_value, _ = construct_g_value(g_ref_norm, rho, device)

        # Verification: measured cos(g_value, g_ref)
        cos_gval_gref = torch.dot(g_value, g_ref_norm).item()
        cos_gval_gref_values.append(cos_gval_gref)

        # g_mix = lam * g_struct + (1 - lam) * g_value, mixed on unit vectors
        g_mix = lam * g_struct_norm + (1 - lam) * g_value
        g_mix_norm = g_mix / torch.norm(g_mix)

        # A_naive = cos(g_mix, g_ref)
        A_naive = torch.dot(g_mix_norm, g_ref_norm).item()
        A_naive_values.append(A_naive)

        # Restore the scale (norm of g_struct) and update: the optimizer steps
        # with the injected g_mix instead of the autograd gradients.
        g_mix_scaled = g_mix_norm * g_struct_scale
        optimizer.zero_grad()
        set_gradient_vector(model, g_mix_scaled)
        optimizer.step()

    # Evaluation: accuracy on the test split
    model.eval()
    with torch.no_grad():
        test_acc = (model(X_test_t).argmax(1) == y_test_t).float().mean().item()

    return {
        'noise_rate': noise_rate,
        'rho': rho,
        'lambda': lam,
        'seed': model_seed,
        'test_acc': test_acc,
        'A_naive_mean': np.mean(A_naive_values),  # mean over epochs
        'A_naive_std': np.std(A_naive_values),  # std over epochs within this run
        'A_naive_final': A_naive_values[-1],  # value at the last epoch
        'cos_gval_gref_mean': np.mean(cos_gval_gref_values)
    }

# Cell-completion marker printed once the experiment function is defined.
print('実験関数定義完了（A_naive計測付き）')

In [None]:
# Sanity check: run one configuration end to end before starting the sweep.

print('=== 動作確認 ===')
# Single source of truth for the checked configuration, so the printed labels
# and the target value cannot drift from the arguments actually passed.
check_args = {'noise_rate': 0.3, 'rho': 0.7, 'lam': 0.5, 'model_seed': 0}
result = run_experiment_with_A_naive(**check_args)
print(f'noise={check_args["noise_rate"]:.0%}, ρ={check_args["rho"]}, λ={check_args["lam"]}')
print(f'  test_acc = {result["test_acc"]:.3f}')
print(f'  cos(g_val, g_ref) = {result["cos_gval_gref_mean"]:.4f} (target: {check_args["rho"]})')
print(f'  A_naive = cos(g_mix, g_ref) = {result["A_naive_mean"]:.4f}')
print('\n✓ 動作確認OK')

In [None]:
# Main experiment: full sweep over noise rate x rho x lambda x seed.

print('\n' + '=' * 60)
print('A_naive Confounding Experiment')
print(f'Total: {TOTAL_RUNS} runs')
print('=' * 60 + '\n')

results = []
start_time = time.time()
run_count = 0

for noise_rate in NOISE_RATES:
    # round() rather than int(): e.g. 0.29 * 100 is 28.999999999999996,
    # which int() would truncate to 28.
    noise_pct = round(noise_rate * 100)
    print(f'\n=== NOISE {noise_pct}% ===')

    # product() iterates with the last factor fastest: rho outermost, seed innermost.
    for rho, lam, seed in itertools.product(RHO_VALUES, LAMBDA_VALUES, SEEDS):
        run_count += 1
        result = run_experiment_with_A_naive(noise_rate, rho, lam, seed)
        results.append(result)

        # Progress line every 30 runs, with a linear-extrapolation ETA.
        if run_count % 30 == 0:
            elapsed = time.time() - start_time
            eta = elapsed / run_count * (TOTAL_RUNS - run_count)
            print(f'  [{run_count:3d}/{TOTAL_RUNS}] ρ={rho:+.1f} λ={lam:.2f} | '
                  f'A_naive={result["A_naive_mean"]:.3f} | ETA: {eta/60:.1f}min')

total_time = time.time() - start_time
print(f'\n完了！ Total time: {total_time/60:.1f} min')

In [None]:
# Aggregate the per-run results over seeds.

df = pd.DataFrame(results)

# One output row per (noise_rate, rho, lambda) cell. Note that A_naive_std
# here is the spread of the per-run mean across seeds, not the within-run
# std stored in the raw frame under the same column name.
df_agg = (
    df.groupby(['noise_rate', 'rho', 'lambda'])
    .agg(
        test_acc_mean=('test_acc', 'mean'),
        test_acc_std=('test_acc', 'std'),
        A_naive_mean=('A_naive_mean', 'mean'),
        A_naive_std=('A_naive_mean', 'std'),
        cos_gval_gref=('cos_gval_gref_mean', 'mean'),
    )
    .reset_index()
)

print('集計完了')
print(df_agg.head(10))

In [None]:
# Fig C9: dependence of A_naive on λ (demonstration of the confounding)

# One panel per noise rate; squeeze=False keeps `axes` two-dimensional even
# when NOISE_RATES has a single entry.
n_panels = len(NOISE_RATES)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)
colors = plt.cm.viridis(np.linspace(0, 1, len(RHO_VALUES)))

for ax, noise_rate in zip(axes[0], NOISE_RATES):
    # round() rather than int(): e.g. 0.29 * 100 is 28.999999999999996,
    # which int() would truncate to 28.
    noise_pct = round(noise_rate * 100)

    df_noise = df_agg[df_agg['noise_rate'] == noise_rate]

    for color, rho in zip(colors, RHO_VALUES):
        df_rho = df_noise[df_noise['rho'] == rho]
        # Error bars show the std of the per-run mean across seeds.
        ax.errorbar(df_rho['lambda'], df_rho['A_naive_mean'],
                    yerr=df_rho['A_naive_std'],
                    marker='o', label=f'ρ={rho:+.1f}',
                    color=color, capsize=3, linewidth=2)

        # Dashed horizontal line marks the ideal value ρ for this curve.
        ax.axhline(y=rho, color=color, linestyle='--', alpha=0.3)

    ax.set_xlabel('λ (structure weight)', fontsize=12)
    ax.set_ylabel('A_naive = cos(g_mix, g_ref)', fontsize=12)
    ax.set_title(f'Noise = {noise_pct}%', fontsize=14, fontweight='bold')
    ax.legend(loc='best', fontsize=10)
    ax.set_ylim(-0.2, 1.1)
    ax.grid(True, alpha=0.3)

    # Annotation explaining the confounding (placed in data coordinates).
    ax.text(0.5, -0.1, 'A_naive varies with λ\n(confounding)',
            ha='center', fontsize=9, style='italic', color='red')

fig.suptitle('Fig C9: Confounding Demonstration\nA_naive depends on λ even when ρ is fixed',
             fontsize=16, fontweight='bold', y=1.05)
plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/fig_C9_A_naive_confounding.png', dpi=300, bbox_inches='tight')
plt.show()

print('✓ Fig C9 保存完了')

In [None]:
# Quantitative evaluation of the confounding: for each fixed ρ, how far the
# seed-averaged A_naive moves as λ sweeps its grid.

print('\n=== 交絡の定量的評価 ===')
print('\nρ固定時のA_naiveのλによる変動幅：')

for noise_rate in NOISE_RATES:
    # round() rather than int(): e.g. 0.29 * 100 is 28.999999999999996,
    # which int() would truncate to 28.
    noise_pct = round(noise_rate * 100)
    print(f'\nNoise = {noise_pct}%:')

    df_noise = df_agg[df_agg['noise_rate'] == noise_rate]

    for rho in RHO_VALUES:
        a_naive = df_noise.loc[df_noise['rho'] == rho, 'A_naive_mean']
        # Compute the extremes once and reuse them for the range and the report.
        a_min, a_max = a_naive.min(), a_naive.max()
        print(f'  ρ={rho:+.1f}: A_naive range = {a_max - a_min:.3f} '
              f'(min={a_min:.3f}, max={a_max:.3f})')

In [None]:
# Save results: raw per-run table, seed-aggregated table and a JSON summary.

df.to_csv(f'{SAVE_DIR}/A_naive_results_raw.csv', index=False)
df_agg.to_csv(f'{SAVE_DIR}/A_naive_results_aggregated.csv', index=False)

# Summary JSON describing the sweep that produced the tables.
grid_spec = {
    'noise_rates': NOISE_RATES,
    'rho_values': RHO_VALUES,
    'lambda_values': LAMBDA_VALUES,
    'n_seeds': len(SEEDS),
}
summary = {
    'experiment': 'A_naive_confounding',
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'purpose': 'Demonstrate that A_naive = cos(g_mix, g_ref) depends on lambda (confounding)',
    'grid': grid_spec,
    'total_runs': TOTAL_RUNS,
    'key_finding': 'A_naive varies with lambda even when rho is fixed, demonstrating confounding',
}

with open(f'{SAVE_DIR}/A_naive_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

print('\n=== 保存完了 ===')
# List every artifact of this notebook, including the figure saved earlier.
for saved_name in (
    'A_naive_results_raw.csv',
    'A_naive_results_aggregated.csv',
    'A_naive_summary.json',
    'fig_C9_A_naive_confounding.png',
):
    print(f'  {SAVE_DIR}/{saved_name}')

In [None]:
# Final summary: fixed explanatory text on the confounding and how to read Fig C9.

rule = '=' * 70
report_lines = [
    '\n' + rule,
    'A_naive Confounding Experiment 完了',
    rule,
    '\n■ 交絡の実証:',
    '  A_naive = cos(g_mix, g_ref) は λ に依存する',
    '  → ρ（方向）を測りたいのに、λ（使用量）の影響を受ける',
    '  → これが「交絡」の本質',
    '\n■ Fig C9 の読み方:',
    '  - 各線はρを固定',
    '  - λを動かすとA_naiveが変動 → 交絡あり',
    '  - 破線は「理想的なρ値」',
    '\n' + rule,
]
# A single print of the joined lines emits the same text as one print per line.
print('\n'.join(report_lines))