# Paper C - 較正実験 (Calibration) v4

## v4での修正
- **SNR範囲を下げる**：v3では課題が簡単すぎた（全設定で100%）
- center_scale: 5-15 → **1-3**
- noise_std: 0.25-0.50 → **1.0-3.0**
- これにより SNR（= center_scale / noise_std）を 0.33〜3.0 の範囲で探索する

In [None]:
# Mount Google Drive so results outlive the notebook session.
# NOTE(review): `google.colab` is presumably only importable inside Colab —
# confirm before running this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

import os
# All artifacts of this calibration run (CSV, JSON, PNG) are written here.
SAVE_DIR = '/content/drive/MyDrive/paper-C-results/calibration_v4'
# exist_ok=True makes re-running the cell harmless.
os.makedirs(SAVE_DIR, exist_ok=True)
print(f'Save directory: {SAVE_DIR}')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from datetime import datetime

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Device: {device}')
if device.type == 'cuda':
    # Report the name of CUDA device 0.
    print(f'GPU: {torch.cuda.get_device_name(0)}')

def set_seed(seed):
    """Seed every RNG this notebook draws from so a run can be repeated.

    Args:
        seed: Integer seed applied to PyTorch (CPU and all CUDA devices) and
            to NumPy's global generator.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    # Request deterministic cuDNN kernels and pin the autotuner off: with
    # benchmarking enabled, cuDNN may select a different algorithm per run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
# データ生成関数（v3と同じ）

def generate_orthogonal_centers(n_classes, dim_per_view, center_scale, seed=0):
    """Build class centers that are mutually orthogonal in the joint space.

    A random Gaussian matrix is orthonormalized with QR; each resulting
    column becomes one class center of length ``center_scale`` in the joint
    ``2 * dim_per_view``-dimensional space. The joint center is then split
    into its first and second halves, one per view. Orthogonality holds for
    the joint vectors, not necessarily for each half on its own.

    Args:
        n_classes: Number of class centers to generate.
        dim_per_view: Dimensionality of each of the two views.
        center_scale: L2 norm given to every joint center.
        seed: Seed for the private ``RandomState`` used to draw the matrix.

    Returns:
        Tuple ``(centers_A, centers_B)`` of float32 arrays, each of shape
        ``(n_classes, dim_per_view)``.

    Raises:
        ValueError: If ``n_classes`` exceeds ``2 * dim_per_view``; that many
            mutually orthogonal vectors do not exist, and the QR step would
            otherwise silently return fewer centers than requested.
    """
    total_dim = dim_per_view * 2
    if n_classes > total_dim:
        raise ValueError(
            f'n_classes ({n_classes}) must not exceed 2 * dim_per_view '
            f'({total_dim}) to obtain mutually orthogonal centers'
        )

    rng = np.random.RandomState(seed)
    random_matrix = rng.randn(total_dim, n_classes)
    # Reduced QR: Q has shape (total_dim, n_classes) with orthonormal columns.
    Q, _ = np.linalg.qr(random_matrix)
    centers_joint = Q[:, :n_classes].T * center_scale

    centers_A = centers_joint[:, :dim_per_view]
    centers_B = centers_joint[:, dim_per_view:]

    return centers_A.astype(np.float32), centers_B.astype(np.float32)

def generate_two_view_data(n_samples, centers_A, centers_B, noise_std, sample_seed):
    """Sample a labelled two-view dataset around the given class centers.

    Each sample gets a uniformly drawn class label; its view-A and view-B
    features are the class's center in that view plus independent Gaussian
    noise, and the two views are concatenated side by side.

    Args:
        n_samples: Number of samples to draw (0 yields empty arrays).
        centers_A: Array of shape ``(n_classes, dim_per_view)`` for view A.
        centers_B: Array of the same shape for view B.
        noise_std: Standard deviation of the additive Gaussian noise.
        sample_seed: Seed for the private ``RandomState`` used for sampling.

    Returns:
        Tuple ``(X, labels)``: ``X`` is float32 with the view-A columns
        followed by the view-B columns; ``labels`` is int64 of length
        ``n_samples``.
    """
    n_classes, dim_per_view = centers_A.shape

    rng = np.random.RandomState(sample_seed)
    labels = rng.randint(0, n_classes, n_samples)

    # Draw order is kept as labels -> all view-A noise -> all view-B noise.
    # One (n_samples, dim) draw consumes the RNG stream in the same order as
    # n_samples successive dim-sized draws, so results match a per-sample
    # loop while avoiding Python-level iteration.
    view_A = centers_A[labels] + rng.randn(n_samples, dim_per_view) * noise_std
    view_B = centers_B[labels] + rng.randn(n_samples, dim_per_view) * noise_std

    X = np.concatenate([view_A, view_B], axis=1)
    return X.astype(np.float32), labels.astype(np.int64)

print('データ生成関数定義完了')

In [None]:
# モデル定義

class TwoViewMLP(nn.Module):
    """Three-layer fully connected classifier over the concatenated views.

    Layout: Linear -> ReLU -> Linear -> ReLU -> Linear, with both hidden
    layers sharing the width ``hidden_dim`` and the final layer emitting one
    logit per class.
    """

    def __init__(self, input_dim=16, hidden_dim=64, n_classes=10):
        super().__init__()
        # Two hidden stages (each Linear followed by ReLU), then the logit layer.
        stack = []
        for n_in, n_out in ((input_dim, hidden_dim), (hidden_dim, hidden_dim)):
            stack += [nn.Linear(n_in, n_out), nn.ReLU()]
        stack.append(nn.Linear(hidden_dim, n_classes))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the class logits produced by the layer stack for ``x``."""
        logits = self.net(x)
        return logits

print('モデル定義完了')

In [None]:
# *** v4: adjusted SNR range ***

# Dataset size and geometry.
N_TRAIN, N_TEST = 20000, 10000
N_CLASSES = 10
DIM_PER_VIEW = 8

# v4: make the task harder.
CENTER_SCALES = [1.0, 1.5, 2.0, 2.5, 3.0]  # smaller than before
NOISE_STDS = [1.0, 1.5, 2.0, 2.5, 3.0]     # larger than before

# Optimization settings and the model-initialization seeds averaged per cell.
EPOCHS = 100
LR = 0.1
SEEDS = [0, 1, 2]

# Fixed seeds for the class centers and for the train/test samples.
CENTER_SEED, TRAIN_SAMPLE_SEED, TEST_SAMPLE_SEED = 0, 42, 43

# Test-accuracy band a setting must land in to count as calibrated.
TARGET_ACC_MIN, TARGET_ACC_MAX = 0.70, 0.85

# Extremes of the explored SNR (center_scale / noise_std) grid.
snr_min = min(CENTER_SCALES) / max(NOISE_STDS)
snr_max = max(CENTER_SCALES) / min(NOISE_STDS)

# Echo the configuration in a single write.
print('\n'.join([
    '=== 較正実験設定 (v4) ===',
    '★ v4修正: SNR範囲を下げる（課題を難しくする）',
    f'  center_scale: {CENTER_SCALES}',
    f'  noise_std: {NOISE_STDS}',
    f'  SNR範囲: {snr_min:.2f} - {snr_max:.2f}',
    f'\nデータ: train={N_TRAIN}, test={N_TEST}',
    f'ターゲット精度: {TARGET_ACC_MIN*100:.0f}% - {TARGET_ACC_MAX*100:.0f}%',
    f'総実験数: {len(CENTER_SCALES) * len(NOISE_STDS) * len(SEEDS)}',
]))

In [None]:
# 較正実験の実行関数

def run_calibration_trial(center_scale, noise_std, model_seed):
    """Train one model on clean data for a single (scale, noise) setting.

    The class centers and the train/test samples are generated from the
    module-level fixed seeds (``CENTER_SEED``, ``TRAIN_SAMPLE_SEED``,
    ``TEST_SAMPLE_SEED``), so for a given setting only the model
    initialization varies with ``model_seed``.

    Args:
        center_scale: Norm of the joint class centers.
        noise_std: Standard deviation of the additive feature noise.
        model_seed: Seed passed to ``set_seed`` right before the model is built.

    Returns:
        Tuple ``(train_acc, test_acc)`` of Python floats in [0, 1].
    """
    centers_A, centers_B = generate_orthogonal_centers(
        N_CLASSES, DIM_PER_VIEW, center_scale, seed=CENTER_SEED
    )

    X_train, y_train = generate_two_view_data(
        N_TRAIN, centers_A, centers_B, noise_std, sample_seed=TRAIN_SAMPLE_SEED
    )
    X_test, y_test = generate_two_view_data(
        N_TEST, centers_A, centers_B, noise_std, sample_seed=TEST_SAMPLE_SEED
    )

    X_train_t = torch.tensor(X_train, device=device)
    y_train_t = torch.tensor(y_train, device=device)
    X_test_t = torch.tensor(X_test, device=device)
    y_test_t = torch.tensor(y_test, device=device)

    set_seed(model_seed)
    # Size the network from the generated data instead of relying on the
    # class defaults, so changing DIM_PER_VIEW or N_CLASSES cannot leave the
    # model with mismatched input/output widths.
    model = TwoViewMLP(
        input_dim=X_train.shape[1], n_classes=centers_A.shape[0]
    ).to(device)
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Full-batch training: every epoch is exactly one optimizer step over the
    # whole training set. The mode never changes inside the loop, so it is
    # set once up front.
    model.train()
    for _ in range(EPOCHS):
        optimizer.zero_grad()
        loss = criterion(model(X_train_t), y_train_t)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        train_acc = (model(X_train_t).argmax(1) == y_train_t).float().mean().item()
        test_acc = (model(X_test_t).argmax(1) == y_test_t).float().mean().item()

    return train_acc, test_acc

print('較正実験関数定義完了')

In [None]:
# Smoke test: one trial each at the highest, a middle, and the lowest SNR of
# the grid, all with model seed 0.

print('=== 動作確認 ===')
for cs, ns in zip((3.0, 2.0, 1.0), (1.0, 2.0, 3.0)):
    train_acc, test_acc = run_calibration_trial(cs, ns, model_seed=0)
    snr = cs / ns
    print(f'scale={cs}, noise={ns} (SNR={snr:.2f}) | train={train_acc:.3f}, test={test_acc:.3f}')

In [None]:
# Run the calibration sweep: for every (center_scale, noise_std) cell, train
# one model per seed and record the seed-averaged accuracies.

print('\n=== 較正実験開始 ===')
print(f'ターゲット: test accuracy {TARGET_ACC_MIN*100:.0f}%-{TARGET_ACC_MAX*100:.0f}%')
print('-' * 70)

results = []
total = len(CENTER_SCALES) * len(NOISE_STDS) * len(SEEDS)
run_count = 0

for center_scale in CENTER_SCALES:
    for noise_std in NOISE_STDS:
        # Per-seed accuracies for this grid cell.
        train_accs = []
        test_accs = []
        for seed in SEEDS:
            run_count += 1
            train_acc, test_acc = run_calibration_trial(center_scale, noise_std, seed)
            train_accs.append(train_acc)
            test_accs.append(test_acc)

        snr = center_scale / noise_std
        mean_train = np.mean(train_accs)
        mean_test = np.mean(test_accs)
        std_test = np.std(test_accs)

        # A cell qualifies when its mean test accuracy lies inside the band.
        in_target = TARGET_ACC_MIN <= mean_test <= TARGET_ACC_MAX

        results.append(dict(
            center_scale=center_scale,
            noise_std=noise_std,
            snr=snr,
            train_acc=mean_train,
            test_acc=mean_test,
            test_std=std_test,
            in_target=in_target,
        ))

        marker = '★' if in_target else ''
        print(f'[{run_count:3d}/{total}] scale={center_scale:.1f}, noise={noise_std:.1f} (SNR={snr:.2f}) | '
              f'train={mean_train:.3f}, test={mean_test:.3f}±{std_test:.3f} {marker}')

print('-' * 70)
print('較正実験完了')

In [None]:
# Organize the results: tabulate the sweep, list the settings whose mean test
# accuracy is inside the target band, and recommend one of them.

df = pd.DataFrame(results)
df_target = df[df['in_target'].astype(bool)].copy()

# The band shown in the header is derived from the constants so it cannot
# drift from the thresholds actually used to compute `in_target`.
print(f'=== ターゲット範囲内の設定 ({TARGET_ACC_MIN*100:.0f}%-{TARGET_ACC_MAX*100:.0f}%) ===')
if len(df_target) > 0:
    print(df_target[['center_scale', 'noise_std', 'snr', 'train_acc', 'test_acc', 'test_std']].to_string(index=False))

    # Among qualifying settings, recommend the one most stable across seeds.
    best_idx = df_target['test_std'].idxmin()
    best = df_target.loc[best_idx]
    print(f'\n★ 推奨設定（最小分散）:')
    print(f'   center_scale = {best["center_scale"]}')
    print(f'   noise_std = {best["noise_std"]}')
    print(f'   SNR = {best["snr"]:.2f}')
    print(f'   test_acc = {best["test_acc"]:.3f} ± {best["test_std"]:.3f}')
else:
    print('ターゲット範囲内の設定が見つかりませんでした。')
    print('\n全設定の結果:')
    print(df[['center_scale', 'noise_std', 'snr', 'train_acc', 'test_acc']].to_string(index=False))

    # Closest setting to the middle of the band. The distance is kept in a
    # local Series rather than written into `df`, so the results table (and
    # the CSV saved from it) has the same columns whichever branch runs.
    target_center = (TARGET_ACC_MIN + TARGET_ACC_MAX) / 2
    distance = (df['test_acc'] - target_center).abs()
    closest = df.loc[distance.idxmin()]
    print(f'\n最もターゲット({target_center:.0%})に近い設定:')
    print(f'   center_scale = {closest["center_scale"]}, noise_std = {closest["noise_std"]}')
    print(f'   SNR = {closest["snr"]:.2f}')
    print(f'   test_acc = {closest["test_acc"]:.3f}')

In [None]:
# Heatmap visualization.
# Left panel: mean test accuracy per (noise_std, center_scale) cell.
# Right panel: which cells fall inside the target accuracy band.

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

pivot_test = df.pivot(index='noise_std', columns='center_scale', values='test_acc')
pivot_in_target = df.pivot(index='noise_std', columns='center_scale', values='in_target')

# Tick labels are read from the pivot itself rather than from CENTER_SCALES /
# NOISE_STDS, so they always match the rows and columns that are drawn,
# whatever order the configuration lists are written in.
scale_labels = pivot_test.columns.tolist()
noise_labels = pivot_test.index.tolist()

# --- Left: accuracy heatmap, annotated with the value of every cell.
im1 = ax1.imshow(pivot_test.values, cmap='RdYlGn', aspect='auto', vmin=0.1, vmax=1.0)
ax1.set_title('Test Accuracy (Clean Training)', fontsize=14, fontweight='bold')
plt.colorbar(im1, ax=ax1, label='Accuracy')

for (i, j), val in np.ndenumerate(pivot_test.values):
    # White text on the dark (low-accuracy) end of the colormap.
    color = 'white' if val < 0.5 else 'black'
    # Bold marks cells inside the target band.
    weight = 'bold' if TARGET_ACC_MIN <= val <= TARGET_ACC_MAX else 'normal'
    ax1.text(j, i, f'{val:.2f}', ha='center', va='center',
             fontsize=9, color=color, fontweight=weight)

# --- Right: binary in-target map, starred where a cell qualifies.
im2 = ax2.imshow(pivot_in_target.values.astype(float), cmap='Blues', aspect='auto', vmin=0, vmax=1)
ax2.set_title(f'In Target Range ({TARGET_ACC_MIN*100:.0f}%-{TARGET_ACC_MAX*100:.0f}%)',
              fontsize=14, fontweight='bold')

for (i, j), val in np.ndenumerate(pivot_in_target.values):
    ax2.text(j, i, '★' if val else '', ha='center', va='center', fontsize=16)

# Both panels share the same axes layout.
for ax in axes:
    ax.set_xticks(range(len(scale_labels)))
    ax.set_xticklabels(scale_labels)
    ax.set_yticks(range(len(noise_labels)))
    ax.set_yticklabels(noise_labels)
    ax.set_xlabel('center_scale', fontsize=12)
    ax.set_ylabel('noise_std', fontsize=12)

plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/calibration_heatmap_v4.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Save the results: the full sweep table as CSV and the chosen calibration
# setting (or the closest miss) as JSON.

df.to_csv(f'{SAVE_DIR}/calibration_results_v4.csv', index=False)


def _settings_summary(row):
    """Return the JSON-ready description of one row of the results table."""
    return {
        'center_scale': float(row['center_scale']),
        'noise_std': float(row['noise_std']),
        'snr': float(row['snr']),
        'expected_test_acc': float(row['test_acc']),
        'test_acc_std': float(row['test_std'])
    }


# Fields common to both outcomes. The rule text is built from the constants
# so it stays in sync with the thresholds actually applied.
calibration_config = {
    'calibration_rule': (
        'Clean training test accuracy in '
        f'{TARGET_ACC_MIN*100:.0f}-{TARGET_ACC_MAX*100:.0f}% range'
    ),
    'version': 'v4 (adjusted SNR range)',
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
}
data_settings = {
    'n_train': N_TRAIN,
    'n_test': N_TEST,
    'n_classes': N_CLASSES,
    'dim_per_view': DIM_PER_VIEW,
    'center_seed': CENTER_SEED
}

if len(df_target) > 0:
    # Recommend the in-target setting with the smallest spread across seeds.
    best_idx = df_target['test_std'].idxmin()
    best = df_target.loc[best_idx]

    calibration_config['recommended_settings'] = _settings_summary(best)
    calibration_config['data_settings'] = data_settings
    calibration_config['all_valid_settings'] = df_target[
        ['center_scale', 'noise_std', 'snr', 'test_acc', 'test_std']
    ].to_dict('records')
else:
    # Nothing qualified: record the setting closest to the middle of the band.
    # The distance is a local Series; `df` itself is left unmodified.
    target_center = (TARGET_ACC_MIN + TARGET_ACC_MAX) / 2
    distance = (df['test_acc'] - target_center).abs()
    closest = df.loc[distance.idxmin()]

    calibration_config['warning'] = 'No settings found in target range'
    calibration_config['closest_settings'] = _settings_summary(closest)
    calibration_config['data_settings'] = data_settings

# Explicit encoding keeps the file independent of the platform default.
with open(f'{SAVE_DIR}/calibration_config_v4.json', 'w', encoding='utf-8') as f:
    json.dump(calibration_config, f, indent=2)

print('\n=== 保存完了 ===')
print(f'  {SAVE_DIR}/calibration_results_v4.csv')
print(f'  {SAVE_DIR}/calibration_config_v4.json')
print(f'  {SAVE_DIR}/calibration_heatmap_v4.png')

In [None]:
# Final summary: restate the v4 change, then either the recommended setting
# with the follow-up steps or a warning that nothing hit the target band.

print('\n' + '=' * 70, '較正実験 v4 完了', '=' * 70, sep='\n')

print(
    '\n■ v4での修正:',
    '  SNR範囲を下げて課題を難しくした',
    f'  center_scale: {CENTER_SCALES}',
    f'  noise_std: {NOISE_STDS}',
    sep='\n',
)

if len(df_target) == 0:
    print('\n⚠️ ターゲット範囲内の設定が見つかりませんでした。')
    print('  結果を確認し、探索範囲の追加調整を検討してください。')
else:
    # Same selection rule as the recommendation: smallest test-accuracy std.
    best_idx = df_target['test_std'].idxmin()
    best = df_target.loc[best_idx]

    print(
        '\n■ 推奨設定:',
        f'  center_scale = {best["center_scale"]}',
        f'  noise_std = {best["noise_std"]}',
        f'  SNR = {best["snr"]:.2f}',
        f'  → test_acc = {best["test_acc"]:.1%} ± {best["test_std"]:.1%}',
        sep='\n',
    )

    print(
        '\n■ 次のステップ:',
        '  1. この設定でE3縮小版（90 runs）を実行',
        '  2. 交絡の観察を確認',
        '  3. 本番E3を実行',
        sep='\n',
    )

print('\n' + '=' * 70)