# Exp Q1c2: Finite-Size Scaling (width=2.0×, η=0.8, λ=0.38)

## 実験設計
- **Model**: ResNet18 (width=2.0×)
- **η**: 0.8
- **λ**: 0.38のみ
- **Seeds**: 10
- **Total**: 10 runs

## 推定時間
10 runs × 22 min ≈ **3.7 時間**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
import glob
from datetime import datetime

# Name prefix of the Q1 experiment folders and the Drive folder that holds them.
EXP_NAME = 'exp_Q1_finite_size_scaling'
BASE_DIR = '/content/drive/MyDrive/dual-gradient-learning/Paper-A'

# This notebook never creates a Q1 directory of its own: it attaches to the
# lexicographically last '<EXP_NAME>_*' match and aborts when there is none.
existing = glob.glob(f'{BASE_DIR}/{EXP_NAME}_*')
if not existing:
    raise RuntimeError('No existing Q1 directory found!')

SAVE_DIR = max(existing)
print(f'üîÑ Using existing directory: {SAVE_DIR}')

# Make sure the figures sub-folder exists inside the selected directory.
os.makedirs(f'{SAVE_DIR}/figures', exist_ok=True)
print(f'This notebook: Q1c2 (width=2.0√ó, Œ∑=0.8, Œª=0.38)')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils import parameters_to_vector
import torchvision
import torchvision.transforms as transforms
import numpy as np
import json
import time

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Device: {device}')
if device.type == 'cuda':
    # Only query the device name when a CUDA device was actually selected.
    print(f'GPU: {torch.cuda.get_device_name()}')

In [None]:
class BasicBlock(nn.Module):
    """Residual block: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, plus a shortcut, then ReLU.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both 3x3 convolutions.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # The shortcut is a pass-through (empty Sequential) unless the main
        # path changes the spatial size or channel count; then a 1x1 conv + BN
        # projects the input to the matching shape.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        out = self.bn2(self.conv2(hidden))
        skip = self.shortcut(x)
        out += skip
        return F.relu(out)

class ResNetScalable(nn.Module):
    """ResNet with four stages of two ``BasicBlock``s each and scalable channel widths.

    The base stage widths (64, 128, 256, 512) are multiplied by ``width_mult``
    and truncated to ``int``.

    Args:
        width_mult: Multiplier applied to every stage's channel count.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, width_mult=1.0, num_classes=10):
        super().__init__()
        widths = [int(base * width_mult) for base in (64, 128, 256, 512)]
        # Running input-channel count; updated by _make_layer as stages are built.
        self.in_planes = int(64 * width_mult)

        self.conv1 = nn.Conv2d(3, widths[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(widths[0])
        self.layer1 = self._make_layer(widths[0], 2, stride=1)
        self.layer2 = self._make_layer(widths[1], 2, stride=2)
        self.layer3 = self._make_layer(widths[2], 2, stride=2)
        self.layer4 = self._make_layer(widths[3], 2, stride=2)
        self.linear = nn.Linear(widths[3], num_classes)

    def _make_layer(self, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use stride 1."""
        block_strides = [stride] + [1] * (num_blocks - 1)
        # The first block consumes the current channel count, later ones see `planes`.
        block_inputs = [self.in_planes] + [planes] * (num_blocks - 1)
        blocks = [
            BasicBlock(n_in, planes, block_stride)
            for n_in, block_stride in zip(block_inputs, block_strides)
        ]
        self.in_planes = planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return the class logits for the image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        pooled = F.adaptive_avg_pool2d(out, (1, 1))
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

class IndexedDataset(Dataset):
    """Wrap a dataset of ``(img, label)`` pairs so each item also carries its index.

    Items are returned as ``(img, label, idx)``, which lets a training loop
    look up per-sample data by dataset position.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample, target = self.dataset[idx]
        return sample, target, idx

In [None]:
# --- Training hyperparameters shared by every run ---
BATCH_SIZE = 256
NUM_WORKERS = 4
EPOCHS = 100
LR = 0.1
K = 16

# --- Sweep handled by this notebook ---
WIDTH_MULT = 2.0
NOISE_RATE = 0.8
LAMBDAS = [0.38]  # specific to this notebook
SEEDS = list(range(10))

# One experiment per (lambda, seed) pair, lambda-major, at fixed width and noise rate.
experiments = [
    {'width_mult': WIDTH_MULT, 'noise_rate': NOISE_RATE, 'lambda': lam, 'seed': seed}
    for lam in LAMBDAS
    for seed in SEEDS
]

print(f'Target: width={WIDTH_MULT}√ó, Œ∑={NOISE_RATE}, Œª={LAMBDAS}')
print(f'Total experiments: {len(experiments)}')
# The estimate assumes 22 minutes per run.
print(f'Estimated time: {len(experiments) * 22 / 60:.1f} hours')

In [None]:
def set_seed(seed):
    """Seed PyTorch (CPU and all CUDA devices) and NumPy's global RNG with ``seed``."""
    seeders = (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed)
    for seed_fn in seeders:
        seed_fn(seed)

def load_cifar10():
    """Return ``(trainset, testset)`` CIFAR-10 datasets rooted at ``./data``.

    Both splits are downloaded if missing. The training split applies a
    padded random crop and a random horizontal flip before tensor conversion
    and per-channel normalization; the test split is only converted and
    normalized.
    """
    # Per-channel normalization constants used for both splits.
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2470, 0.2435, 0.2616)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    trainset = torchvision.datasets.CIFAR10(
        './data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR10(
        './data', train=False, download=True, transform=transform_test)
    return trainset, testset

def inject_label_noise(labels, noise_rate, seed, num_classes=10):
    """Return a copy of ``labels`` in which a fixed fraction is replaced by wrong classes.

    ``int(noise_rate * len(labels))`` distinct positions are drawn without
    replacement, and each one receives a class drawn uniformly from
    ``range(num_classes)`` excluding the original label at that position.

    Note:
        This reseeds NumPy's *global* RNG with ``seed`` so the corruption is
        reproducible; any later ``np.random`` draw continues from that state.

    Args:
        labels: Sequence of class labels; it is copied, never modified.
        noise_rate: Fraction of labels to corrupt.
        seed: Seed passed to ``np.random.seed``.
        num_classes: Number of classes to draw replacement labels from.
            Defaults to 10.

    Returns:
        A NumPy array of the same length as ``labels``.
    """
    np.random.seed(seed)
    labels = np.array(labels)
    n_noisy = int(noise_rate * len(labels))
    noisy_idx = np.random.choice(len(labels), n_noisy, replace=False)

    # The replacement candidates depend only on the original class, so build
    # them once per class instead of once per corrupted sample. The RNG calls
    # below are unchanged, so a given seed still yields the same labels.
    all_classes = np.array(list(range(num_classes)))
    alternatives = {
        c: np.array([l for l in range(num_classes) if l != c])
        for c in range(num_classes)
    }

    noisy_labels = labels.copy()
    for idx in noisy_idx:
        # A label outside range(num_classes) excludes nothing, so every class
        # stays a candidate for it.
        candidates = alternatives.get(labels[idx], all_classes)
        noisy_labels[idx] = np.random.choice(candidates)
    return noisy_labels

def evaluate(model, test_loader):
    """Return the top-1 accuracy (a fraction in [0, 1]) of ``model`` over ``test_loader``.

    The model is switched to eval mode and left in it; gradients are disabled
    for the duration of the pass. ``test_loader`` must yield
    ``(inputs, targets)`` batches.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            # Predicted class = index of the largest logit in each row.
            predicted = model(batch_inputs).argmax(dim=1)
            n_seen += batch_targets.size(0)
            n_correct += (predicted == batch_targets).sum().item()
    return n_correct / n_seen

In [None]:
def _flat_grad(model):
    """Return a fresh 1-D copy of every parameter gradient of ``model``, in parameter order."""
    return parameters_to_vector([p.grad for p in model.parameters()]).clone()


def _set_flat_grad(model, flat_grad):
    """Write consecutive slices of ``flat_grad`` into ``p.grad`` for each parameter of ``model``."""
    offset = 0
    for p in model.parameters():
        numel = p.numel()
        # clone() gives each parameter its own gradient storage instead of a
        # view into the shared flat vector.
        p.grad = flat_grad[offset:offset + numel].view(p.shape).clone()
        offset += numel


def train_dual_gradient(model, train_loader, test_loader, clean_labels, noisy_labels, lam):
    """Train ``model`` with a convex mix of noisy-label and clean-label gradient directions.

    Every step computes the gradient of the cross-entropy on the noisy labels
    ("struct"). Every ``K`` steps it also recomputes, on the same batch, the
    gradient on the clean labels ("value"); in between, the cached value
    gradient is reused. Both are normalized to unit length and the optimizer
    steps along ``(1 - lam) * g_struct + lam * g_value``.

    Training runs for ``EPOCHS`` epochs with SGD (``lr=LR``, momentum 0.9,
    weight decay 5e-4) and a 10x learning-rate drop at epochs 50 and 75.
    Test accuracy is measured every 10 epochs and once more at the end.

    Args:
        model: Network to train in place; expected to live on ``device``.
        train_loader: Yields ``(inputs, label, indices)`` batches; the label
            from the loader is ignored and labels are looked up by index.
        test_loader: Loader handed to ``evaluate``.
        clean_labels: Per-sample true labels, indexable by dataset index.
        noisy_labels: Per-sample corrupted labels, indexable by dataset index.
        lam: Weight of the clean-label direction in the mixed gradient.

    Returns:
        ``(final_acc, best_acc, mean_cos, history)``: the accuracy after the
        last epoch, the best accuracy over all evaluations (including the
        final one), the mean over epochs of the per-epoch mean cosine
        similarity between the two gradient directions, and a dict with
        ``'epoch'``, ``'test_acc'`` and ``'test_error'`` lists recorded at
        every 10th epoch.
    """
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 75], gamma=0.1)
    criterion = nn.CrossEntropyLoss()
    # Keep both label sets on the device so batches can be gathered by index.
    clean_labels_t = torch.tensor(clean_labels, device=device)
    noisy_labels_t = torch.tensor(noisy_labels, device=device)
    cached_g_value = None
    global_step = 0
    best_acc = 0
    cos_history = []
    history = {'epoch': [], 'test_acc': [], 'test_error': []}

    for epoch in range(EPOCHS):
        model.train()
        epoch_cos = []
        for inputs, _, indices in train_loader:
            inputs = inputs.to(device, non_blocking=True)
            indices = indices.to(device, non_blocking=True)

            # Gradient on the noisy labels. The graph is not retained: the
            # clean-label pass below runs its own forward, so nothing ever
            # backpropagates through this graph a second time.
            optimizer.zero_grad()
            loss_struct = criterion(model(inputs), noisy_labels_t[indices])
            loss_struct.backward()
            g_struct = _flat_grad(model)

            # Refresh the clean-label gradient every K steps (and on the very
            # first step). NOTE(review): this second forward pass also runs in
            # train mode on the same batch - confirm that is intended for
            # layers that track batch statistics.
            if global_step % K == 0 or cached_g_value is None:
                optimizer.zero_grad()
                loss_value = criterion(model(inputs), clean_labels_t[indices])
                loss_value.backward()
                cached_g_value = _flat_grad(model)

            # Unit-normalize both directions; the epsilon guards a zero gradient.
            g_struct_norm = g_struct / (g_struct.norm() + 1e-12)
            g_value_norm = cached_g_value / (cached_g_value.norm() + 1e-12)
            cos_sim = (g_struct_norm @ g_value_norm).item()
            epoch_cos.append(cos_sim)
            g_mix = (1 - lam) * g_struct_norm + lam * g_value_norm

            # Hand the mixed direction to the optimizer as if it were the gradient.
            optimizer.zero_grad()
            _set_flat_grad(model, g_mix)
            optimizer.step()
            global_step += 1

        scheduler.step()
        cos_history.append(np.mean(epoch_cos))
        if (epoch + 1) % 10 == 0:
            acc = evaluate(model, test_loader)
            best_acc = max(best_acc, acc)
            history['epoch'].append(epoch + 1)
            history['test_acc'].append(acc)
            history['test_error'].append(1 - acc)

    final_acc = evaluate(model, test_loader)
    return final_acc, max(best_acc, final_acc), np.mean(cos_history), history

In [None]:
# Load CIFAR-10 once; the same datasets and loaders are reused by every run.
trainset, testset = load_cifar10()
clean_labels = np.array(trainset.targets)
# The training loader is index-aware so per-sample labels can be looked up by position.
train_loader = DataLoader(IndexedDataset(trainset), batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
test_loader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
print('Data prepared')

# Throwaway forward passes on random input before the timed runs start.
# They run under no_grad and their outputs are not kept, so no autograd graph
# keeps the warm-up model's tensors reachable after `del`, and empty_cache()
# can actually hand the memory back.
warmup_model = ResNetScalable(width_mult=WIDTH_MULT).to(device)
with torch.no_grad():
    for _ in range(20):
        warmup_model(torch.randn(BATCH_SIZE, 3, 32, 32, device=device))
del warmup_model
torch.cuda.empty_cache()
print('GPU warmed up')

In [None]:
def _write_json_atomic(path, payload):
    """Serialize ``payload`` to ``path`` through a temporary sibling file.

    The data is first written in full to ``<path>.tmp`` and then moved over
    ``path`` with ``os.replace``, so an interruption or a serialization error
    during the write leaves the previous file intact.
    """
    tmp_path = f'{path}.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(payload, f, indent=2)
    os.replace(tmp_path, path)


# Results accumulated so far, and the (width, eta, lambda, seed) keys they cover.
results = []
checkpoint_file = f'{SAVE_DIR}/exp_Q1c2_checkpoint.json'
completed = set()

# Resume: runs already present in the checkpoint are skipped below.
if os.path.exists(checkpoint_file):
    with open(checkpoint_file, 'r') as f:
        results = json.load(f)
    for r in results:
        completed.add((r['width_mult'], r['noise_rate'], r['lambda'], r['seed']))
    print(f'Checkpoint loaded: {len(results)} runs completed')

total = len(experiments)

for exp in experiments:
    width = exp['width_mult']
    eta = exp['noise_rate']
    lam = exp['lambda']
    seed = exp['seed']

    if (width, eta, lam, seed) in completed:
        continue

    # Run numbers follow completion order, not the position in `experiments`.
    run_num = len(completed) + 1
    print(f'\n[{run_num}/{total}] width={width}√ó Œ∑={eta} Œª={lam} seed={seed}')

    # The same seed drives both the model/data randomness and the label corruption.
    set_seed(seed)
    noisy_labels = inject_label_noise(clean_labels, eta, seed)
    model = ResNetScalable(width_mult=width).to(device)

    start_time = time.time()
    final_acc, best_acc, avg_cos, history = train_dual_gradient(
        model, train_loader, test_loader, clean_labels, noisy_labels, lam)
    elapsed = time.time() - start_time

    result = {
        'experiment_id': f'Q1c2-{run_num:03d}',
        'experiment': 'exp_Q1c2_finite_size_scaling',
        'width_mult': width, 'noise_rate': eta, 'lambda': lam, 'seed': seed,
        'test_acc': final_acc, 'test_error': 1 - final_acc,
        'best_acc': best_acc, 'best_error': 1 - best_acc,
        'avg_cos_struct_value': avg_cos, 'time_seconds': elapsed, 'history': history
    }
    results.append(result)
    completed.add((width, eta, lam, seed))

    status = '‚úÖ' if best_acc > 0.85 else ('‚ö†Ô∏è COLLAPSE' if best_acc < 0.5 else '')
    print(f'  Error: {1-best_acc:.4f} | cos: {avg_cos:.4f} | Time: {elapsed/60:.1f} min {status}')

    # Persist after every run; the atomic write keeps the previous checkpoint
    # usable if the session dies mid-save.
    _write_json_atomic(checkpoint_file, results)

    # The ETA extrapolates from the duration of the run that just finished.
    remaining = total - len(completed)
    print(f'  Progress: {len(completed)}/{total} | ETA: {remaining * elapsed / 3600:.1f} hours')

    # Drop the model before the next run allocates a new one.
    del model
    torch.cuda.empty_cache()

print('\n' + '='*70)
print('Q1c2 (width=2.0√ó, Œ∑=0.8, Œª=0.38) COMPLETED!')
print('='*70)

In [None]:
import pandas as pd

# Full results, including each run's per-epoch history, go to JSON.
results_json_path = f'{SAVE_DIR}/exp_Q1c2_results.json'
with open(results_json_path, 'w') as f:
    json.dump(results, f, indent=2)

# The CSV holds one flat row per run: every field except the nested history.
results_flat = []
for run in results:
    row = dict(run)
    row.pop('history', None)
    results_flat.append(row)

df = pd.DataFrame(results_flat)
results_csv_path = f'{SAVE_DIR}/exp_Q1c2_results.csv'
df.to_csv(results_csv_path, index=False)

print(f'Results saved: {len(results)} runs')