# Exp S1c: Coexistence Map (λ=0.50, 0.52)

## ÁõÆÁöÑ
Áõ∏ÂÖ±Â≠òÈ†òÂüü„ÅÆÈ´òŒªÂÅ¥ÔºöcollapseÂÑ™‰Ωç„Å´„Å™„ÇäÂßã„ÇÅ„ÇãÈ†òÂüü„ÇíÊ∏¨ÂÆö„ÄÇ

## 実験設計
- **λ**: 0.50, 0.52
- **η**: 0.4
- **Seeds**: 50 per λ (0-49)
- **Total**: 100 runs

## 推定時間
~100 × 5 min ≈ **8h**

In [None]:
# Mount Google Drive at /content/drive (Colab-only API); the experiment
# output directory configured below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import os, glob, json, time
from datetime import datetime

# Identifiers for the experiment family, this notebook, and the Drive root.
EXP_NAME = 'exp_S1_coexistence_map'
NOTEBOOK_ID = 'S1c'
BASE_DIR = '/content/drive/MyDrive/dual-gradient-learning/Paper-A'

# Reuse the lexicographically last existing output directory for this
# experiment when one exists; otherwise create a fresh timestamped one.
existing = glob.glob(f'{BASE_DIR}/{EXP_NAME}_*')
if not existing:
    TIMESTAMP = datetime.now().strftime('%Y%m%d_%H%M%S')
    SAVE_DIR = f'{BASE_DIR}/{EXP_NAME}_{TIMESTAMP}'
    os.makedirs(SAVE_DIR, exist_ok=True)
    print(f'üÜï New: {SAVE_DIR}')
else:
    SAVE_DIR = max(existing)
    print(f'üîÑ Resuming: {SAVE_DIR}')

# Figures go into a sub-directory of the run directory.
os.makedirs(f'{SAVE_DIR}/figures', exist_ok=True)
print(f'Notebook: {NOTEBOOK_ID}')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils import parameters_to_vector
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18
import numpy as np

# Pick the compute device once; every later cell reads the module-level `device`.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f'Device: {device}')
    print(f'GPU: {torch.cuda.get_device_name()}')
else:
    device = torch.device('cpu')
    print(f'Device: {device}')

In [None]:
def get_resnet18():
    """Build an untrained 10-class ResNet-18 with a 3x3 stem and no max-pool.

    The stock first convolution is replaced by a 3x3, stride-1, padding-1
    convolution without bias, and the max-pool layer is replaced by an
    identity module.
    """
    net = resnet18(weights=None, num_classes=10)
    stem = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    net.conv1 = stem
    net.maxpool = nn.Identity()
    return net

class IndexedDataset(Dataset):
    """Wrap a dataset of ``(image, label)`` pairs so each item also carries its index.

    ``wrapped[i]`` returns ``(image, label, i)``; the length is that of the
    wrapped dataset.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        # The wrapped item must unpack into exactly two values.
        image, target = self.dataset[idx]
        return image, target, idx

In [None]:
# Data-loading settings.
BATCH_SIZE = 256
NUM_WORKERS = 4

# Optimisation settings.
EPOCHS = 100
LR = 0.1

# Dual-gradient / label-noise settings.
K = 16
NOISE_RATE = 0.4

# Sweep axes: two lambda values, fifty seeds each.
LAMBDA_VALUES = [0.50, 0.52]
SEEDS = [*range(50)]

# Full grid, lambda varying slowest and seed fastest.
experiments = [
    {'lambda': lam_value, 'seed': seed_value}
    for lam_value in LAMBDA_VALUES
    for seed_value in SEEDS
]
print(f'Total: {len(experiments)} runs')

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and force deterministic cuDNN.

    Args:
        seed: Integer seed applied to ``random``, ``numpy.random``, the torch
            CPU generator and all CUDA generators.

    Side effects:
        Sets ``torch.backends.cudnn.deterministic = True`` and
        ``torch.backends.cudnn.benchmark = False`` process-wide.
    """
    # Local import so this function does not depend on a file-level import.
    import random

    # Python's own RNG was previously left unseeded, so anything drawing from
    # it would differ between otherwise identical runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def inject_label_noise(labels, noise_rate, seed, num_classes=10):
    """Return a copy of *labels* with a fixed fraction replaced by a different class.

    Exactly ``int(noise_rate * len(labels))`` distinct positions are chosen
    without replacement, and each chosen label is replaced by a class drawn
    from ``range(num_classes)`` excluding the original label.

    Args:
        labels: Label container supporting ``.copy()``, ``len()`` and integer
            indexing (the caller passes a NumPy integer array).
        noise_rate: Fraction of labels to corrupt.
        seed: Seed for NumPy's global RNG. NOTE: this reseeds
            ``np.random`` globally, not a private generator.
        num_classes: Number of classes to draw replacements from. Defaults to
            10, which reproduces the previous hard-coded behaviour exactly.

    Returns:
        The corrupted copy; *labels* itself is not modified.
    """
    np.random.seed(seed)
    noisy = labels.copy()
    n_noisy = int(noise_rate * len(labels))
    flip_positions = np.random.choice(len(labels), n_noisy, replace=False)
    for pos in flip_positions:
        # Candidate list is rebuilt per position, in ascending order, so the
        # RNG draw sequence matches the original implementation.
        candidates = [c for c in range(num_classes) if c != labels[pos]]
        noisy[pos] = np.random.choice(candidates)
    return noisy

def load_cifar10():
    """Return ``(train_set, test_set)`` CIFAR-10 datasets stored under ``./data``.

    The training set uses random 32x32 crops with 4-pixel padding and random
    horizontal flips before tensor conversion and normalisation; the test set
    is only converted and normalised. Both are downloaded if missing.
    """
    normalize = transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.2010))
    train_tf = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    test_tf = transforms.Compose([transforms.ToTensor(), normalize])
    train_set = torchvision.datasets.CIFAR10('./data', train=True, transform=train_tf, download=True)
    test_set = torchvision.datasets.CIFAR10('./data', train=False, transform=test_tf, download=True)
    return train_set, test_set

def evaluate(model, loader):
    """Return the top-1 accuracy of *model* over all ``(x, y)`` batches in *loader*.

    The model is switched to eval mode and left there; batches are moved to
    the module-level ``device`` and gradients are disabled during the pass.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            predictions = model(inputs).argmax(1)
            n_correct += (predictions == targets).sum().item()
            n_seen += targets.size(0)
    return n_correct / n_seen

In [None]:
def _flat_grad(model):
    """Return a copy of every parameter's ``.grad`` concatenated into one 1-D tensor."""
    return parameters_to_vector([p.grad for p in model.parameters()]).clone()


def _assign_flat_grad(model, flat):
    """Write consecutive slices of the 1-D tensor *flat* into each parameter's ``.grad``."""
    offset = 0
    for p in model.parameters():
        n = p.numel()
        p.grad = flat[offset:offset + n].view(p.shape).clone()
        offset += n


def train_dual_gradient(model, train_loader, test_loader, clean_labels, noisy_labels, lam):
    """Train *model* with SGD on a mix of noisy-label and clean-label gradients.

    At every step the gradient of the cross-entropy loss against the noisy
    labels (``gs``) is computed on the current batch. Every ``K`` steps the
    gradient against the clean labels (``gv``) is recomputed on the current
    batch and cached; between refreshes the cached vector is reused. Both are
    scaled to unit norm and the update direction is
    ``(1 - lam) * gs_n + lam * gv_n``, which is handed to SGD (momentum 0.9,
    weight decay 5e-4, LR dropped 10x at epochs 50 and 75).

    Args:
        model: Network to train in place; assumed to already be on ``device``.
        train_loader: Yields ``(x, label, idx)``; the yielded label is ignored
            and ``idx`` is used to look up labels in the arrays below.
        test_loader: Yields ``(x, y)``; passed to ``evaluate`` after training.
        clean_labels: Per-sample clean labels, indexable by dataset index.
        noisy_labels: Per-sample noisy labels, indexable by dataset index.
        lam: Mixing weight of the clean-label gradient.

    Returns:
        ``(final_test_error, mean_cosine)`` where ``final_test_error`` is
        ``1 - evaluate(model, test_loader)`` and ``mean_cosine`` is the mean
        over epochs of the per-epoch mean cosine between ``gs`` and the cached
        ``gv``.
    """
    opt = optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
    sched = optim.lr_scheduler.MultiStepLR(opt, [50, 75], 0.1)
    crit = nn.CrossEntropyLoss()
    clean_t = torch.tensor(clean_labels, device=device)
    noisy_t = torch.tensor(noisy_labels, device=device)
    cached_gv = None
    step = 0
    cos_hist = []

    for _ in range(EPOCHS):
        model.train()
        ep_cos = []
        for x, _label, idx in train_loader:
            x, idx = x.to(device), idx.to(device)
            bn, bc = noisy_t[idx], clean_t[idx]

            # Noisy-label gradient. The graph is not retained: the clean-label
            # pass below runs its own forward, so this graph is never reused.
            opt.zero_grad()
            loss_s = crit(model(x), bn)
            loss_s.backward()
            gs = _flat_grad(model)

            # Refresh the cached clean-label gradient every K steps (and on
            # the very first step). This is a second forward pass in train
            # mode on the same batch.
            if step % K == 0 or cached_gv is None:
                opt.zero_grad()
                loss_v = crit(model(x), bc)
                loss_v.backward()
                cached_gv = _flat_grad(model)

            # Unit-normalise both directions; epsilon guards a zero gradient.
            gs_n = gs / (gs.norm() + 1e-12)
            gv_n = cached_gv / (cached_gv.norm() + 1e-12)
            ep_cos.append((gs_n @ gv_n).item())

            # Replace the parameter gradients with the mixed direction and
            # let the optimizer apply momentum / weight decay on top of it.
            g_mix = (1 - lam) * gs_n + lam * gv_n
            opt.zero_grad()
            _assign_flat_grad(model, g_mix)
            opt.step()
            step += 1
        sched.step()
        cos_hist.append(np.mean(ep_cos))

    return 1 - evaluate(model, test_loader), np.mean(cos_hist)

In [None]:
# Build the CIFAR-10 datasets and loaders once; every run in the sweep shares them.
trainset, testset = load_cifar10()
clean_labels = np.array(trainset.targets)
train_loader = DataLoader(IndexedDataset(trainset), BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
test_loader = DataLoader(testset, BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

# Warm-up: push 20 random batches through a throwaway model, then drop it.
# The output is deliberately not bound to a name: binding it (previously to
# `_`) left the last output tensor and its autograd graph referenced after the
# loop, so deleting the model and emptying the cache could not release them.
m = get_resnet18().to(device)
for _ in range(20):
    m(torch.randn(BATCH_SIZE, 3, 32, 32, device=device))
del m
torch.cuda.empty_cache()
print('Ready')

In [None]:
# Sweep driver: runs every (lambda, seed) pair not yet present in the
# checkpoint and rewrites the checkpoint after each finished run.
results = []
ckpt = f'{SAVE_DIR}/{NOTEBOOK_ID}_checkpoint.json'
done = set()

# Resume: reload previous results and remember which pairs are finished.
if os.path.exists(ckpt):
    with open(ckpt) as ckpt_file:
        results = json.load(ckpt_file)
    done = {(r['lambda'], r['seed']) for r in results}
    print(f'Loaded: {len(done)} done')

total = len(experiments)
for exp in experiments:
    lam, seed = exp['lambda'], exp['seed']
    if (lam, seed) in done:
        continue

    run = len(done) + 1
    print(f'\n[{run}/{total}] Œª={lam} seed={seed}')

    # Same seed drives the RNGs and the label-noise pattern for this run.
    set_seed(seed)
    noisy = inject_label_noise(clean_labels, NOISE_RATE, seed)
    model = get_resnet18().to(device)

    t0 = time.time()
    err, cos = train_dual_gradient(model, train_loader, test_loader, clean_labels, noisy, lam)
    dt = time.time() - t0

    # Phase label from final test error: <= 0.20 ordered, >= 0.40 collapsed.
    phase = 'ordered' if err <= 0.20 else ('collapsed' if err >= 0.40 else 'intermediate')
    results.append({'experiment_id': f'{NOTEBOOK_ID}-{run:03d}', 'lambda': lam, 'seed': seed,
                    'final_error': err, 'avg_cos': cos, 'phase': phase, 'time': dt})
    done.add((lam, seed))

    st = '‚úÖ' if phase == 'ordered' else ('‚ö†Ô∏è' if phase == 'collapsed' else 'üî∂')
    print(f'  Error: {err:.4f} | {phase} {st} | {dt/60:.1f}min')

    # Write to a temporary file and swap it in, so an interruption during the
    # write cannot leave a truncated checkpoint that would break resuming.
    tmp_ckpt = f'{ckpt}.tmp'
    with open(tmp_ckpt, 'w') as ckpt_file:
        json.dump(results, ckpt_file, indent=2)
    os.replace(tmp_ckpt, ckpt)
    print(f'  ETA: {(total-run)*dt/3600:.1f}h')

    del model
    torch.cuda.empty_cache()

print('\n' + '='*50 + f'\n{NOTEBOOK_ID} DONE\n' + '='*50)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Export the final results as JSON and CSV next to the checkpoint.
# The JSON file is written through a context manager so the handle is closed
# (and the data flushed) deterministically.
with open(f'{SAVE_DIR}/{NOTEBOOK_ID}_results.json', 'w') as results_file:
    json.dump(results, results_file, indent=2)
df = pd.DataFrame(results)
df.to_csv(f'{SAVE_DIR}/{NOTEBOOK_ID}_results.csv', index=False)

# Left panel: overlaid histograms of final test error, one per lambda, with
# the 0.20 / 0.40 phase thresholds drawn as dashed lines.
# NOTE(review): a second axis (ax[1]) is created but nothing is drawn on it,
# so the right half of the saved figure is empty -- confirm whether a second
# plot was intended.
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
for l in LAMBDA_VALUES:
    sub = df[df['lambda'] == l]
    ax[0].hist(sub['final_error'], bins=15, alpha=0.6, label=f'Œª={l}')
ax[0].axvline(0.20, color='g', ls='--'); ax[0].axvline(0.40, color='r', ls='--')
ax[0].set_xlabel('Error'); ax[0].legend(); ax[0].set_title('Error Distribution')

# Per-lambda counts of ordered / collapsed / intermediate runs.
for l in LAMBDA_VALUES:
    sub = df[df['lambda'] == l]
    print(f"Œª={l}: ord={(sub['phase']=='ordered').sum()}, col={(sub['phase']=='collapsed').sum()}, int={(sub['phase']=='intermediate').sum()}")

plt.tight_layout()
plt.savefig(f'{SAVE_DIR}/figures/{NOTEBOOK_ID}_dist.png', dpi=150)
plt.show()