In [1]:
# ============================================================
# 0) MOUNT DRIVE + PATHS
# ============================================================
from google.colab import drive
drive.mount("/content/drive")

import os, json, math, copy, time, random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision import datasets, transforms, models

# >>> Set these two paths <<<
PACS_ROOT = "/content/drive/MyDrive/datasets/PACS"      # your ImageFolder PACS
PROJECT_ROOT = "/content/drive/MyDrive/SoMA_PACS_E3"    # outputs

os.makedirs(PROJECT_ROOT, exist_ok=True)
print("PACS_ROOT:", PACS_ROOT)
print("PROJECT_ROOT:", PROJECT_ROOT)


Mounted at /content/drive
PACS_ROOT: /content/drive/MyDrive/datasets/PACS
PROJECT_ROOT: /content/drive/MyDrive/SoMA_PACS_E3


In [2]:
# ============================================================
# 1) REPRODUCIBILITY + DEVICE
# ============================================================
def set_seed(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

device = "cuda" if torch.cuda.is_available() else "cpu"
print("device:", device)


device: cuda


In [3]:
# ============================================================
# DATALOADER WORKER SEEDING (DETERMINISTIC)
# ============================================================
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


In [4]:
# ============================================================
# 2) CONFIG (Scope 1 default)
# ============================================================
CFG = {
    # Data
    "source_domains": ["photo", "art_painting", "cartoon"],
    "target_domain": "sketch",
    "img_size": 224,
    "batch_size_per_domain": 16,   # per-domain batch; total batch = 3x this
    "num_workers": 2,

    # Training
    "epochs": 20,
    "lr_full": 3e-4,    # Adam
    "lr_adapter": 1e-3, # Adam
    "weight_decay": 0.0,

    # Adapters
    "rank": 8,
    "adapter_scale": 1.0,

    # Scope
    "scope": "layer4_only",   # <-- run this now
    # "scope": "layer3_layer4",  # <-- later

    # ΔW subspace computation
    "deltaW_small_adapt_epochs": 1,   # small adaptation
    "deltaW_lr": 1e-3,
    "deltaW_examples_per_class": 20,  # per-domain, per-class (class-balanced)
    "freeze_bn_stats": True,

    # Seeds
    "seeds": [42, 123],
}

# Adapter layer specs per scope
SCOPE_LAYERS = {
    "layer4_only": [
        ("layer4", 0, "conv2"),
        ("layer4", 1, "conv2"),
    ],
    "layer3_layer4": [
        ("layer3", 0, "conv2"),
        ("layer3", 1, "conv2"),
        ("layer4", 0, "conv2"),
        ("layer4", 1, "conv2"),
    ],
}

# ADAPTER_LAYERS = SCOPE_LAYERS[CFG["scope"]]
CFG["scope"] = "layer3_layer4"
ADAPTER_LAYERS = SCOPE_LAYERS[CFG["scope"]]
print("Scope:", CFG["scope"])
print("Adapter layers:", ADAPTER_LAYERS)

# Variants to compare
VARIANTS = ["soma_minor", "soma_major", "random", "full_finetune","deltaW_subspace"]

# Save config
with open(os.path.join(PROJECT_ROOT, f"config_{CFG['scope']}.json"), "w") as f:
    json.dump(CFG, f, indent=2)


Scope: layer3_layer4
Adapter layers: [('layer3', 0, 'conv2'), ('layer3', 1, 'conv2'), ('layer4', 0, 'conv2'), ('layer4', 1, 'conv2')]


In [5]:
# ============================================================
# 3) TRANSFORMS + DATASET HELPERS
# ============================================================
IMG_SIZE = CFG["img_size"]

train_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

test_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

def get_domain_dataset(domain, tfm):
    return datasets.ImageFolder(root=os.path.join(PACS_ROOT, domain), transform=tfm)

def make_class_balanced_loader(domain, tfm, batch_size, num_workers, seed=0, limit_per_class=None):
    """
    Returns a DataLoader with WeightedRandomSampler to balance classes within this domain.
    Optional limit_per_class creates a small balanced subset (used for ΔW small adapt).
    """
    ds = get_domain_dataset(domain, tfm)

    # Optional: create a small class-balanced subset
    if limit_per_class is not None:
        # indices per class
        cls_to_idx = ds.class_to_idx
        targets = np.array([y for _, y in ds.samples])
        idxs = []
        rng = np.random.default_rng(seed)
        for c in range(len(cls_to_idx)):
            cls_idxs = np.where(targets == c)[0]
            if len(cls_idxs) == 0:
                continue
            take = min(limit_per_class, len(cls_idxs))
            chosen = rng.choice(cls_idxs, size=take, replace=False)
            idxs.extend(chosen.tolist())
        idxs = np.array(sorted(idxs))
        # wrap subset
        ds = torch.utils.data.Subset(ds, idxs)
        # need subset targets for sampler
        subset_targets = targets[idxs]
        targets_for_weights = subset_targets
    else:
        targets_for_weights = np.array([y for _, y in ds.samples])

    # weights inverse frequency
    class_counts = np.bincount(targets_for_weights, minlength=7).astype(np.float64)
    class_counts[class_counts == 0] = 1.0
    class_weights = 1.0 / class_counts
    sample_weights = class_weights[targets_for_weights]

    g = torch.Generator()
    g.manual_seed(seed)
    sampler = WeightedRandomSampler(
        weights=torch.tensor(sample_weights, dtype=torch.double),
        num_samples=len(sample_weights),
        replacement=True,
        generator=g
    )

    loader = DataLoader(
        ds,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        pin_memory=True
    )
    return loader

def make_plain_loader(domain, tfm, batch_size, num_workers, shuffle=False):
    ds = get_domain_dataset(domain, tfm)
    return DataLoader(ds, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True)

class MultiDomainBatcher:
    """
    Iterates over multiple loaders and yields a domain-balanced batch by concatenation:
    one batch from each domain -> concat along batch dimension.
    """
    def __init__(self, loaders):
        self.loaders = loaders

    def __iter__(self):
        iters = [iter(l) for l in self.loaders]
        while True:
            xs, ys = [], []
            for i, it in enumerate(iters):
                try:
                    x, y = next(it)
                except StopIteration:
                    return
                xs.append(x)
                ys.append(y)
            yield torch.cat(xs, dim=0), torch.cat(ys, dim=0)

    def __len__(self):
        return min(len(l) for l in self.loaders)


In [6]:
# ============================================================
# SOURCE TRAIN/VAL SPLITS (NO TARGET PEEKING)
# ============================================================
from torch.utils.data import Subset

def split_indices_stratified(ds, val_frac=0.2, seed=0):
    # ds is ImageFolder; ds.samples = [(path, y), ...]
    rng = np.random.default_rng(seed)
    targets = np.array([y for _, y in ds.samples])
    n_classes = targets.max() + 1

    train_idx, val_idx = [], []
    for c in range(n_classes):
        idx_c = np.where(targets == c)[0]
        rng.shuffle(idx_c)
        n_val = int(math.ceil(val_frac * len(idx_c)))
        val_idx.extend(idx_c[:n_val].tolist())
        train_idx.extend(idx_c[n_val:].tolist())

    rng.shuffle(train_idx)
    rng.shuffle(val_idx)
    return train_idx, val_idx

def make_subset_loader(ds_full, indices, batch_size, num_workers, shuffle, seed=0, class_balance=False, augment=False):
    ds = Subset(ds_full, indices)

    # Build sampler if class_balance
    if class_balance:
        # Need targets for subset
        targets = np.array([ds_full.samples[i][1] for i in indices])
        class_counts = np.bincount(targets, minlength=7).astype(np.float64)
        class_counts[class_counts == 0] = 1.0
        class_weights = 1.0 / class_counts
        sample_weights = class_weights[targets]

        g = torch.Generator().manual_seed(seed)
        sampler = WeightedRandomSampler(
            weights=torch.tensor(sample_weights, dtype=torch.double),
            num_samples=len(sample_weights),
            replacement=True,
            generator=g
        )
        return DataLoader(ds, batch_size=batch_size, sampler=sampler,
                          num_workers=num_workers, pin_memory=True,
                          worker_init_fn=seed_worker, generator=g)
    else:
        g = torch.Generator().manual_seed(seed)
        return DataLoader(ds, batch_size=batch_size, shuffle=shuffle,
                          num_workers=num_workers, pin_memory=True,
                          worker_init_fn=seed_worker, generator=g)

def build_source_train_val_loaders(seed, val_frac=0.2):
    """
    Returns:
      train_loaders: list over source domains (class-balanced, augmented)
      val_loaders:   list over source domains (plain, no augmentation)
    """
    train_loaders, val_loaders = [], []
    for dom in CFG["source_domains"]:
        ds_train_aug = get_domain_dataset(dom, train_tf)  # aug tf
        ds_val_plain = get_domain_dataset(dom, test_tf)   # no aug tf

        train_idx, val_idx = split_indices_stratified(ds_train_aug, val_frac=val_frac, seed=seed)

        tr_loader = make_subset_loader(ds_train_aug, train_idx,
                                       batch_size=CFG["batch_size_per_domain"],
                                       num_workers=CFG["num_workers"],
                                       shuffle=False, seed=seed,
                                       class_balance=True)

        va_loader = make_subset_loader(ds_val_plain, val_idx,
                                       batch_size=CFG["batch_size_per_domain"] * len(CFG["source_domains"]),
                                       num_workers=CFG["num_workers"],
                                       shuffle=False, seed=seed,
                                       class_balance=False)
        train_loaders.append(tr_loader)
        val_loaders.append(va_loader)

    return train_loaders, val_loaders

@torch.no_grad()
def evaluate_acc_multi(model, loaders):
    accs = [evaluate_acc(model, l) for l in loaders]
    return float(np.mean(accs)), [float(a) for a in accs]


In [7]:
# ============================================================
# 4) MODEL: ADAPTER MODULE + RESNET18 WRAPPER
# ============================================================
class FrozenSubspaceAdapter(nn.Module):
    """
    Adapter for a Conv2d layer:
      y = conv(x) + scale * B( A(x) )
    where:
      A is a trainable kxk conv:  C_in -> r
      B is a frozen 1x1 conv:     r -> C_out  (weights set to subspace basis)
    """
    def __init__(self, conv: nn.Conv2d, basis_U: np.ndarray, scale=1.0):
        super().__init__()
        assert isinstance(conv, nn.Conv2d)
        C_out, C_in, k1, k2 = conv.weight.shape
        r = basis_U.shape[1]
        assert basis_U.shape == (C_out, r)

        self.conv = conv
        self.scale = scale

        # Trainable A: kxk conv mapping into rank-r space
        self.A = nn.Conv2d(
            in_channels=C_in, out_channels=r,
            kernel_size=(k1, k2),
            stride=conv.stride, padding=conv.padding,
            bias=False
        )
        nn.init.normal_(self.A.weight, mean=0.0, std=0.01)

        # Frozen B: 1x1 conv mapping back to C_out
        self.B = nn.Conv2d(in_channels=r, out_channels=C_out, kernel_size=1, bias=False)
        with torch.no_grad():
            Wb = torch.from_numpy(basis_U.astype(np.float32))  # [C_out, r]
            self.B.weight.copy_(Wb[:, :, None, None])          # [C_out, r, 1, 1]
        for p in self.B.parameters():
            p.requires_grad = False

    def forward(self, x):
        out = self.conv(x)
        adapt = self.B(self.A(x))
        return out + self.scale * adapt

class ResNet18WithAdapters(nn.Module):
    def __init__(self, num_classes=7):
        super().__init__()
        self.backbone = models.resnet18(weights="IMAGENET1K_V1")
        self.backbone.fc = nn.Linear(512, num_classes)

        # store adapters (purely for bookkeeping)
        self.adapters = nn.ModuleDict()

    def _get_conv(self, layer_name, block_idx, conv_name):
        layer = getattr(self.backbone, layer_name)
        block = layer[block_idx]
        return getattr(block, conv_name)

    def _set_conv(self, layer_name, block_idx, conv_name, new_module):
        layer = getattr(self.backbone, layer_name)
        block = layer[block_idx]
        setattr(block, conv_name, new_module)

    def install_adapters(self, adapter_layers, bases_by_layer, scale=1.0):
        """
        bases_by_layer: dict key -> np.ndarray [C_out, r]
        key is f"{layer_name}_{block_idx}_{conv_name}"
        """
        for (layer_name, block_idx, conv_name) in adapter_layers:
            key = f"{layer_name}_{block_idx}_{conv_name}"
            conv = self._get_conv(layer_name, block_idx, conv_name)
            adapter = FrozenSubspaceAdapter(conv, bases_by_layer[key], scale=scale)
            self._set_conv(layer_name, block_idx, conv_name, adapter)
            self.adapters[key] = adapter

    def freeze_bn_running_stats(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    def set_bn_affine_trainable(self, trainable=True):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                if m.weight is not None: m.weight.requires_grad = trainable
                if m.bias is not None:   m.bias.requires_grad = trainable

    def forward(self, x):
        return self.backbone(x)

def freeze_all_params(model: nn.Module):
    for p in model.parameters():
        p.requires_grad = False

def count_trainable(model: nn.Module):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


In [8]:
# ============================================================
# 5) SUBSPACE BASES: SoMA major/minor/random AND ΔW-subspace
# ============================================================
def conv_weight_matrix(conv: nn.Conv2d) -> np.ndarray:
    W = conv.weight.detach().cpu().numpy()
    return W.reshape(W.shape[0], -1)  # [C_out, D]

def svd_U(Wmat: np.ndarray):
    U, S, Vt = np.linalg.svd(Wmat, full_matrices=False)
    return U, S

def make_random_basis(C_out, r, seed):
    rng = np.random.default_rng(seed)
    Q, _ = np.linalg.qr(rng.standard_normal((C_out, r)))
    return Q

def compute_bases_from_imagenet(model: ResNet18WithAdapters, adapter_layers, kind, r, seed=0):
    """
    kind: soma_major / soma_minor / random
    computed from ImageNet weights of the model (before any PACS training)
    """
    bases = {}
    for (layer_name, block_idx, conv_name) in adapter_layers:
        key = f"{layer_name}_{block_idx}_{conv_name}"
        conv = getattr(getattr(model.backbone, layer_name)[block_idx], conv_name)
        Wmat = conv_weight_matrix(conv)
        U, _ = svd_U(Wmat)
        C_out = U.shape[0]

        if kind == "soma_major":
            bases[key] = U[:, :r]
        elif kind == "soma_minor":
            bases[key] = U[:, -r:]
        elif kind == "random":
            # seed per-layer for stability
            bases[key] = make_random_basis(C_out, r, seed=seed + abs(hash(key)) % 10000)
        else:
            raise ValueError(kind)
    return bases


In [9]:
# ============================================================
# 6) ΔW-SUBSPACE COMPUTATION (from ImageNet model via small per-domain adaptations)
# ============================================================
def set_train_mode_bn_frozen(model: nn.Module):
    model.train()
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()

def build_deltaW_channel_subspace(dW_list):
    """
    dW_list: list of ΔW matrices for one layer, each [C_out, D]
    returns eigvecs [C_out, C_out] sorted desc
    """
    C_out = dW_list[0].shape[0]
    C = np.zeros((C_out, C_out), dtype=np.float64)
    for dW in dW_list:
        C += dW @ dW.T
    eigvals, eigvecs = np.linalg.eigh(C)
    idx = np.argsort(eigvals)[::-1]
    eigvecs = eigvecs[:, idx]
    eigvals = eigvals[idx]
    return eigvecs, eigvals

def tiny_adapt_and_get_deltaW_bases(
    base_imagenet_model: ResNet18WithAdapters,
    adapter_layers,
    r,
    source_domains,
    examples_per_class,
    epochs,
    lr,
    seed
):
    """
    Match your prior logic:
      - for each domain: train small class-balanced subset from ImageNet base
      - extract ΔW per layer
      - aggregate using C = sum_d ΔWΔW^T
      - basis = eigvecs(C)[:, :r]
    """
    set_seed(seed)

    # Snapshot W0 for each conv as matrix [C_out, D]
    W0 = {}
    for (layer_name, block_idx, conv_name) in adapter_layers:
        key = f"{layer_name}_{block_idx}_{conv_name}"
        conv0 = getattr(getattr(base_imagenet_model.backbone, layer_name)[block_idx], conv_name)
        W0[key] = conv_weight_matrix(conv0).copy()

    # Collect ΔWs per layer
    dW_lists = {k: [] for k in W0.keys()}

    for dom in source_domains:
        m = copy.deepcopy(base_imagenet_model).to(device)

        # freeze BN running stats
        set_train_mode_bn_frozen(m)
        m.set_bn_affine_trainable(True)

        # train small subset: allow updating backbone (like your previous code)
        # If you want it closer to your prior snippet, keep all params trainable.
        for p in m.parameters():
            p.requires_grad = True
        if CFG["freeze_bn_stats"]:
            m.freeze_bn_running_stats()
            m.set_bn_affine_trainable(True)

        # small balanced loader (subset per class)
        loader = make_class_balanced_loader(
            dom, train_tf,
            batch_size=CFG["batch_size_per_domain"],
            num_workers=CFG["num_workers"],
            seed=seed,
            limit_per_class=examples_per_class
        )

        opt = torch.optim.Adam(m.parameters(), lr=lr)
        crit = nn.CrossEntropyLoss()

        for ep in range(epochs):
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                opt.zero_grad(set_to_none=True)
                loss = crit(m(x), y)
                loss.backward()
                opt.step()

        # extract ΔW for each target layer
        for (layer_name, block_idx, conv_name) in adapter_layers:
            key = f"{layer_name}_{block_idx}_{conv_name}"
            convd = getattr(getattr(m.backbone, layer_name)[block_idx], conv_name)
            Wd = conv_weight_matrix(convd)
            dW = Wd - W0[key]
            dW_lists[key].append(dW)

        del m
        torch.cuda.empty_cache()

    # Build bases from channel-covariance eigvecs
    bases = {}
    for key, dW_list in dW_lists.items():
        Ueig, _ = build_deltaW_channel_subspace(dW_list)
        bases[key] = Ueig[:, :r]
    return bases


In [10]:
# ============================================================
# 7) TRAIN / EVAL LOOPS (Adapter vs Full finetune)
# ============================================================
@torch.no_grad()
def evaluate_acc(model, loader):
    model.eval()
    correct, total = 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        pred = model(x).argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return correct / total

def train_one_epoch_domain_balanced(model, md_batcher, optimizer, criterion, freeze_bn_stats=True):
    model.train()
    if freeze_bn_stats:
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    total_loss, correct, total = 0.0, 0, 0
    for x, y in md_batcher:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad(set_to_none=True)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)
        correct += (logits.argmax(1) == y).sum().item()
        total += y.size(0)

    return total_loss / max(1, total), correct / max(1, total)

def build_source_loaders(seed):
    loaders = []
    for d in CFG["source_domains"]:
        loaders.append(make_class_balanced_loader(
            d, train_tf,
            batch_size=CFG["batch_size_per_domain"],
            num_workers=CFG["num_workers"],
            seed=seed
        ))
    return loaders

def build_target_loader():
    return make_plain_loader(
        CFG["target_domain"], test_tf,
        batch_size=CFG["batch_size_per_domain"] * len(CFG["source_domains"]),
        num_workers=CFG["num_workers"],
        shuffle=False
    )


In [11]:
# ============================================================
# 8) BUILD MODELS PER VARIANT
# ============================================================
def make_model_for_variant(variant, seed):
    """
    Returns (model, trainable_params_count)
    All start from ImageNet pretrained ResNet18.
    """
    set_seed(seed)
    base = ResNet18WithAdapters(num_classes=7).to(device)

    # BN handling (applies to both adapter and full finetune)
    if CFG["freeze_bn_stats"]:
        base.freeze_bn_running_stats()
        base.set_bn_affine_trainable(True)

    if variant == "full_finetune":
        # train everything except BN running stats (affine trainable)
        for p in base.parameters():
            p.requires_grad = True
        if CFG["freeze_bn_stats"]:
            base.freeze_bn_running_stats()
            base.set_bn_affine_trainable(True)
        return base, count_trainable(base)

    # adapter variants
    r = CFG["rank"]

    if variant in ["soma_major", "soma_minor", "random"]:
        bases = compute_bases_from_imagenet(base, ADAPTER_LAYERS, variant, r, seed=seed)

    elif variant == "deltaW_subspace":
        bases = tiny_adapt_and_get_deltaW_bases(
            base_imagenet_model=base,
            adapter_layers=ADAPTER_LAYERS,
            r=r,
            source_domains=CFG["source_domains"],
            examples_per_class=CFG["deltaW_examples_per_class"],
            epochs=CFG["deltaW_small_adapt_epochs"],
            lr=CFG["deltaW_lr"],
            seed=seed
        )
    else:
        raise ValueError("Unknown variant: " + variant)

    # install adapters
    base.install_adapters(ADAPTER_LAYERS, bases_by_layer=bases, scale=CFG["adapter_scale"])

    # freeze all, unfreeze adapter A + fc + BN affine
    freeze_all_params(base)
    for adapter in base.adapters.values():
        adapter.A.weight.requires_grad = True
    for p in base.backbone.fc.parameters():
        p.requires_grad = True
    base.set_bn_affine_trainable(True)

    if CFG["freeze_bn_stats"]:
        base.set_bn_affine_trainable(True)

    if CFG["freeze_bn_stats"]:
        base.freeze_bn_running_stats()


    return base, count_trainable(base)


In [12]:
import pandas as pd

def run_variant(variant, seed):
    model, n_trainable = make_model_for_variant(variant, seed)
    model.to(device) # Ensure all adapter layers are on the correct device

    # source_loaders = build_source_loaders(seed)
    # md_batcher = MultiDomainBatcher(source_loaders)
    # target_loader = build_target_loader()

    source_train_loaders, source_val_loaders = build_source_train_val_loaders(seed, val_frac=0.2)
    md_batcher = MultiDomainBatcher(source_train_loaders)
    target_loader = build_target_loader()


    criterion = nn.CrossEntropyLoss()

    if variant == "full_finetune":
        lr = CFG["lr_full"]
        # all params trainable already
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=CFG["weight_decay"])
    else:
        lr = CFG["lr_adapter"]
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(trainable_params, lr=lr, weight_decay=CFG["weight_decay"])

    history = []
    best_acc = -1.0
    best_path = None

    print(variant)

    for ep in range(CFG["epochs"]):
        t0 = time.time()
        print(ep)
        tr_loss, tr_acc = train_one_epoch_domain_balanced(
            model, md_batcher, optimizer, criterion, freeze_bn_stats=CFG["freeze_bn_stats"]
        )
        tgt_acc = evaluate_acc(model, target_loader)
        dt = time.time() - t0

        row = {
            "variant": variant,
            "seed": seed,
            "epoch": ep,
            "train_loss": tr_loss,
            "train_acc": tr_acc,
            "tgt_acc": tgt_acc,
            "lr": lr,
            "trainable_params": n_trainable,
            "scope": CFG["scope"],
        }
        history.append(row)
        print(f"[{variant} | seed={seed} | ep={ep:02d}] "
              f"loss={tr_loss:.3f} src_acc={tr_acc:.3f} tgt_acc={tgt_acc:.3f} time={dt:.1f}s")

        # if tgt_acc > best_acc:
        #     best_acc = tgt_acc
        #     best_path = os.path.join(PROJECT_ROOT, f"best_{CFG['scope']}_{variant}_seed{seed}.pt")
        #     torch.save(model.state_dict(), best_path)

        src_val_mean, src_val_list = evaluate_acc_multi(model, source_val_loaders)
        tgt_acc = evaluate_acc(model, target_loader)  # log only

        row.update({
            "src_val_mean": src_val_mean,
            "src_val_photo": src_val_list[0],
            "src_val_art": src_val_list[1],
            "src_val_cartoon": src_val_list[2],
        })

        # select by source validation ONLY
        if src_val_mean > best_acc:
            best_acc = src_val_mean
            best_path = os.path.join(PROJECT_ROOT, f"best_{CFG['scope']}_{variant}_seed{seed}.pt")
            torch.save(model.state_dict(), best_path)


    # final save
    final_path = os.path.join(PROJECT_ROOT, f"final_{CFG['scope']}_{variant}_seed{seed}.pt")
    torch.save(model.state_dict(), final_path)

    return history, best_acc, best_path, final_path

all_rows = []
summary_rows = []

for variant in VARIANTS:
    for seed in CFG["seeds"]:
        hist, best_acc, best_path, final_path = run_variant(variant, seed)
        print(">>>")
        all_rows.extend(hist)
        summary_rows.append({
            "variant": variant,
            "seed": seed,
            "best_source_Val_meanACC": best_acc,
            "best_ckpt": best_path,
            "final_ckpt": final_path,
            "scope": CFG["scope"],
        })


print("yeahhh")

df_hist = pd.DataFrame(all_rows)
df_sum = pd.DataFrame(summary_rows)

hist_csv = os.path.join(PROJECT_ROOT, f"e3_adapter_hist_{CFG['scope']}.csv")
sum_csv  = os.path.join(PROJECT_ROOT, f"e3_adapter_summary_{CFG['scope']}.csv")
df_hist.to_csv(hist_csv, index=False)
df_sum.to_csv(sum_csv, index=False)

print("\nSaved:")
print(hist_csv)
print(sum_csv)
df_sum.groupby("variant")["best_tgt_acc"].agg(["mean","std"]).sort_values("mean", ascending=False)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 182MB/s]


soma_minor
0
[soma_minor | seed=42 | ep=00] loss=0.620 src_acc=0.788 tgt_acc=0.503 time=1760.3s
1
[soma_minor | seed=42 | ep=01] loss=0.242 src_acc=0.916 tgt_acc=0.532 time=218.6s
2
[soma_minor | seed=42 | ep=02] loss=0.144 src_acc=0.953 tgt_acc=0.622 time=140.1s
3
[soma_minor | seed=42 | ep=03] loss=0.112 src_acc=0.962 tgt_acc=0.614 time=86.2s
4
[soma_minor | seed=42 | ep=04] loss=0.107 src_acc=0.964 tgt_acc=0.599 time=64.3s
5
[soma_minor | seed=42 | ep=05] loss=0.082 src_acc=0.971 tgt_acc=0.558 time=55.6s
6
[soma_minor | seed=42 | ep=06] loss=0.069 src_acc=0.975 tgt_acc=0.624 time=46.1s
7
[soma_minor | seed=42 | ep=07] loss=0.048 src_acc=0.986 tgt_acc=0.572 time=47.4s
8
[soma_minor | seed=42 | ep=08] loss=0.049 src_acc=0.986 tgt_acc=0.489 time=43.4s
9
[soma_minor | seed=42 | ep=09] loss=0.047 src_acc=0.983 tgt_acc=0.595 time=43.8s
10
[soma_minor | seed=42 | ep=10] loss=0.039 src_acc=0.988 tgt_acc=0.566 time=42.1s
11
[soma_minor | seed=42 | ep=11] loss=0.032 src_acc=0.991 tgt_acc=0.56

KeyError: 'Column not found: best_tgt_acc'