In [1]:
import os
import random
import math
import copy
import json
from dataclasses import dataclass
from typing import Tuple, Dict, Optional

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# ======================== Random Seed ========================

# Single seed shared by every random number generator used in this notebook.
SEED = 42

# cuDNN: request deterministic kernels and disable benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed PyTorch (CPU and all CUDA devices), NumPy and Python's `random`.
torch.cuda.manual_seed_all(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [3]:
@dataclass
class Config:
    """Experiment settings shared by SSL pretraining, linear evaluation and fine-tuning."""
    # NOTE(review): both defaults are absolute, machine-specific paths; override them
    # when running on another machine.
    data_dir: str = "C://Users/park9/HAR/SSL_HAR/data"
    save_dir: str = "C://Users/park9/HAR/SSL_HAR/RESULTS/IMPROVE/ALL"

    # SSL pretraining parameters
    pretrain_epochs: int = 100
    pretrain_batch_size: int = 512  # chosen to improve InfoNCE performance
    pretrain_lr: float = 1e-3
    pretrain_warmup_epochs: int = 10  # LR warmup length, in epochs

    # Supervised / linear-eval / fine-tune parameters
    finetune_epochs: int = 50
    finetune_batch_size: int = 128
    finetune_lr: float = 3e-4
    finetune_warmup_epochs: int = 5  # LR warmup length, in epochs
    finetune_backbone_lr_ratio: float = 0.1  # backbone LR as a ratio of the fine-tune LR

    # Parameters common to all training modes
    weight_decay: float = 1e-4
    grad_clip: float = 1.0
    label_smoothing: float = 0.05
    use_ema: bool = True  # whether to use EMA weights
    ema_decay: float = 0.9995  # EMA decay factor
    consistency_weight: float = 0.2  # weight of the consistency loss term

    # Model parameters
    d_model: int = 128
    n_heads: int = 4
    n_layers: int = 2
    dropout: float = 0.1
    hyperbolic_c_init: float = 1.0  # initial value of the (learnable) hyperbolic c

    # SSL parameters
    temperature: float = 0.07
    projection_dim: int = 128

    # Augmentation parameters (tuned for robustness to activity transitions)
    aug_jitter_scale: float = 0.05
    aug_scale_range: Tuple[float, float] = (0.8, 1.2)
    aug_channel_drop_prob: float = 0.2
    aug_time_warp_prob: float = 0.10  # lowered from 0.3
    aug_cutout_prob: float = 0.20     # lowered from 0.3
    aug_cutout_ratio: float = 0.10    # lowered from 0.2

    # System parameters
    # The device default is computed once, when this class body is executed.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    num_workers: int = 0

In [4]:
# ======================== Dataset Configuration ========================

# Sub-folder of each split directory that holds the per-channel raw signal files.
INERTIAL_SIGNALS_FOLDER = "Inertial Signals"

# (file-name prefix, file extension) of each raw channel, in the order the channels
# are stacked by load_split_raw.
RAW_CHANNELS = [
    ("total_acc_x_", "txt"),
    ("total_acc_y_", "txt"),
    ("total_acc_z_", "txt"),
    ("body_acc_x_", "txt"),
    ("body_acc_y_", "txt"),
    ("body_acc_z_", "txt"),
    ("body_gyro_x_", "txt"),
    ("body_gyro_y_", "txt"),
    ("body_gyro_z_", "txt"),
]

# Activity names keyed by the 1-based label ids used in the label files.
_LABEL_MAP = {
    1: "WALKING",
    2: "WALKING_UPSTAIRS",
    3: "WALKING_DOWNSTAIRS",
    4: "SITTING",
    5: "STANDING",
    6: "LAYING",
}

# 0-based class code -> activity name, and the inverse lookup.
_CODE_TO_LABEL_NAME = {label_id - 1: label for label_id, label in _LABEL_MAP.items()}
LABEL_NAME_TO_CODE = {label: code for code, label in _CODE_TO_LABEL_NAME.items()}

def load_split_raw(root: str, split: str):
    """Load the raw inertial signals and labels of one dataset split.

    Args:
        root: Dataset root directory containing the ``train``/``test`` folders.
        split: Either ``"train"`` or ``"test"``.

    Returns:
        ``(X, y)`` where ``X`` stacks the channels listed in ``RAW_CHANNELS`` on
        axis 1 (the inline notes of the original give the shape as (N, 9, 128))
        and ``y`` holds the labels shifted to start at 0.

    Raises:
        AssertionError: ``split`` is not ``"train"`` or ``"test"``.
        FileNotFoundError: The signal folder, a channel file or the label file is missing.
        ValueError: The channel files do not all contain the same number of samples.
    """
    assert split in ("train", "test")
    signals_dir = os.path.join(root, split, INERTIAL_SIGNALS_FOLDER)

    # Fail early and name exactly which path is missing.
    if not os.path.isdir(signals_dir):
        raise FileNotFoundError(f"[Missing dir] {signals_dir}")

    def _read_table(path):
        """Parse one numeric text file, raising if the file does not exist."""
        if not os.path.isfile(path):
            raise FileNotFoundError(f"[Missing file] {path}")
        # Pass an open handle instead of the path string so the path cannot be
        # mistaken for a URL.
        with open(path, "r", encoding="utf-8") as handle:
            return np.loadtxt(handle)

    # One array per channel, with a trailing singleton axis added for concatenation
    # (e.g. body_acc_x_train.txt -> (N, 128) -> (N, 128, 1)).
    channels = [
        _read_table(os.path.join(signals_dir, f"{prefix}{split}.{ext}"))[..., None]
        for prefix, ext in RAW_CHANNELS
    ]

    # Safety net: every channel must provide the same number of samples.
    sample_counts = {channel.shape[0] for channel in channels}
    if len(sample_counts) != 1:
        raise ValueError(f"채널별 샘플 수 불일치: {sample_counts}")

    # Join on the last axis, then move channels in front of time.
    X = np.concatenate(channels, axis=-1).transpose(0, 2, 1)

    labels_path = os.path.join(root, split, f"y_{split}.txt")
    y = _read_table(labels_path).astype(int) - 1  # 0-based

    print(f"[OK] {split}: X{X.shape}, y{y.shape}")
    return X, y

class UCIHARInertial(Dataset):
    """Normalized UCI-HAR inertial-signal dataset.

    Data comes either from ``load_split_raw(root, split)`` or from
    ``preloaded_data=(X, y)``. The signals are always normalized with
    ``mean``/``std``: the supplied pair when both are given (e.g. the training
    statistics), otherwise per-channel statistics computed from this data.

    NOTE(review): labels are shifted down by one whenever their minimum is >= 1.
    Labels that are already 0-based but contain no class 0 would be shifted too.
    """
    def __init__(self, root: str, split: str, mean=None, std=None,
                 preloaded_data: Tuple[np.ndarray, np.ndarray] = None):
        super().__init__()
        X, y = load_split_raw(root, split) if preloaded_data is None else preloaded_data

        self.X = X.astype(np.float32)
        # Treat labels whose minimum is >= 1 as 1-based and convert them to 0-based.
        if y.min() >= 1:
            self.y = (y - 1).astype(np.int64)
        else:
            self.y = y.astype(np.int64)

        # Use the given statistics only when both are present; otherwise compute
        # them over the sample and time axes (one value per channel).
        stats_given = mean is not None and std is not None
        if not stats_given:
            mean = self.X.mean(axis=(0, 2), keepdims=True)
            std = self.X.std(axis=(0, 2), keepdims=True) + 1e-6
        self.mean, self.std = mean, std

        # Normalization is applied regardless of where the data came from.
        self.X = (self.X - self.mean) / self.std

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        signal = torch.from_numpy(self.X[idx])
        label = torch.tensor(self.y[idx], dtype=torch.long)
        return signal, label

In [5]:
# ======================== Label-Independent Augmentations ========================

def random_jitter(x: torch.Tensor, scale: float = 0.05) -> torch.Tensor:
    """Return ``x`` plus zero-mean Gaussian noise with standard deviation ``scale``."""
    return x + scale * torch.randn_like(x)

def random_scaling(x: torch.Tensor, scale_range: Tuple[float, float] = (0.8, 1.2)) -> torch.Tensor:
    """Multiply each (sample, channel) row by its own factor drawn uniformly from ``scale_range``."""
    n_samples, n_channels = x.size(0), x.size(1)
    # One factor per (sample, channel), broadcast over the last axis.
    factors = torch.empty(n_samples, n_channels, 1, device=x.device)
    factors.uniform_(*scale_range)
    return x * factors

def random_channel_drop(x: torch.Tensor, drop_prob: float = 0.2) -> torch.Tensor:
    """Zero out whole channels, each (sample, channel) independently with probability ``drop_prob``."""
    n_samples, n_channels, _ = x.shape
    # A channel is kept when its uniform draw exceeds drop_prob.
    keep = (torch.rand(n_samples, n_channels, 1, device=x.device) > drop_prob).float()
    return x * keep

def random_time_warp(x: torch.Tensor, warp_prob: float = 0.10) -> torch.Tensor:
    """With probability ``warp_prob``, stretch or squeeze the time axis of the whole batch.

    The batch is linearly resampled by one factor drawn from [0.8, 1.2] and then
    brought back to the original length: a random crop when it became longer,
    random left/right replicate-padding when it became shorter. When no warp is
    applied the input tensor itself is returned.
    """
    if random.random() > warp_prob:
        return x

    _, _, T = x.shape
    factor = random.uniform(0.8, 1.2)
    new_T = int(T * factor)
    warped = F.interpolate(x, size=new_T, mode='linear', align_corners=False)

    if new_T == T:
        return warped

    if new_T > T:
        # Too long: keep a random window of the original length.
        offset = random.randint(0, new_T - T)
        return warped[:, :, offset:offset + T]

    # Too short: split the missing length randomly between both ends.
    missing = T - new_T
    left = random.randint(0, missing)
    return F.pad(warped, (left, missing - left), mode='replicate')

def random_cutout(x: torch.Tensor, cutout_prob: float = 0.20, cutout_ratio: float = 0.10) -> torch.Tensor:
    """With probability ``cutout_prob``, zero one contiguous time segment of the whole batch.

    The segment spans ``int(T * cutout_ratio)`` steps at a random position and is
    shared by every sample and channel. The input is not modified; when no cutout
    is applied the input tensor itself is returned.
    """
    if random.random() > cutout_prob:
        return x

    _, _, T = x.shape
    span = int(T * cutout_ratio)
    begin = random.randint(0, T - span)

    masked = x.clone()
    masked[..., begin:begin + span] = 0
    return masked

def augment_time_series(x: torch.Tensor, cfg: Config) -> torch.Tensor:
    """Produce one augmented view of ``x`` without using labels.

    Works on a copy of ``x`` and applies, in this fixed order: jitter, scaling,
    channel drop, time warp, cutout. Strengths and probabilities come from the
    ``aug_*`` fields of ``cfg``.
    """
    stages = (
        lambda t: random_jitter(t, scale=cfg.aug_jitter_scale),
        lambda t: random_scaling(t, scale_range=cfg.aug_scale_range),
        lambda t: random_channel_drop(t, drop_prob=cfg.aug_channel_drop_prob),
        lambda t: random_time_warp(t, warp_prob=cfg.aug_time_warp_prob),
        lambda t: random_cutout(t, cutout_prob=cfg.aug_cutout_prob, cutout_ratio=cfg.aug_cutout_ratio),
    )

    x_aug = x.clone()
    for stage in stages:
        x_aug = stage(x_aug)
    return x_aug

In [6]:
# ======================== Tail-Head Stitch (전이 유사 혼합) ========================

def tail_head_stitch(x_a: torch.Tensor, x_b: torch.Tensor, mix: float = 0.5) -> torch.Tensor:
    """Replace the tail of ``x_a`` with the head of ``x_b``.

    Builds a window that looks like a boundary between two segments: the last
    ``int(T * mix)`` time steps of a copy of ``x_a`` are overwritten with the
    first ``int(T * mix)`` time steps of ``x_b``.

    Args:
        x_a: Tensor of shape (B, C, T) supplying the leading part.
        x_b: Tensor supplying the stitched-in head; it must be indexable like ``x_a``.
        mix: Fraction of the time axis taken from ``x_b``; expected in [0, 1].

    Returns:
        A new tensor shaped like ``x_a``; neither input is modified.
    """
    B, C, T = x_a.shape
    mix_pts = int(T * mix)

    x_mix = x_a.clone()

    # With zero points, `-0:` would select the whole time axis while `:0` selects
    # nothing, so the assignment would fail on a shape mismatch. Nothing needs to
    # be stitched in that case.
    if mix_pts == 0:
        return x_mix

    x_mix[:, :, -mix_pts:] = x_b[:, :, :mix_pts]

    return x_mix

In [7]:
# ======================== ResNet Building Blocks ========================

class ResBlock1D(nn.Module):
    """Two-convolution 1D residual block: conv-BN-ReLU, conv-BN, add shortcut, ReLU.

    ``downsample`` is an optional module applied to the input to form the
    shortcut; without it the input itself is used, so its shape must already
    match the output of the main path.
    """
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, downsample=None):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               stride=stride, padding=half_kernel, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               stride=1, padding=half_kernel, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        hidden = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        shortcut = x if self.downsample is None else self.downsample(x)
        residual += shortcut
        return self.relu(residual)

class ResNet1D(nn.Module):
    """1D ResNet backbone: a strided stem followed by three residual stages.

    Args:
        in_channels: Number of input channels.
        d_model: Number of output channels of the last stage.
        num_blocks: Number of residual blocks in each of the three stages. The
            default is an immutable tuple so it cannot be shared and mutated
            across calls; lists are still accepted.

    Attributes:
        stride: Total temporal downsampling factor. It is 16 because the stem
            convolution, the max-pool, stage 2 and stage 3 each use stride 2.
    """
    def __init__(self, in_channels=9, d_model=128, num_blocks=(2, 2, 2)):
        super().__init__()
        # Running channel count consumed and updated by _make_layer.
        self.in_channels = 64

        self.conv1 = nn.Conv1d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(d_model, num_blocks[2], stride=2)

        self.stride = 16

    def _make_layer(self, out_channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` residual blocks.

        Only the first block changes the stride or channel count. In that case
        a 1x1 convolution plus batch norm is used for its shortcut.
        """
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels, 1, stride, bias=False),
                nn.BatchNorm1d(out_channels)
            )

        layers = []
        layers.append(ResBlock1D(self.in_channels, out_channels, stride=stride, downsample=downsample))
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(ResBlock1D(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and the three stages; returns the final feature map."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

In [8]:
# ======================== Transformer Encoder ========================

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a (batch, seq, d_model) input.

    The table is precomputed for ``max_len`` positions and stored as the
    non-trainable buffer ``pe`` of shape (1, max_len, d_model). Even feature
    indices hold sines and odd indices hold cosines.

    Args:
        d_model: Feature dimension; both even and odd values are supported.
        max_len: Number of positions to precompute. Inputs longer than this
            cannot be encoded.
    """
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine slot fewer than sine slots, so trim
        # the frequencies to fit; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding of the first ``x.size(1)`` positions to ``x``."""
        return x + self.pe[:, :x.size(1), :]

class TransformerEncoder(nn.Module):
    """Transformer encoder over a channels-first feature map.

    ``forward`` takes and returns a (batch, d_model, length) tensor. Internally
    the map is transposed to (batch, length, d_model), given positional
    encodings and dropout, and passed through ``n_layers`` GELU encoder layers
    whose feed-forward width is ``4 * d_model``.
    """
    def __init__(self, d_model=128, n_heads=4, n_layers=2, dropout=0.1):
        super().__init__()
        self.pos_encoder = PositionalEncoding(d_model)
        layer_kwargs = dict(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            activation='gelu',
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_kwargs), num_layers=n_layers
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # Channels-first -> sequence-first for the batch_first transformer.
        tokens = x.permute(0, 2, 1)
        tokens = self.dropout(self.pos_encoder(tokens))
        encoded = self.transformer(tokens)
        # Back to channels-first for the downstream pooling heads.
        return encoded.permute(0, 2, 1)

In [9]:
# ======================== Backbone ========================

class ResNetTransformerBackbone(nn.Module):
    """Feature extractor that feeds a ResNet1D feature map through a TransformerEncoder.

    ``stride`` mirrors the ResNet's temporal downsampling factor.
    """
    def __init__(self, in_channels=9, d_model=128, n_heads=4, n_layers=2, dropout=0.1):
        super().__init__()
        self.resnet = ResNet1D(in_channels=in_channels, d_model=d_model)
        self.transformer = TransformerEncoder(
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            dropout=dropout,
        )
        self.stride = self.resnet.stride

    def forward(self, x):
        """Return the transformer-refined feature map of ``x``."""
        return self.transformer(self.resnet(x))

In [10]:
# ======================== Projection Head ========================

class ProjectionHead(nn.Module):
    """Two-layer MLP (Linear-BatchNorm-ReLU-Linear) mapping pooled features to the contrastive space."""
    def __init__(self, d_model, projection_dim=128):
        super().__init__()
        stages = [
            nn.Linear(d_model, d_model),
            nn.BatchNorm1d(d_model),
            nn.ReLU(),
            nn.Linear(d_model, projection_dim),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        projected = self.net(x)
        return projected

In [11]:
# ======================== Classification Heads ========================

class ClassificationHead(nn.Module):
    """Linear classifier applied to the feature map averaged over its last axis."""
    def __init__(self, d_model: int, num_classes: int):
        super().__init__()
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, fmap):
        # Pool the last axis to length 1, drop that axis, then classify.
        return self.fc(self.gap(fmap).squeeze(-1))

class HyperbolicProjection(nn.Module):
    """Clip feature vectors so their norm stays below ``1/sqrt(c)``, with a learnable ``c``.

    The clipping radius is computed with tensor operations, so gradients reach
    ``self.c`` whenever a vector is actually clipped. Converting ``c`` to a
    Python float (e.g. via ``math.sqrt``) would detach it and leave the
    parameter untrained.

    Args:
        c_init: Initial value of ``c``. It is converted to float, so an integer
            such as ``1`` is accepted.
    """
    def __init__(self, c_init=1.0):
        super().__init__()
        # A parameter must be floating point to require gradients.
        self.c = nn.Parameter(torch.tensor(float(c_init)))

    def forward(self, x):
        """Return ``x`` rescaled along the last axis to lie inside the radius."""
        # Element-wise clamp of the raw features before measuring the norm.
        x = torch.clamp(x, -5.0, 5.0)

        # Keep c in a safe range.
        c = torch.clamp(self.c, min=0.1, max=10.0)
        norm = torch.clamp(torch.norm(x, dim=-1, keepdim=True), min=1e-8)
        # Stay slightly inside the boundary; kept as a tensor so c receives gradients.
        max_norm = 1.0 / torch.sqrt(c) - 1e-4
        scale = torch.minimum(norm, max_norm) / norm
        return x * scale

class HyperbolicClassificationHead(nn.Module):
    """Classifier that applies a HyperbolicProjection to the pooled features before the final linear layer.

    Pipeline: average-pool the last axis, linear pre-projection,
    HyperbolicProjection (initialised with ``c_init``), linear layer to
    ``num_classes`` logits.
    """
    def __init__(self, d_model: int, num_classes: int, c_init: float = 1.0):
        super().__init__()
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.pre_proj = nn.Linear(d_model, d_model)
        self.hyperbolic_proj = HyperbolicProjection(c_init=c_init)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, fmap):
        summary = self.gap(fmap).squeeze(-1)
        bounded = self.hyperbolic_proj(self.pre_proj(summary))
        return self.fc(bounded)

In [12]:
# ======================== SSL Model ========================

class SSLModel(nn.Module):
    """Contrastive pretraining model: backbone, average pooling and projection head.

    ``forward`` returns L2-normalized projections, one vector per input sample.
    The backbone is built for 9 input channels.
    """
    def __init__(self, d_model=128, n_heads=4, n_layers=2, dropout=0.1, projection_dim=128):
        super().__init__()
        self.backbone = ResNetTransformerBackbone(
            in_channels=9,
            d_model=d_model,
            n_heads=n_heads,
            n_layers=n_layers,
            dropout=dropout,
        )
        self.projection_head = ProjectionHead(d_model, projection_dim)
        self.gap = nn.AdaptiveAvgPool1d(1)

    def forward(self, x):
        """Encode ``x`` and return its unit-norm projection."""
        pooled = self.gap(self.backbone(x)).squeeze(-1)
        return F.normalize(self.projection_head(pooled), dim=-1)

In [13]:
# ======================== EMA Utility ========================

class EMA:
    """Keeps an exponential moving average of a model's trainable parameters.

    Only parameters with ``requires_grad=True`` at construction time are
    tracked; buffers are not. ``apply_shadow`` swaps the averaged weights into
    a model and ``restore`` swaps the saved live weights back.
    """
    def __init__(self, model: nn.Module, decay: float = 0.9995):
        self.decay = decay
        self.shadow = {}
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.clone().detach()
        self.backup = {}

    @torch.no_grad()
    def update(self, model: nn.Module):
        """Blend the model's current weights into the averages: shadow = decay*shadow + (1-decay)*param."""
        keep, blend = self.decay, 1 - self.decay
        for name, param in model.named_parameters():
            if not param.requires_grad or name not in self.shadow:
                continue
            self.shadow[name].mul_(keep).add_(param.data, alpha=blend)

    def apply_shadow(self, model: nn.Module):
        """Save the live trainable weights in ``backup`` and overwrite the tracked ones with the averages."""
        saved = {}
        for name, param in model.named_parameters():
            if param.requires_grad:
                saved[name] = param.data.clone()
        self.backup = saved

        for name, param in model.named_parameters():
            if not param.requires_grad or name not in self.shadow:
                continue
            param.data.copy_(self.shadow[name])

    def restore(self, model: nn.Module):
        """Copy the weights saved by ``apply_shadow`` back into the model and clear the backup."""
        for name, param in model.named_parameters():
            if not param.requires_grad or name not in self.backup:
                continue
            param.data.copy_(self.backup[name])
        self.backup = {}

In [14]:
# ======================== Learning Rate Scheduler ========================

def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps):
    """Build a LambdaLR that ramps up linearly and then decays along a half cosine.

    The LR multiplier grows from 0 to 1 over ``num_warmup_steps`` scheduler
    steps, then follows ``0.5 * (1 + cos(pi * progress))`` down to 0 at
    ``num_training_steps``; it never goes below 0.
    """
    warmup_span = float(max(1, num_warmup_steps))
    decay_span = float(max(1, num_training_steps - num_warmup_steps))

    def lr_lambda(current_step):
        if current_step >= num_warmup_steps:
            progress = float(current_step - num_warmup_steps) / decay_span
            return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress)))
        return float(current_step) / warmup_span

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

In [15]:
# ======================== Contrastive Loss ========================

def contrastive_loss(z1: torch.Tensor, z2: torch.Tensor, temperature: float = 0.07) -> torch.Tensor:
    """NT-Xent (InfoNCE) loss between two batches of embeddings.

    Row ``i`` of ``z1`` and row ``i`` of ``z2`` form a positive pair; every other
    row of the combined 2B batch is a negative. Similarities are plain dot
    products, so the caller is expected to pass normalized embeddings.
    """
    B = z1.shape[0]
    device = z1.device

    # Pairwise similarity logits over the stacked batch [z1; z2].
    stacked = torch.cat([z1, z2], dim=0)
    logits = torch.mm(stacked, stacked.t()) / temperature

    # A sample must never be matched with itself.
    self_pairs = torch.eye(2 * B, device=device, dtype=torch.bool)
    logits = logits.masked_fill(self_pairs, -9e15)

    # Row i pairs with row i + B, and row i + B pairs with row i.
    first_half = torch.arange(B, device=device)
    targets = torch.cat([first_half + B, first_half], dim=0)

    return F.cross_entropy(logits, targets)

In [16]:
# ======================== Consistency Loss ========================

def consistency_loss(model: nn.Module, head: nn.Module, x_a: torch.Tensor, x_b: torch.Tensor,
                     mix: float = 0.5, device: str = "cuda") -> torch.Tensor:
    """Transition-like consistency loss.

    Stitches the head of ``x_b`` onto the tail of ``x_a`` and penalises the KL
    divergence between the prediction for the stitched batch and the fixed
    50/50 average of the predictions for ``x_a`` and ``x_b``. The targets are
    computed without gradients. ``device`` is accepted but not used here.
    """
    # Use model.backbone when it exists, otherwise call the model itself.
    encode = model.backbone if hasattr(model, 'backbone') else model

    with torch.no_grad():
        feats_a = encode(x_a)
        feats_b = encode(x_b)

        probs_a = F.softmax(head(feats_a), dim=-1)
        probs_b = F.softmax(head(feats_b), dim=-1)

    stitched = tail_head_stitch(x_a, x_b, mix=mix)
    log_probs_mix = F.log_softmax(head(encode(stitched)), dim=-1)

    # Soft target: equal-weight average of the two source predictions.
    soft_target = 0.5 * probs_a + 0.5 * probs_b

    return F.kl_div(log_probs_mix, soft_target, reduction='batchmean')

In [17]:
# ======================== Training Functions ========================

def pretrain_one_epoch(model: SSLModel, loader: DataLoader, opt: torch.optim.Optimizer,
                       scheduler, ema: Optional[EMA], cfg: Config):
    """Run one epoch of label-free contrastive pretraining.

    Each batch is augmented twice, both views are embedded, and the contrastive
    loss between them is minimised. The scheduler is stepped once per optimised
    batch, and the EMA (if given) is updated after each optimiser step. Batches
    whose loss is NaN are skipped without touching the weights.

    Returns:
        ``{"ssl_loss": mean loss per sample}``. The value is NaN when no batch
        was optimised (empty loader, or every batch produced a NaN loss).
    """
    model.train()
    total_loss, total_samples = 0.0, 0

    for x, _ in loader:
        x = x.to(cfg.device)
        # Two independently augmented views of the same batch.
        x1 = augment_time_series(x, cfg)
        x2 = augment_time_series(x, cfg)

        opt.zero_grad(set_to_none=True)
        z1 = model(x1)
        z2 = model(x2)
        loss = contrastive_loss(z1, z2, temperature=cfg.temperature)

        # Best effort: drop a diverged batch rather than poisoning the weights.
        if torch.isnan(loss):
            continue

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), cfg.grad_clip)
        opt.step()
        scheduler.step()

        if ema is not None:
            ema.update(model)

        total_loss += loss.item() * x.size(0)
        total_samples += x.size(0)

    # Report NaN instead of dividing by zero when nothing was optimised.
    if total_samples == 0:
        return {"ssl_loss": float("nan")}

    return {"ssl_loss": total_loss / total_samples}

def linear_eval_epoch(backbone: nn.Module, head: nn.Module, loader: DataLoader,
                      opt: torch.optim.Optimizer, cfg: Config, train: bool = True):
    """Run one linear-evaluation epoch with a frozen backbone.

    The backbone always runs in eval mode and without gradients. The head is
    optimised when ``train`` is true and only evaluated otherwise. Label
    smoothing is applied to the training loss only.

    Returns:
        Dict with the per-sample mean ``"loss"`` and the accuracy ``"acc"``.
    """
    backbone.eval()
    head.train(bool(train))

    smoothing = cfg.label_smoothing if train else 0.0
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for x, y in loader:
        x = x.to(cfg.device)
        y = y.to(cfg.device)

        # No gradients flow into the frozen backbone.
        with torch.no_grad():
            fmap = backbone(x)

        logits = head(fmap)
        loss = F.cross_entropy(logits, y, label_smoothing=smoothing)

        if train:
            opt.zero_grad(set_to_none=True)
            loss.backward()
            opt.step()

        batch_size = y.size(0)
        n_correct += (logits.argmax(dim=-1) == y).sum().item()
        loss_sum += loss.item() * batch_size
        n_seen += batch_size

    return {
        "loss": loss_sum / n_seen,
        "acc": n_correct / n_seen
    }

def finetune_epoch(model: nn.Module, head: nn.Module, loader: DataLoader,
                   opt: torch.optim.Optimizer, scheduler, ema: Optional[EMA],
                   cfg: Config, train: bool = True):
    """Run one fine-tuning epoch over ``loader``, training backbone and head together.

    The loss is cross-entropy plus ``cfg.consistency_weight`` times a
    consistency term. The consistency term is only computed when ``train`` is
    true and the weight is positive. With ``train=False`` the same loop runs
    in eval mode without optimiser, scheduler or EMA steps.

    Returns:
        Dict with per-sample means ``"loss"``, ``"ce_loss"``, ``"cons_loss"``
        and the accuracy ``"acc"``.
    """
    if train:
        model.train()
        head.train()
    else:
        model.eval()
        head.eval()

    total_loss, total_ce_loss, total_cons_loss = 0.0, 0.0, 0.0
    total_correct, total_samples = 0, 0

    # Second, independent iterator over the same loader; it supplies the partner
    # batch (x_b) for the consistency loss.
    data_iter = iter(loader)
    for x, y in loader:
        x, y = x.to(cfg.device), y.to(cfg.device)

        # Forward pass (use model.backbone when the model exposes one)
        fmap = model.backbone(x) if hasattr(model, 'backbone') else model(x)
        logits = head(fmap)
        loss_ce = F.cross_entropy(logits, y, label_smoothing=cfg.label_smoothing if train else 0.0)

        # Consistency loss (training only)
        loss_cons = torch.tensor(0.0, device=cfg.device)
        if train and cfg.consistency_weight > 0:
            try:
                x_b, _ = next(data_iter)
            except StopIteration:
                # Partner iterator exhausted: restart it.
                data_iter = iter(loader)
                x_b, _ = next(data_iter)

            x_b = x_b.to(cfg.device)
            # The term stays 0 when the partner batch has a different batch size.
            if x_b.size(0) == x.size(0):
                loss_cons = consistency_loss(model, head, x, x_b, mix=0.5, device=cfg.device)

        loss = loss_ce + cfg.consistency_weight * loss_cons

        if train:
            opt.zero_grad(set_to_none=True)
            loss.backward()
            # Clip the joint gradient norm of backbone and head.
            nn.utils.clip_grad_norm_(list(model.parameters()) + list(head.parameters()), cfg.grad_clip)
            opt.step()
            scheduler.step()

            # The EMA is updated from `model` only; `head` is not passed to it.
            if ema is not None:
                ema.update(model)

        pred = logits.argmax(dim=-1)
        total_correct += (pred == y).sum().item()
        total_loss += loss.item() * y.size(0)
        total_ce_loss += loss_ce.item() * y.size(0)
        total_cons_loss += loss_cons.item() * y.size(0)
        total_samples += y.size(0)

    # NOTE(review): an empty loader leaves total_samples at 0 and the divisions below raise.
    return {
        "loss": total_loss / total_samples,
        "ce_loss": total_ce_loss / total_samples,
        "cons_loss": total_cons_loss / total_samples,
        "acc": total_correct / total_samples
    }

@torch.no_grad()
def evaluate_model(backbone: nn.Module, head: nn.Module, loader: DataLoader,
                   ema: Optional[EMA], cfg: Config, use_ema: bool = False):
    """Compute accuracy and macro-F1 of ``head(backbone(x))`` over ``loader``.

    When ``use_ema`` is true and ``ema`` is given, the EMA weights are swapped
    into ``backbone`` for the duration of the evaluation. The live weights are
    restored afterwards even if evaluation raises or is interrupted, so a
    failed run cannot leave the EMA weights in place.

    Both modules are left in eval mode.

    Returns:
        ``(accuracy, macro_f1)``.
    """
    swap_in_ema = use_ema and ema is not None
    if swap_in_ema:
        ema.apply_shadow(backbone)

    y_true, y_pred = [], []
    try:
        backbone.eval()
        head.eval()

        for x, y in loader:
            x = x.to(cfg.device)
            fmap = backbone(x)
            logits = head(fmap)
            y_pred.append(logits.argmax(dim=-1).cpu().numpy())
            y_true.append(y.numpy())
    finally:
        # Always put the live training weights back.
        if swap_in_ema:
            ema.restore(backbone)

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='macro')

    return acc, f1

In [18]:
# ======================== Transitional Test Set ========================

def create_transitional_test_set(
    orig_dataset: UCIHARInertial, class_A: str, class_B: str, p: float, mix: float
) -> Tuple[UCIHARInertial, dict]:
    """Build a copy of ``orig_dataset`` with simulated A<->B activity transitions.

    For a fraction ``p`` of the samples of each class (at least one), the last
    ``int(T * mix)`` time steps are overwritten with the first time steps of a
    randomly chosen, unmodified sample of the other class. Labels are kept.

    Returns:
        ``(dataset, info)``: the modified dataset, normalized with the same
        mean/std as ``orig_dataset``, and a dict describing the scenario and
        how many samples were modified.
    """
    X, y = orig_dataset.X.copy(), orig_dataset.y.copy()
    N, _, T = X.shape

    code_A = LABEL_NAME_TO_CODE[class_A]
    code_B = LABEL_NAME_TO_CODE[class_B]
    idx_A = np.where(y == code_A)[0]
    idx_B = np.where(y == code_B)[0]
    mix_pts = int(T * mix)

    def _stitch_tails(target_pool, donor_pool):
        """Overwrite the tails of sampled targets with donor heads; return how many were changed."""
        n_targets = max(1, int(len(target_pool) * p))
        targets = np.random.choice(target_pool, n_targets, replace=False)
        donors = np.random.choice(donor_pool, len(targets), replace=True)
        for dst, src in zip(targets, donors):
            # Donor heads are read from the untouched original data.
            X[dst, :, -mix_pts:] = orig_dataset.X[src, :, :mix_pts]
        return len(targets)

    # A samples receive B heads first, then B samples receive A heads.
    n_changed_A = _stitch_tails(idx_A, idx_B)
    n_changed_B = _stitch_tails(idx_B, idx_A)
    n_changed = n_changed_A + n_changed_B

    # Undo the normalization so the dataset constructor does not normalize twice.
    X_restored = (X * orig_dataset.std) + orig_dataset.mean

    # Pass mean/std along so the new dataset is normalized with the same statistics.
    mod_dataset = UCIHARInertial(
        root="",
        split="test",
        mean=orig_dataset.mean,
        std=orig_dataset.std,
        preloaded_data=(X_restored, y),
    )

    info = {
        'class_A': class_A,
        'class_B': class_B,
        'p': p,
        'mix': mix,
        'modified_samples': n_changed,
        'modified_ratio': n_changed / N,
    }
    return mod_dataset, info

# ======================== JSON Encoder ========================

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy integers, floats and arrays to built-in Python types."""
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # Anything else is left to the base class, which raises TypeError.
        return super().default(obj)

# ======================== Main Experiment Function ========================

def _transition_level(index, n_moderate=2):
    """Return the intensity label for the scenario at position ``index``.

    The scenario list in ``run_full_comparison`` lists the moderate scenarios
    first, so the first ``n_moderate`` entries are "Moderate" and the rest are
    "Strong".
    """
    return "Moderate" if index < n_moderate else "Strong"


def _retention_pct(acc_orig, acc):
    """Return ``acc`` as a percentage of ``acc_orig`` (0 when ``acc_orig`` is not positive)."""
    return (1 - (acc_orig - acc) / acc_orig) * 100 if acc_orig > 0 else 0


def _train_supervised_epoch(backbone, head, loader, opt, scheduler, ema, cfg, params, train=True):
    """Run one pass over ``loader`` for the supervised baseline.

    Args:
        backbone, head: modules applied as ``head(backbone(x))``.
        loader: iterable yielding ``(x, y)`` batches.
        opt, scheduler: optimizer and per-step LR scheduler (stepped once per batch).
        ema: EMA tracker updated with ``backbone`` after each step, or ``None``.
        cfg: experiment configuration (device, label smoothing, consistency weight, grad clip).
        params: parameters whose gradient norm is clipped to ``cfg.grad_clip``.
        train: when ``False`` the modules are put in eval mode and no update is made.

    Returns:
        Dict with sample-weighted averages: ``loss``, ``ce_loss``, ``cons_loss``, ``acc``.
    """
    if train:
        backbone.train()
        head.train()
    else:
        backbone.eval()
        head.eval()

    total_loss, total_ce_loss, total_cons_loss = 0.0, 0.0, 0.0
    total_correct, total_samples = 0, 0

    # A second iterator over the same loader supplies the partner batch for the
    # consistency loss. It must be created before the main loop's iterator so
    # the RNG is consumed in the same order as before.
    data_iter = iter(loader)
    for x, y in loader:
        x, y = x.to(cfg.device), y.to(cfg.device)

        fmap = backbone(x)
        logits = head(fmap)
        loss_ce = F.cross_entropy(logits, y, label_smoothing=cfg.label_smoothing if train else 0.0)

        # Consistency loss (only during training and when weighted in)
        loss_cons = torch.tensor(0.0, device=cfg.device)
        if train and cfg.consistency_weight > 0:
            try:
                x_b, _ = next(data_iter)
            except StopIteration:
                data_iter = iter(loader)
                x_b, _ = next(data_iter)

            x_b = x_b.to(cfg.device)
            if x_b.size(0) == x.size(0):
                # consistency_loss is called with an object exposing `.backbone`;
                # an ad-hoc instance stands in for a full model here.
                loss_cons = consistency_loss(
                    type('Model', (), {'backbone': backbone})(),
                    head, x, x_b, mix=0.5, device=cfg.device
                )

        loss = loss_ce + cfg.consistency_weight * loss_cons

        if train:
            opt.zero_grad(set_to_none=True)
            loss.backward()
            nn.utils.clip_grad_norm_(params, cfg.grad_clip)
            opt.step()
            scheduler.step()

            if ema is not None:
                ema.update(backbone)

        pred = logits.argmax(dim=-1)
        total_correct += (pred == y).sum().item()
        total_loss += loss.item() * y.size(0)
        total_ce_loss += loss_ce.item() * y.size(0)
        total_cons_loss += loss_cons.item() * y.size(0)
        total_samples += y.size(0)

    return {
        "loss": total_loss / total_samples,
        "ce_loss": total_ce_loss / total_samples,
        "cons_loss": total_cons_loss / total_samples,
        "acc": total_correct / total_samples
    }


def _evaluate_transitions(backbone, head, transition_test_data, acc_orig, ema, cfg, use_ema):
    """Evaluate the model on every transitional test set and log each result.

    Args:
        transition_test_data: list of ``(dataset, info)`` pairs, where ``info``
            carries ``class_A``, ``class_B``, ``p`` and ``mix``.
        acc_orig: accuracy on the unmodified test set, used to compute the drop.
        use_ema: forwarded to ``evaluate_model``.

    Returns:
        List of per-scenario dicts (scenario number, level, classes, p, mix,
        accuracy, F1, accuracy drop).
    """
    transition_results = []
    print("\n   🔍 전이 테스트셋 평가...")

    for i, (test_set_mod, info) in enumerate(transition_test_data):
        test_loader_mod = DataLoader(test_set_mod, cfg.finetune_batch_size, num_workers=cfg.num_workers)
        acc_trans, f1_trans = evaluate_model(backbone, head, test_loader_mod, ema, cfg, use_ema=use_ema)
        drop = acc_orig - acc_trans

        level = _transition_level(i)
        transition_results.append({
            'scenario': i+1,
            'level': level,
            'class_A': info['class_A'],
            'class_B': info['class_B'],
            'p': info['p'],
            'mix': info['mix'],
            'class_acc': acc_trans,
            'class_f1': f1_trans,
            'class_drop': drop,
        })

        print(f"     - [{level}] Scenario {i+1} ({info['class_A']}↔{info['class_B']}): "
              f"Acc={acc_trans:.4f} F1={f1_trans:.4f} (Drop={drop:.4f})")

    return transition_results


def _build_result_row(config_name, method, mode, use_hyperbolic, acc_orig, f1_orig, transition_results):
    """Aggregate per-scenario results into one row of the results table.

    Retention is the share of the original accuracy kept on the transitional
    sets, reported overall and separately for the Moderate and Strong levels.
    """
    avg_trans_acc = np.mean([r['class_acc'] for r in transition_results])
    avg_trans_f1 = np.mean([r['class_f1'] for r in transition_results])
    avg_drop = acc_orig - avg_trans_acc
    retention = _retention_pct(acc_orig, avg_trans_acc)

    # Per-level analysis
    moderate_results = [r for r in transition_results if r['level'] == 'Moderate']
    strong_results = [r for r in transition_results if r['level'] == 'Strong']

    avg_moderate_acc = np.mean([r['class_acc'] for r in moderate_results]) if moderate_results else 0
    avg_strong_acc = np.mean([r['class_acc'] for r in strong_results]) if strong_results else 0

    return {
        "config": config_name,
        "method": method,
        "mode": mode,
        "classifier": "Hyperbolic" if use_hyperbolic else "Linear",
        "orig_acc": acc_orig,
        "orig_f1": f1_orig,
        "avg_trans_acc": avg_trans_acc,
        "avg_trans_f1": avg_trans_f1,
        "avg_drop": avg_drop,
        "retention": retention,
        "retention_moderate": _retention_pct(acc_orig, avg_moderate_acc),
        "retention_strong": _retention_pct(acc_orig, avg_strong_acc),
        "transition_results": transition_results
    }


def _print_final_report(results_table, cfg):
    """Print the comparison table, per-level retention, ranking and conclusion."""
    print(f"\n{'='*80}")
    print("   📊 SUPERVISED vs TRUE SSL 실험 결과 (최적화 버전)")
    print("="*80)
    print(f"{'Config':<35} {'Method':<12} {'Mode':<12} {'Classifier':<12} {'Orig Acc':<10} {'Trans Acc':<11} {'Drop':<10} {'Retention':<10}")
    print("-" * 115)

    for r in results_table:
        print(f"{r['config']:<35} {r['method']:<12} {r['mode']:<12} {r['classifier']:<12} "
              f"{r['orig_acc']:.4f}     {r['avg_trans_acc']:.4f}      "
              f"{r['avg_drop']:.4f}  {r['retention']:.2f}%")

    # Per-level retention
    print("\n" + "="*80)
    print("📊 레벨별 Retention 분석")
    print("="*80)
    print(f"{'Config':<35} {'Overall':<12} {'Moderate':<12} {'Strong':<12}")
    print("-" * 80)

    for r in results_table:
        print(f"{r['config']:<35} {r['retention']:>6.2f}%      {r['retention_moderate']:>6.2f}%       {r['retention_strong']:>6.2f}%")

    # Detailed analysis
    print("\n" + "="*80)
    print("📊 상세 비교 분석")
    print("="*80)

    # Final ranking by overall retention (highest first)
    sorted_results = sorted(results_table, key=lambda x: x['retention'], reverse=True)
    print("\n🏆 최종 성능 랭킹 (Overall Retention 기준)")
    print("-" * 80)

    for rank, r in enumerate(sorted_results, 1):
        method_mode = f"{r['method']}-{r['mode']}" if r['method'] == 'ssl' else r['method']
        print(f"   {rank}. {r['config']:<35} ({method_mode:<20}) "
              f"Retention: {r['retention']:.2f}% (Mod: {r['retention_moderate']:.2f}% | Str: {r['retention_strong']:.2f}%)")

    best_config = sorted_results[0]
    best_ssl = max([r for r in results_table if r['method'] == 'ssl'], key=lambda x: x['retention'])
    best_sup = max([r for r in results_table if r['method'] == 'supervised'], key=lambda x: x['retention'])

    print("\n" + "="*80)
    print("🎯 결론")
    print("="*80)
    print(f"   - 최고 성능: {best_config['config']} (Retention: {best_config['retention']:.2f}%)")
    print(f"   - Supervised baseline: {best_sup['retention']:.2f}% (Mod: {best_sup['retention_moderate']:.2f}% | Str: {best_sup['retention_strong']:.2f}%)")
    print(f"   - SSL best: {best_ssl['retention']:.2f}% (Mod: {best_ssl['retention_moderate']:.2f}% | Str: {best_ssl['retention_strong']:.2f}%)")
    print(f"   - Performance gap: {abs(best_ssl['retention'] - best_sup['retention']):.2f}pp")

    # Summary of the optimizations applied in this version
    print("\n   ✨ 최적화 개선사항:")
    print("   - 정규화 버그 수정 ✅")
    print("   - Cosine + Warmup 스케줄러 ✅")
    print("   - EMA (Exponential Moving Average) ✅")
    print("   - 백본/헤드 분리 학습률 (Fine-tune) ✅")
    print("   - 전이-유사 일관성 손실 (Tail-Head Stitch) ✅")
    # Values come from the active config instead of being hard-coded, so the
    # line stays truthful when the augmentation settings change.
    print(f"   - 증강 강도 최적화 (warp: {cfg.aug_time_warp_prob:.2f}, cutout: {cfg.aug_cutout_ratio:.2f}) ✅")
    print("   - 학습 가능한 Hyperbolic c ✅")
    print("   - 2레벨 전이 시나리오 (Moderate/Strong) ✅")


def _save_results(results_table, cfg):
    """Write the full results and a column-oriented visualization file under ``cfg.save_dir``."""
    save_path = os.path.join(cfg.save_dir, "supervised_vs_ssl_results_optimized.json")
    with open(save_path, "w") as f:
        json.dump(results_table, f, indent=2, cls=NumpyEncoder)

    visualization_data = {
        'configs': [r['config'] for r in results_table],
        'methods': [r['method'] for r in results_table],
        'modes': [r['mode'] for r in results_table],
        'classifiers': [r['classifier'] for r in results_table],
        'orig_acc': [r['orig_acc'] for r in results_table],
        'orig_f1': [r['orig_f1'] for r in results_table],
        'trans_acc': [r['avg_trans_acc'] for r in results_table],
        'trans_f1': [r['avg_trans_f1'] for r in results_table],
        'retention': [r['retention'] for r in results_table],
        'retention_moderate': [r['retention_moderate'] for r in results_table],
        'retention_strong': [r['retention_strong'] for r in results_table],
        'avg_drop': [r['avg_drop'] for r in results_table]
    }

    viz_path = os.path.join(cfg.save_dir, "visualization_data_optimized.json")
    with open(viz_path, "w") as f:
        json.dump(visualization_data, f, indent=2, cls=NumpyEncoder)

    print(f"\n✅ Results saved to:")
    print(f"   - {save_path}")
    print(f"   - {viz_path}")
    print("="*80)


def run_full_comparison(cfg: Config):
    """Run the complete supervised vs. self-supervised comparison.

    Steps:
      1. Load the train/test splits (test normalized with the train statistics).
      2. Build five transitional test sets (two "Moderate", three "Strong").
      3. For each of six configurations (supervised / SSL linear-eval /
         SSL fine-tune, each with a linear or hyperbolic head): reseed, train,
         restore the best epoch, and evaluate on the original and transitional
         test sets.
      4. Print the comparison report and write two JSON files to ``cfg.save_dir``.

    Args:
        cfg: experiment configuration.

    Returns:
        None. Results are printed and saved to disk.
    """
    os.makedirs(cfg.save_dir, exist_ok=True)

    # Load datasets
    print("\n📦 Loading UCI-HAR Dataset...")
    train_set = UCIHARInertial(cfg.data_dir, "train")
    test_set_orig = UCIHARInertial(cfg.data_dir, "test", mean=train_set.mean, std=train_set.std)
    print(f"   - Train samples: {len(train_set)}")
    print(f"   - Test samples: {len(test_set_orig)}")

    # Transitional test sets at two intensity levels: (class_A, class_B, p, mix)
    scenarios = [
        # Level 1: Moderate
        ("STANDING", "SITTING", 0.50, 0.40),
        ("WALKING", "WALKING_UPSTAIRS", 0.55, 0.42),
        # Level 2: Strong
        ("STANDING", "SITTING", 0.70, 0.55),
        ("WALKING", "WALKING_UPSTAIRS", 0.65, 0.52),
        ("SITTING", "LAYING", 0.75, 0.58),
    ]

    print("\n" + "="*80)
    print("    🔬 TRANSITIONAL TEST SETS 생성 (2레벨 강도)")
    print("="*80)

    transition_test_data = []
    for i, (clsA, clsB, p, mix) in enumerate(scenarios):
        test_set_mod, info = create_transitional_test_set(test_set_orig, clsA, clsB, p=p, mix=mix)
        transition_test_data.append((test_set_mod, info))
        level = _transition_level(i)
        print(f"   - [{level}] {clsA}↔{clsB} (p={p:.2f}, mix={mix:.2f}): {info['modified_samples']}개 샘플 변형")

    # Experiment configurations
    experiment_configs = [
        {"name": "Supervised_Linear", "method": "supervised", "use_hyperbolic": False},
        {"name": "Supervised_Hyperbolic", "method": "supervised", "use_hyperbolic": True},
        {"name": "SSL_LinearEval_Linear", "method": "ssl", "mode": "linear_eval", "use_hyperbolic": False},
        {"name": "SSL_LinearEval_Hyperbolic", "method": "ssl", "mode": "linear_eval", "use_hyperbolic": True},
        {"name": "SSL_FineTune_Linear", "method": "ssl", "mode": "finetune", "use_hyperbolic": False},
        {"name": "SSL_FineTune_Hyperbolic", "method": "ssl", "mode": "finetune", "use_hyperbolic": True},
    ]

    results_table = []

    for exp_cfg in experiment_configs:
        print(f"\n{'='*80}\n   실험: {exp_cfg['name']}\n{'='*80}")

        # Reseed so every configuration starts from the same RNG state.
        random.seed(SEED)
        np.random.seed(SEED)
        torch.manual_seed(SEED)
        torch.cuda.manual_seed_all(SEED)

        if exp_cfg['method'] == 'supervised':
            # Supervised Learning
            print("\n📚 Supervised Learning (With Labels)")
            print("-" * 80)

            backbone = ResNetTransformerBackbone(
                in_channels=9, d_model=cfg.d_model, n_heads=cfg.n_heads,
                n_layers=cfg.n_layers, dropout=cfg.dropout
            ).to(cfg.device)

            if exp_cfg['use_hyperbolic']:
                head = HyperbolicClassificationHead(cfg.d_model, num_classes=6, c_init=cfg.hyperbolic_c_init).to(cfg.device)
            else:
                head = ClassificationHead(cfg.d_model, num_classes=6).to(cfg.device)

            finetune_loader = DataLoader(train_set, cfg.finetune_batch_size, shuffle=True, num_workers=cfg.num_workers)
            test_loader_orig = DataLoader(test_set_orig, cfg.finetune_batch_size, num_workers=cfg.num_workers)

            params = list(backbone.parameters()) + list(head.parameters())
            opt = torch.optim.AdamW(params, lr=cfg.finetune_lr, weight_decay=cfg.weight_decay)

            # Cosine + warmup scheduler (stepped per batch)
            total_steps = cfg.finetune_epochs * len(finetune_loader)
            warmup_steps = cfg.finetune_warmup_epochs * len(finetune_loader)
            scheduler = get_cosine_schedule_with_warmup(opt, warmup_steps, total_steps)

            # EMA of the backbone weights
            ema = EMA(backbone, decay=cfg.ema_decay) if cfg.use_ema else None

            best_acc, best_wts = 0.0, None
            print(f"Training for {cfg.finetune_epochs} epochs...")

            # NOTE(review): the best epoch is selected on the test set, so the
            # reported accuracy is optimistic; a validation split would avoid this.
            for epoch in range(1, cfg.finetune_epochs + 1):
                stats = _train_supervised_epoch(backbone, head, finetune_loader, opt, scheduler, ema, cfg, params, train=True)
                te_acc, te_f1 = evaluate_model(backbone, head, test_loader_orig, ema, cfg, use_ema=cfg.use_ema)

                if te_acc > best_acc:
                    best_acc = te_acc
                    best_wts = {
                        'backbone': copy.deepcopy(backbone.state_dict()),
                        'head': copy.deepcopy(head.state_dict()),
                    }
                    if ema is not None:
                        best_wts['ema_shadow'] = copy.deepcopy(ema.shadow)

                if epoch % 10 == 0 or epoch == 1:
                    print(f"[Supervised {epoch:02d}/{cfg.finetune_epochs}] "
                          f"Train L:{stats['loss']:.4f} CE:{stats['ce_loss']:.4f} Cons:{stats['cons_loss']:.4f} A:{stats['acc']:.4f} | "
                          f"Test A:{te_acc:.4f} F1:{te_f1:.4f}")

            # Restore the best checkpoint (including the matching EMA shadow).
            if best_wts:
                backbone.load_state_dict(best_wts['backbone'])
                head.load_state_dict(best_wts['head'])
                if ema is not None and 'ema_shadow' in best_wts:
                    ema.shadow = best_wts['ema_shadow']

            print(f"✅ Best Test Acc: {best_acc:.4f}")

            acc_orig, f1_orig = evaluate_model(backbone, head, test_loader_orig, ema, cfg, use_ema=cfg.use_ema)

            transition_results = _evaluate_transitions(
                backbone, head, transition_test_data, acc_orig, ema, cfg, use_ema=cfg.use_ema
            )

            results_table.append(_build_result_row(
                exp_cfg["name"], "supervised", "supervised", exp_cfg["use_hyperbolic"],
                acc_orig, f1_orig, transition_results
            ))

        else:  # SSL
            # Stage 1: SSL Pretrain
            print("\n📚 Stage 1: Self-Supervised Pretraining (No Labels)")
            print("-" * 80)

            ssl_model = SSLModel(
                d_model=cfg.d_model, n_heads=cfg.n_heads, n_layers=cfg.n_layers,
                dropout=cfg.dropout, projection_dim=cfg.projection_dim
            ).to(cfg.device)

            pretrain_loader = DataLoader(train_set, cfg.pretrain_batch_size, shuffle=True, num_workers=cfg.num_workers)
            ssl_opt = torch.optim.AdamW(ssl_model.parameters(), lr=cfg.pretrain_lr, weight_decay=cfg.weight_decay)

            # Cosine + warmup scheduler
            total_steps = cfg.pretrain_epochs * len(pretrain_loader)
            warmup_steps = cfg.pretrain_warmup_epochs * len(pretrain_loader)
            ssl_scheduler = get_cosine_schedule_with_warmup(ssl_opt, warmup_steps, total_steps)

            # EMA of the whole SSL model
            ssl_ema = EMA(ssl_model, decay=cfg.ema_decay) if cfg.use_ema else None

            print(f"Pretraining for {cfg.pretrain_epochs} epochs...")
            for epoch in range(1, cfg.pretrain_epochs + 1):
                stats = pretrain_one_epoch(ssl_model, pretrain_loader, ssl_opt, ssl_scheduler, ssl_ema, cfg)

                if epoch % 10 == 0 or epoch == 1:
                    print(f"[Pretrain {epoch:03d}/{cfg.pretrain_epochs}] SSL Loss: {stats['ssl_loss']:.4f}")

            # Stage 2 starts from the EMA weights of the pretrained model.
            if ssl_ema is not None:
                ssl_ema.apply_shadow(ssl_model)

            print("✅ Pretraining Complete!")

            # Stage 2: Linear Eval or Fine-tune
            print(f"\n📚 Stage 2: {exp_cfg['mode'].upper()} (With Labels)")
            print("-" * 80)

            if exp_cfg['use_hyperbolic']:
                head = HyperbolicClassificationHead(cfg.d_model, num_classes=6, c_init=cfg.hyperbolic_c_init).to(cfg.device)
            else:
                head = ClassificationHead(cfg.d_model, num_classes=6).to(cfg.device)

            finetune_loader = DataLoader(train_set, cfg.finetune_batch_size, shuffle=True, num_workers=cfg.num_workers)
            test_loader_orig = DataLoader(test_set_orig, cfg.finetune_batch_size, num_workers=cfg.num_workers)

            is_finetune = exp_cfg['mode'] == 'finetune'
            # EMA weights are only used for evaluation when the backbone is trained.
            eval_with_ema = (cfg.use_ema and is_finetune)

            if exp_cfg['mode'] == 'linear_eval':
                for param in ssl_model.backbone.parameters():
                    param.requires_grad = False

                opt = torch.optim.AdamW(head.parameters(), lr=cfg.finetune_lr, weight_decay=cfg.weight_decay)

                # NOTE(review): this scheduler is constructed but never passed to
                # linear_eval_epoch below, so it is not stepped from this function.
                # If the schedule's factor at step 0 is 0 (common for linear
                # warmup), the head would train with lr=0 - confirm against
                # get_cosine_schedule_with_warmup / linear_eval_epoch.
                total_steps = cfg.finetune_epochs * len(finetune_loader)
                warmup_steps = cfg.finetune_warmup_epochs * len(finetune_loader)
                scheduler = get_cosine_schedule_with_warmup(opt, warmup_steps, total_steps)

                ema = None  # backbone is frozen in linear eval, so no EMA is needed

                train_fn = lambda: linear_eval_epoch(ssl_model.backbone, head, finetune_loader, opt, cfg, train=True)

            else:  # finetune
                for param in ssl_model.backbone.parameters():
                    param.requires_grad = True

                # Separate learning rates for backbone and head
                backbone_params = list(ssl_model.backbone.parameters())
                head_params = list(head.parameters())

                opt = torch.optim.AdamW([
                    {'params': backbone_params, 'lr': cfg.finetune_lr * cfg.finetune_backbone_lr_ratio},
                    {'params': head_params, 'lr': cfg.finetune_lr},
                ], weight_decay=cfg.weight_decay)

                total_steps = cfg.finetune_epochs * len(finetune_loader)
                warmup_steps = cfg.finetune_warmup_epochs * len(finetune_loader)
                scheduler = get_cosine_schedule_with_warmup(opt, warmup_steps, total_steps)

                # EMA (backbone only)
                ema = EMA(ssl_model.backbone, decay=cfg.ema_decay) if cfg.use_ema else None

                train_fn = lambda: finetune_epoch(ssl_model, head, finetune_loader, opt, scheduler, ema, cfg, train=True)

            best_acc, best_wts = 0.0, None
            print(f"{exp_cfg['mode']} for {cfg.finetune_epochs} epochs...")

            # NOTE(review): as in the supervised branch, the best epoch is
            # selected on the test set.
            for epoch in range(1, cfg.finetune_epochs + 1):
                stats = train_fn()
                te_acc, te_f1 = evaluate_model(ssl_model.backbone, head, test_loader_orig, ema, cfg, use_ema=eval_with_ema)

                if te_acc > best_acc:
                    best_acc = te_acc
                    best_wts = {
                        'head': copy.deepcopy(head.state_dict()),
                    }
                    # The backbone only changes in fine-tune mode.
                    if is_finetune:
                        best_wts['backbone'] = copy.deepcopy(ssl_model.backbone.state_dict())
                        if ema is not None:
                            best_wts['ema_shadow'] = copy.deepcopy(ema.shadow)

                if epoch % 10 == 0 or epoch == 1:
                    if 'cons_loss' in stats:
                        print(f"[{exp_cfg['mode']} {epoch:02d}/{cfg.finetune_epochs}] "
                              f"Train L:{stats['loss']:.4f} CE:{stats['ce_loss']:.4f} Cons:{stats['cons_loss']:.4f} A:{stats['acc']:.4f} | "
                              f"Test A:{te_acc:.4f} F1:{te_f1:.4f}")
                    else:
                        print(f"[{exp_cfg['mode']} {epoch:02d}/{cfg.finetune_epochs}] "
                              f"Train L:{stats['loss']:.4f} A:{stats['acc']:.4f} | "
                              f"Test A:{te_acc:.4f} F1:{te_f1:.4f}")

            if best_wts:
                head.load_state_dict(best_wts['head'])
                if is_finetune:
                    ssl_model.backbone.load_state_dict(best_wts['backbone'])
                    if ema is not None and 'ema_shadow' in best_wts:
                        ema.shadow = best_wts['ema_shadow']

            print(f"✅ Best Test Acc: {best_acc:.4f}")

            acc_orig, f1_orig = evaluate_model(ssl_model.backbone, head, test_loader_orig, ema, cfg, use_ema=eval_with_ema)

            transition_results = _evaluate_transitions(
                ssl_model.backbone, head, transition_test_data, acc_orig, ema, cfg, use_ema=eval_with_ema
            )

            results_table.append(_build_result_row(
                exp_cfg["name"], "ssl", exp_cfg['mode'], exp_cfg["use_hyperbolic"],
                acc_orig, f1_orig, transition_results
            ))

    # Print final results, then persist them
    _print_final_report(results_table, cfg)
    _save_results(results_table, cfg)

In [19]:
# ======================== Main Entry Point ========================

def main():
    """Print the experiment banner and the active settings, then run the comparison.

    The banner is assembled as a list of lines and emitted one ``print`` per
    line, so the console output matches a sequence of individual prints.
    """
    config = Config()
    rule = "=" * 80
    # Shown in both the supervised and the SSL sections.
    ema_label = config.ema_decay if config.use_ema else 'Disabled'

    banner = [
        # Title
        "\n" + rule,
        "   🧪 UCI-HAR Comprehensive Comparison (✨ OPTIMIZED)",
        "   SUPERVISED vs TRUE SELF-SUPERVISED LEARNING",
        "   Architecture: ResNet + Transformer Encoder",
        rule,
        # Experiment design
        "\n   📋 실험 설계:",
        "   1. 동일한 백본 (ResNet + Transformer)",
        "   2. 2레벨 전이 데이터셋 (Moderate/Strong)",
        "   3. 6가지 설정 비교:",
        "      ├─ Supervised × (Linear, Hyperbolic)",
        "      ├─ SSL Linear Eval × (Linear, Hyperbolic)",
        "      └─ SSL Fine-tune × (Linear, Hyperbolic)",
        rule,
        # Supervised settings
        "\n   ⚙️  Supervised 설정:",
        f"   - Epochs: {config.finetune_epochs}",
        f"   - Batch size: {config.finetune_batch_size}",
        f"   - Learning rate: {config.finetune_lr}",
        f"   - Warmup: {config.finetune_warmup_epochs} epochs",
        f"   - EMA decay: {ema_label}",
        f"   - Consistency weight: {config.consistency_weight}",
        "   - Training: End-to-end with labels + consistency loss",
        # SSL settings
        "\n   ⚙️  SSL 설정:",
        f"   - Stage 1 (Pretrain): {config.pretrain_epochs} epochs, batch={config.pretrain_batch_size}, lr={config.pretrain_lr}",
        "     → Contrastive learning only (NO LABELS)",
        "     → Label-independent augmentation",
        f"     → Warmup: {config.pretrain_warmup_epochs} epochs",
        f"     → EMA decay: {ema_label}",
        f"   - Stage 2 (Eval/FT): {config.finetune_epochs} epochs, batch={config.finetune_batch_size}, lr={config.finetune_lr}",
        "     → Linear Eval: Freeze backbone",
        f"     → Fine-tune: Train all (backbone LR × {config.finetune_backbone_lr_ratio})",
        f"     → Consistency weight: {config.consistency_weight}",
        # Augmentations
        "\n   🔧 Augmentations (SSL - Optimized):",
        f"   - Jitter (scale={config.aug_jitter_scale})",
        f"   - Scaling (range={config.aug_scale_range})",
        f"   - Channel Drop (prob={config.aug_channel_drop_prob})",
        f"   - Time Warp (prob={config.aug_time_warp_prob}) ✅ 0.3→0.10",
        f"   - Cutout (prob={config.aug_cutout_prob}, ratio={config.aug_cutout_ratio}) ✅ 0.2→0.10",
        "   - ALL label-independent!",
        # Architecture
        "\n   🏗️  Architecture:",
        f"   - Backbone: ResNet(layers=[2,2,2]) + Transformer(heads={config.n_heads}, layers={config.n_layers})",
        f"   - d_model: {config.d_model}, dropout: {config.dropout}",
        f"   - Classifier: Linear vs Hyperbolic (c_init={config.hyperbolic_c_init}, learnable ✅)",
        f"   - Projection dim (SSL): {config.projection_dim}",
        # Contrastive objective
        "\n   🔬 SSL Contrastive Learning:",
        "   - Loss: NT-Xent (InfoNCE)",
        f"   - Temperature: {config.temperature}",
        "   - Negative samples: 2*batch_size - 2",
        # Optimizations applied in this version
        "\n   ✨ 최적화 개선사항:",
        "   - 정규화 버그 수정 (전이 테스트셋)",
        "   - Cosine Annealing + Warmup",
        "   - EMA (Exponential Moving Average)",
        "   - 백본/헤드 분리 학습률 (Fine-tune)",
        "   - 전이-유사 일관성 손실 (Tail-Head Stitch)",
        "   - 증강 강도 최적화 (경계 정보 보존)",
        "   - 학습 가능한 Hyperbolic c",
        "   - 2레벨 전이 시나리오 (Moderate/Strong)",
        rule + "\n",
    ]
    for text in banner:
        print(text)

    run_full_comparison(config)

# Run the experiment when this cell/script is executed as the main module.
if __name__ == "__main__":
    main()


   🧪 UCI-HAR Comprehensive Comparison (✨ OPTIMIZED)
   SUPERVISED vs TRUE SELF-SUPERVISED LEARNING
   Architecture: ResNet + Transformer Encoder

   📋 실험 설계:
   1. 동일한 백본 (ResNet + Transformer)
   2. 2레벨 전이 데이터셋 (Moderate/Strong)
   3. 6가지 설정 비교:
      ├─ Supervised × (Linear, Hyperbolic)
      ├─ SSL Linear Eval × (Linear, Hyperbolic)
      └─ SSL Fine-tune × (Linear, Hyperbolic)

   ⚙️  Supervised 설정:
   - Epochs: 50
   - Batch size: 128
   - Learning rate: 0.0003
   - Warmup: 5 epochs
   - EMA decay: 0.9995
   - Consistency weight: 0.2
   - Training: End-to-end with labels + consistency loss

   ⚙️  SSL 설정:
   - Stage 1 (Pretrain): 100 epochs, batch=512, lr=0.001
     → Contrastive learning only (NO LABELS)
     → Label-independent augmentation
     → Warmup: 10 epochs
     → EMA decay: 0.9995
   - Stage 2 (Eval/FT): 50 epochs, batch=128, lr=0.0003
     → Linear Eval: Freeze backbone
     → Fine-tune: Train all (backbone LR × 0.1)
     → Consistency weight: 0.2

   🔧 Augmentations 