# SLX02 Text Branch: RoBERTa Fine-tuning (Kaggle)

**Project**: SLX02 - Conformer-GAT Fusion for Speech Emotion Recognition  
**Target**: 68-71% WA on IEMOCAP 4-class (text-only)  
**Benchmark**: RobinNet (2024) achieves 71.1% WA with RoBERTa

## Key Improvements (Based on Literature Review)
1. **LOSO Protocol** - Leave-One-Session-Out for speaker independence
2. **4-Class Mapping** - Standard: neu, hap+exc, ang+fru, sad
3. **Attention Pooling** - Better than mean/cls for SER
4. **Label Smoothing** - Prevents overconfidence
5. **Class Weights** - Handles imbalanced data
6. **Reports WA & UA** - Required for literature comparison

## Usage
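- Upload `data/iemocap_manifest.jsonl` as a Kaggle Dataset named `slx02-ser-dataset` (the notebook reads `/kaggle/input/slx02-ser-dataset/iemocap_manifest.jsonl`; edit `Config.MANIFEST_PATH` if your dataset slug differs)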
- Use GPU Accelerator (T4 x2 or P100)
- Runtime: ~30-60 min per fold

In [None]:
# Install dependencies
!pip install -q -U transformers==4.44.0 accelerate>=1.0.0 datasets>=2.14 scikit-learn>=1.3.0 tqdm

In [None]:
# Check environment
import torch
import transformers
import numpy as np
import json
import re
import random
from pathlib import Path
from datetime import datetime
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

# Report library versions so the run log records the environment.
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Device 0 only: name and total memory (bytes converted to GB, base 1e9).
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
print(f"Transformers: {transformers.__version__}")

In [None]:
# Configuration
class Config:
    """Paths and hyperparameters shared by every cell of this notebook."""

    # Data
    # JSONL manifest read at load time; one record per line.
    MANIFEST_PATH = "/kaggle/input/slx02-ser-dataset/iemocap_manifest.jsonl"
    # Checkpoints, plots and results.json are written here.
    OUT_DIR = "/kaggle/working/text_branch_results"
    
    # Model
    BACKBONE = "roberta-base"  # Best for text-only SER (RobinNet benchmark)
    # Token length every utterance is padded/truncated to.
    MAX_LENGTH = 128
    POOLING = "attention"  # "attention", "cls", or "mean"
    
    # Training
    # Upper bound on epochs; early stopping (PATIENCE) may end a fold sooner.
    EPOCHS = 30
    BATCH_SIZE = 16  # Adjust based on GPU memory
    LR = 2e-5  # For classifier; encoder uses 0.1x
    # Passed to AdamW.
    WEIGHT_DECAY = 0.01
    # Fraction of total steps spent ramping up (OneCycleLR pct_start).
    WARMUP_RATIO = 0.1
    # Max gradient norm; clipping is skipped when this is <= 0.
    GRAD_CLIP = 1.0
    
    # Regularization
    # Used in both the classification head and the attention pooler.
    DROPOUT = 0.3
    LABEL_SMOOTHING = 0.1
    # When True, the training loss is weighted by inverse class frequency.
    USE_CLASS_WEIGHTS = True
    
    # Early stopping
    # Consecutive epochs without a validation-WA improvement before stopping.
    PATIENCE = 5
    
    # Evaluation
    NUM_CLASSES = 4  # Standard 4-class IEMOCAP
    
    # Reproducibility
    # Each fold seeds its RNGs with SEED + fold_id.
    SEED = 42

# Shared settings instance used by all later cells.
config = Config()
# Create the output directory up front; a pre-existing directory is fine.
Path(config.OUT_DIR).mkdir(parents=True, exist_ok=True)
print(f"Output directory: {config.OUT_DIR}")

In [None]:
# =============================================================================
# 4-Class Label Mapping (Standard IEMOCAP)
# =============================================================================
# From literature: neu(0), hap+exc(1), ang+fru(2), sad(3)

# Alias -> class index. Each inner tuple lists every accepted spelling of one
# class, in class-index order (excited folds into happy, frustration into angry).
LABEL_4CLASS_MAP = {
    alias: class_index
    for class_index, aliases in enumerate((
        ("neu", "neutral"),
        ("hap", "happy", "exc", "excited"),
        ("ang", "angry", "fru", "frustration"),
        ("sad", "sadness"),
    ))
    for alias in aliases
}

# Display names, positioned by class index.
LABEL_NAMES = ["neutral", "happy", "angry", "sad"]

def canonicalize_label(label) -> int:
    """Map a raw manifest label to its 4-class index.

    Args:
        label: A string tag, looked up in ``LABEL_4CLASS_MAP`` after
            lower-casing and stripping whitespace, or an integer id
            (Python or NumPy) in the 6-class scheme, which is folded as
            0->0, 1->1, 2->2, 3->3, 4->1 (exc), 5->2 (fru).

    Returns:
        The class index (0-3), or -1 when the label is missing or unknown.
    """
    # bool is a subclass of int: without this guard True/False would be
    # silently read as classes 1/0.
    if label is None or isinstance(label, bool):
        return -1
    # NumPy integer scalars are not ``int`` instances, so accept them explicitly.
    if isinstance(label, (int, np.integer)):
        map_6_to_4 = {0: 0, 1: 1, 2: 2, 3: 3, 4: 1, 5: 2}
        return map_6_to_4.get(int(label), -1)
    key = str(label).lower().strip()
    return LABEL_4CLASS_MAP.get(key, -1)

def get_session_id(record: Dict) -> Optional[int]:
    """Return the session number stored under ``record["session"]``.

    Integer values are returned as they are. Any other value is converted to
    a string and its first run of digits is parsed. Returns None when the
    field is absent, None, or contains no digits.
    """
    raw = record.get("session")
    if isinstance(raw, int):
        return raw
    if raw is None:
        return None
    digits = re.search(r"(\d+)", str(raw))
    if digits is None:
        return None
    return int(digits.group(1))

# Echo the label scheme so the run log records which mapping was in effect.
print("Label mapping defined:")
print(f"  Classes: {LABEL_NAMES}")
print(f"  Mapping: neu→0, hap+exc→1, ang+fru→2, sad→3")

In [None]:
# =============================================================================
# Load and Prepare Data
# =============================================================================

def read_manifest(path: str) -> List[Dict]:
    """Parse a UTF-8 JSON Lines file into a list, skipping blank lines."""
    with open(path, "r", encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        return [json.loads(entry) for entry in stripped if entry]

def filter_valid_records(records: List[Dict]) -> List[Dict]:
    """Return, in order, the records whose label maps to a 4-class index."""
    return [
        rec for rec in records
        if canonicalize_label(rec.get("label")) >= 0
    ]

def loso_split(records: List[Dict], test_session: int) -> Tuple[List[Dict], List[Dict]]:
    """Split records for Leave-One-Session-Out evaluation.

    Returns ``(train, test)``: ``test`` holds the records whose session id
    equals ``test_session``; everything else, including records with no
    recognisable session, goes to ``train``. Input order is preserved.
    """
    train_part: List[Dict] = []
    test_part: List[Dict] = []
    for rec in records:
        bucket = test_part if get_session_id(rec) == test_session else train_part
        bucket.append(rec)
    return train_part, test_part

def get_class_distribution(records: List[Dict]) -> Dict[int, int]:
    """Count records per 4-class index, ignoring records with invalid labels."""
    counts: Dict[int, int] = {}
    for class_idx in map(canonicalize_label, (rec.get("label") for rec in records)):
        if class_idx < 0:
            continue
        counts[class_idx] = counts.get(class_idx, 0) + 1
    return counts

# Load data: read the manifest and keep only records with a usable 4-class label.
print(f"Loading manifest from: {config.MANIFEST_PATH}")
all_records = read_manifest(config.MANIFEST_PATH)
all_records = filter_valid_records(all_records)

print(f"\nTotal valid samples (4-class): {len(all_records)}")
if not all_records:
    # Fail with an actionable message instead of a ZeroDivisionError in the
    # percentage computation below.
    raise ValueError(
        f"No records with a valid 4-class label found in {config.MANIFEST_PATH}; "
        "check the manifest's 'label' field."
    )

dist = get_class_distribution(all_records)
print(f"Class distribution:")
for i, name in enumerate(LABEL_NAMES):
    count = dist.get(i, 0)
    pct = count / len(all_records) * 100
    print(f"  {name}: {count} ({pct:.1f}%)")

In [None]:
# =============================================================================
# Model Components
# =============================================================================
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoModel, AutoTokenizer

class AttentionPooling(nn.Module):
    """Learned attention pooling over the token axis.

    A small MLP scores every token; the scores are softmax-normalised along
    the sequence and used as weights for a weighted sum of the token vectors.
    """
    
    def __init__(self, hidden_dim: int, dropout: float = 0.1):
        """
        Args:
            hidden_dim: Size of each token vector.
            dropout: Dropout probability inside the scoring MLP.
        """
        super().__init__()
        self.attention = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 4),
            nn.Tanh(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 4, 1),
        )
    
    def forward(self, tokens: torch.Tensor, mask: torch.Tensor = None) -> torch.Tensor:
        """Pool ``tokens`` (B, T, H) into (B, H).

        Args:
            tokens: Token representations, shape (B, T, H).
            mask: Optional (B, T) tensor; positions equal to 0 are excluded.

        Returns:
            The attention-weighted sum of the tokens, shape (B, H).
        """
        scores = self.attention(tokens).squeeze(-1)  # (B, T)
        if mask is not None:
            # Use the dtype's most negative finite value rather than -inf:
            # masked tokens still get zero weight, but a row with every
            # position masked yields uniform weights instead of NaN.
            scores = scores.masked_fill(mask == 0, torch.finfo(scores.dtype).min)
        weights = F.softmax(scores, dim=-1)
        # (B, 1, T) @ (B, T, H) -> (B, 1, H) -> (B, H)
        pooled = torch.bmm(weights.unsqueeze(1), tokens).squeeze(1)
        return pooled


class TextClassifier(nn.Module):
    """Pretrained text encoder + pooling + MLP classification head.

    The encoder is loaded with ``AutoModel.from_pretrained``. Its last hidden
    states are reduced to one vector per utterance by the chosen pooling
    strategy and passed through a two-layer head.
    """

    # Pooling strategies understood by ``forward``.
    _POOLING_CHOICES = ("attention", "cls", "mean")
    
    def __init__(
        self,
        backbone: str = "roberta-base",
        num_classes: int = 4,
        dropout: float = 0.3,
        pooling: str = "attention",
    ):
        """
        Args:
            backbone: Model name or path given to ``AutoModel.from_pretrained``.
            num_classes: Number of output logits.
            dropout: Dropout probability for the head and the attention pooler.
            pooling: One of "attention", "cls" or "mean".

        Raises:
            ValueError: If ``pooling`` is not a supported strategy.
        """
        super().__init__()
        # Reject typos up front; an unknown value would otherwise silently
        # fall through to mean pooling in forward().
        if pooling not in self._POOLING_CHOICES:
            raise ValueError(
                f"Unknown pooling {pooling!r}; expected one of {self._POOLING_CHOICES}"
            )

        self.encoder = AutoModel.from_pretrained(backbone)
        hidden_size = self.encoder.config.hidden_size
        
        self.pooling_type = pooling
        if pooling == "attention":
            self.pooler = AttentionPooling(hidden_size, dropout)
        else:
            self.pooler = None
        
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.LayerNorm(hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, num_classes),
        )
        
        self.hidden_size = hidden_size
    
    def forward(self, input_ids, attention_mask, return_features=False):
        """Classify a batch of tokenised utterances.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Mask forwarded to the encoder and used for
                pooling (0 marks positions to ignore).
            return_features: When True, also return the pooled vector.

        Returns:
            ``{"logits": ...}``, plus ``"features"`` when requested.
        """
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        tokens = outputs.last_hidden_state
        
        if self.pooling_type == "attention" and self.pooler is not None:
            pooled = self.pooler(tokens, attention_mask)
        elif self.pooling_type == "cls":
            pooled = tokens[:, 0, :]
        else:  # mean
            # Cast the mask to the activation dtype so the division and the
            # float clamp below never operate on an integer tensor.
            mask = attention_mask.unsqueeze(-1).to(tokens.dtype)
            pooled = (tokens * mask).sum(1) / mask.sum(1).clamp(min=1e-8)
        
        logits = self.classifier(pooled)
        
        if return_features:
            return {"logits": logits, "features": pooled}
        return {"logits": logits}

# Progress marker: printed once the definitions in this cell have executed.
print("Model components defined ✓")

In [None]:
# =============================================================================
# Dataset and DataLoader
# =============================================================================

class IEMOCAPTextDataset(Dataset):
    """Text dataset for IEMOCAP with 4-class labels.

    Each item is tokenised on access and padded/truncated to ``max_length``.
    """
    
    def __init__(self, records: List[Dict], tokenizer, max_length: int = 128):
        """
        Args:
            records: Manifest records; each is read for "text" and "label".
            tokenizer: Callable tokenizer invoked with ``padding``,
                ``truncation``, ``max_length`` and ``return_tensors``.
            max_length: Fixed token length of every encoded sample.
        """
        self.records = records
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.records)
    
    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``label`` tensors for one record."""
        r = self.records[idx]
        # ``get("text", "")`` still yields None when the key is present with a
        # null value; fall back to an empty string so the tokenizer always
        # receives text.
        text = r.get("text") or ""
        label = canonicalize_label(r.get("label"))
        
        encoded = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        
        return {
            # return_tensors="pt" adds a leading batch axis of size 1; drop it
            # so the DataLoader can stack samples itself.
            "input_ids": encoded["input_ids"].squeeze(0),
            "attention_mask": encoded["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }


class LabelSmoothingCE(nn.Module):
    """Cross entropy against smoothed targets, with optional per-class weights.

    The true class receives probability ``1 - smoothing``; the remaining mass
    is shared equally among the other classes. When ``weight`` is given, each
    sample's loss is scaled by the weight of its target class before the
    plain mean over the batch is taken.
    """
    
    def __init__(self, smoothing: float = 0.1, weight: torch.Tensor = None):
        super().__init__()
        self.weight = weight
        self.smoothing = smoothing
    
    def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        num_classes = pred.size(-1)
        with torch.no_grad():
            # Start from the off-target probability everywhere, then write the
            # on-target probability at each sample's true class.
            soft_targets = torch.full_like(pred, self.smoothing / (num_classes - 1))
            soft_targets.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        
        per_sample = (-soft_targets * F.log_softmax(pred, dim=-1)).sum(dim=-1)
        
        if self.weight is not None:
            per_sample = per_sample * self.weight[target]
        
        return per_sample.mean()

# Progress marker: printed once the definitions in this cell have executed.
print("Dataset and loss defined ✓")

In [None]:
# =============================================================================
# Training and Evaluation Functions
# =============================================================================
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix
from tqdm.auto import tqdm

def set_seed(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs (all CUDA devices when available)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict:
    """Score predictions on the samples whose true label is non-negative.

    Returns a dict with ``WA`` (accuracy), ``UA`` (macro-averaged recall),
    ``F1_macro``, a 4x4 ``confusion_matrix`` as nested lists and
    ``n_samples``. If no sample has a valid label, returns
    ``{"error": "No valid samples"}`` instead.
    """
    keep = y_true >= 0
    gold = y_true[keep]
    guess = y_pred[keep]
    
    if len(gold) == 0:
        return {"error": "No valid samples"}
    
    # Shared settings for the macro-averaged scores.
    macro = dict(average="macro", zero_division=0)
    
    metrics = {"WA": accuracy_score(gold, guess)}
    metrics["UA"] = recall_score(gold, guess, **macro)
    metrics["F1_macro"] = f1_score(gold, guess, **macro)
    metrics["confusion_matrix"] = confusion_matrix(gold, guess, labels=range(4)).tolist()
    metrics["n_samples"] = len(gold)
    return metrics

def train_epoch(model, loader, optimizer, scheduler, loss_fn, device, grad_clip=1.0):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning a dict with "logits".
        loader: Iterable of batches with "input_ids", "attention_mask", "label".
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per batch, after the optimizer.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar.
        device: Device the batch tensors are moved to.
        grad_clip: Max gradient norm; clipping is skipped when <= 0.

    Returns:
        The ``compute_metrics`` dict for the epoch plus "loss", the mean
        loss per valid (label >= 0) sample.
    """
    model.train()
    total_loss = 0.0
    n_valid_total = 0  # samples that actually contributed to total_loss
    all_preds, all_labels = [], []
    
    pbar = tqdm(loader, desc="Training", leave=False)
    for batch in pbar:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)
        
        # Batches without a single valid label are skipped entirely.
        mask = labels >= 0
        n_valid = int(mask.sum().item())
        if n_valid == 0:
            continue
        
        output = model(input_ids, attention_mask)
        loss = loss_fn(output["logits"][mask], labels[mask])
        
        optimizer.zero_grad()
        loss.backward()
        if grad_clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        scheduler.step()
        
        total_loss += loss.item() * n_valid
        n_valid_total += n_valid
        # Invalid-label rows are kept here; compute_metrics filters them out.
        all_preds.extend(output["logits"].argmax(-1).cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
        pbar.set_postfix({"loss": f"{loss.item():.4f}"})
    
    metrics = compute_metrics(np.array(all_labels), np.array(all_preds))
    # Normalise by the samples that contributed to the sum, not by every
    # label seen, so invalid labels cannot deflate the reported loss.
    metrics["loss"] = total_loss / n_valid_total if n_valid_total else 0
    return metrics

@torch.no_grad()
def evaluate(model, loader, loss_fn, device):
    """Score ``model`` on ``loader`` in eval mode, without gradients.

    Args:
        model: Module called as ``model(input_ids, attention_mask)`` and
            returning a dict with "logits".
        loader: Iterable of batches with "input_ids", "attention_mask", "label".
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar.
        device: Device the batch tensors are moved to.

    Returns:
        The ``compute_metrics`` dict plus "loss", the mean loss per valid
        (label >= 0) sample.
    """
    model.eval()
    total_loss = 0.0
    n_valid_total = 0  # samples that actually contributed to total_loss
    all_preds, all_labels = [], []
    
    for batch in tqdm(loader, desc="Evaluating", leave=False):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)
        
        # Batches without a single valid label are skipped entirely.
        mask = labels >= 0
        n_valid = int(mask.sum().item())
        if n_valid == 0:
            continue
        
        output = model(input_ids, attention_mask)
        loss = loss_fn(output["logits"][mask], labels[mask])
        
        total_loss += loss.item() * n_valid
        n_valid_total += n_valid
        # Invalid-label rows are kept here; compute_metrics filters them out.
        all_preds.extend(output["logits"].argmax(-1).cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
    
    metrics = compute_metrics(np.array(all_labels), np.array(all_preds))
    # Normalise by the samples that contributed to the sum, not by every
    # label seen, so invalid labels cannot deflate the reported loss.
    metrics["loss"] = total_loss / n_valid_total if n_valid_total else 0
    return metrics

# Progress marker: printed once the definitions in this cell have executed.
print("Training functions defined ✓")

In [None]:
# =============================================================================
# Train Single Fold
# =============================================================================

def train_fold(train_records, val_records, fold_id, config):
    """Train a single LOSO fold and return the metrics of its best checkpoint.

    Args:
        train_records: Manifest records used for optimisation.
        val_records: Manifest records used for model selection and early stopping.
        fold_id: Fold number; offsets the random seed and names the checkpoint.
        config: Settings object exposing the ``Config`` attributes.

    Returns:
        ``(final_metrics, model)``. ``final_metrics`` is ``evaluate`` run on
        ``val_records`` with the best checkpoint restored, extended with
        "best_epoch" and the per-epoch "history"; ``model`` carries the
        restored best weights.
    """
    set_seed(config.SEED + fold_id)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    print(f"\n{'='*60}")
    print(f"FOLD {fold_id} | Train: {len(train_records)} | Val: {len(val_records)}")
    print(f"{'='*60}")
    
    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(config.BACKBONE)
    
    # Datasets
    train_ds = IEMOCAPTextDataset(train_records, tokenizer, config.MAX_LENGTH)
    val_ds = IEMOCAPTextDataset(val_records, tokenizer, config.MAX_LENGTH)
    
    train_loader = DataLoader(train_ds, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)
    
    # Model
    model = TextClassifier(
        backbone=config.BACKBONE,
        num_classes=config.NUM_CLASSES,
        dropout=config.DROPOUT,
        pooling=config.POOLING,
    ).to(device)
    
    # Class weights: inverse frequency, rescaled to sum to the class count.
    # Sized from config.NUM_CLASSES so the weights always match the head.
    num_classes = config.NUM_CLASSES
    train_dist = get_class_distribution(train_records)
    # Classes absent from the training data are counted as 1.
    counts = np.array([train_dist.get(i, 1) for i in range(num_classes)])
    weights = 1.0 / (counts + 1e-6)
    weights = weights / weights.sum() * num_classes
    class_weights = torch.tensor(weights, dtype=torch.float32).to(device) if config.USE_CLASS_WEIGHTS else None
    
    # Loss
    loss_fn = LabelSmoothingCE(config.LABEL_SMOOTHING, class_weights)
    
    # Optimizer with differential LR: the encoder trains at a tenth of the
    # head's learning rate.
    encoder_params = list(model.encoder.parameters())
    head_params = list(model.classifier.parameters())
    if model.pooler is not None:
        head_params += list(model.pooler.parameters())
    
    optimizer = AdamW([
        {"params": encoder_params, "lr": config.LR * 0.1},
        {"params": head_params, "lr": config.LR},
    ], weight_decay=config.WEIGHT_DECAY)
    
    # Scheduler: stepped once per batch, so the cycle spans every batch of
    # every epoch.
    total_steps = len(train_loader) * config.EPOCHS
    scheduler = OneCycleLR(
        optimizer,
        max_lr=[config.LR * 0.1, config.LR],
        total_steps=total_steps,
        pct_start=config.WARMUP_RATIO,
        anneal_strategy="cos",
    )
    
    # Training loop
    # best_wa starts below any reachable accuracy so the first epoch always
    # writes a checkpoint and binds best_ua, even if validation WA stays at 0.
    best_wa = -1.0
    best_ua = 0.0
    best_epoch = 0
    patience_counter = 0
    history = []
    ckpt_path = f"{config.OUT_DIR}/fold{fold_id}_best.pt"
    
    for epoch in range(config.EPOCHS):
        train_metrics = train_epoch(model, train_loader, optimizer, scheduler, loss_fn, device, config.GRAD_CLIP)
        val_metrics = evaluate(model, val_loader, loss_fn, device)
        
        print(f"Epoch {epoch+1:2d}/{config.EPOCHS} | "
              f"Train Loss: {train_metrics['loss']:.4f} WA: {train_metrics['WA']*100:.2f}% | "
              f"Val Loss: {val_metrics['loss']:.4f} WA: {val_metrics['WA']*100:.2f}% UA: {val_metrics['UA']*100:.2f}%")
        
        history.append({
            "epoch": epoch + 1,
            "train_loss": train_metrics["loss"],
            "train_WA": train_metrics["WA"],
            "val_loss": val_metrics["loss"],
            "val_WA": val_metrics["WA"],
            "val_UA": val_metrics["UA"],
        })
        
        # Save best
        if val_metrics["WA"] > best_wa:
            best_wa = val_metrics["WA"]
            best_ua = val_metrics["UA"]
            best_epoch = epoch + 1
            patience_counter = 0
            
            torch.save({
                "epoch": epoch + 1,
                "model_state_dict": model.state_dict(),
                # Stored as plain Python floats so the file contains no NumPy
                # scalars and still loads when torch.load is restricted to
                # weights-only unpickling.
                "val_WA": float(best_wa),
                "val_UA": float(best_ua),
            }, ckpt_path)
        else:
            patience_counter += 1
        
        if patience_counter >= config.PATIENCE:
            print(f"Early stopping at epoch {epoch + 1}")
            break
    
    print(f"\n✓ Fold {fold_id} Best: WA={best_wa*100:.2f}% UA={best_ua*100:.2f}% (Epoch {best_epoch})")
    
    # Load best and get final metrics
    # map_location keeps the restore working on whichever device this run uses.
    ckpt = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(ckpt["model_state_dict"])
    final_metrics = evaluate(model, val_loader, loss_fn, device)
    final_metrics["best_epoch"] = best_epoch
    final_metrics["history"] = history
    
    return final_metrics, model

## Run Training

Choose one of:
1. **Single Fold** - Quick test on one session (faster)
2. **Full LOSO** - Complete 5-fold cross-validation (for final results)

In [None]:
# =============================================================================
# Option 1: Single Fold (Test on Session 5)
# =============================================================================
# Runs as-is: trains a single fold for quick testing

TEST_SESSION = 5

# NOTE: despite its name, `val_records` holds the held-out TEST session here.
train_records, val_records = loso_split(all_records, TEST_SESSION)
if not train_records or not val_records:
    # Fail before training instead of hitting a KeyError on the empty
    # metrics dict after the whole fold has been trained.
    raise ValueError(
        f"LOSO split for session {TEST_SESSION} is empty "
        f"(train={len(train_records)}, test={len(val_records)}); "
        "check the manifest's 'session' field."
    )

# Split train into train/val for early stopping (90/10)
np.random.seed(config.SEED)
indices = np.random.permutation(len(train_records))
val_size = int(len(train_records) * 0.1)
train_subset = [train_records[i] for i in indices[val_size:]]
val_subset = [train_records[i] for i in indices[:val_size]]

print(f"Session {TEST_SESSION} held out as test set")
print(f"Training: {len(train_subset)} | Validation: {len(val_subset)} | Test: {len(val_records)}")

fold_metrics, trained_model = train_fold(train_subset, val_subset, TEST_SESSION, config)

# Also evaluate on held-out test session
tokenizer = AutoTokenizer.from_pretrained(config.BACKBONE)
test_ds = IEMOCAPTextDataset(val_records, tokenizer, config.MAX_LENGTH)
test_loader = DataLoader(test_ds, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=2)

# Compute test metrics (the reporting loss is built without class weights)
trained_model.eval()
loss_fn = LabelSmoothingCE(config.LABEL_SMOOTHING)
test_metrics = evaluate(trained_model, test_loader, loss_fn, torch.device("cuda" if torch.cuda.is_available() else "cpu"))

print(f"\n{'='*60}")
print(f"TEST SET RESULTS (Session {TEST_SESSION})")
print(f"{'='*60}")
print(f"WA: {test_metrics['WA']*100:.2f}%")
print(f"UA: {test_metrics['UA']*100:.2f}%")
print(f"F1 (macro): {test_metrics['F1_macro']*100:.2f}%")

In [None]:
# =============================================================================
# Option 2: Full LOSO Cross-Validation (5 folds)
# =============================================================================
# The code below is disabled by wrapping it in a string literal. Remove the
# enclosing triple quotes to run full LOSO (takes longer but gives proper results).
# Each fold is scored on its held-out session: train_fold only returns metrics
# for the random validation split, which is drawn from the training sessions.

"""
all_fold_results = []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(config.BACKBONE)
# Reporting loss without class weights, as in the single-fold test evaluation
test_loss_fn = LabelSmoothingCE(config.LABEL_SMOOTHING)

for test_session in range(1, 6):
    print(f"\n{'#'*60}")
    print(f"# FOLD {test_session}: Test on Session {test_session}")
    print(f"{'#'*60}")
    
    train_recs, test_recs = loso_split(all_records, test_session)
    
    # Split train into train/val
    np.random.seed(config.SEED)
    indices = np.random.permutation(len(train_recs))
    val_size = int(len(train_recs) * 0.1)
    train_subset = [train_recs[i] for i in indices[val_size:]]
    val_subset = [train_recs[i] for i in indices[:val_size]]
    
    val_metrics, fold_model = train_fold(train_subset, val_subset, test_session, config)
    
    # Evaluate the best checkpoint on the held-out session
    test_ds = IEMOCAPTextDataset(test_recs, tokenizer, config.MAX_LENGTH)
    test_loader = DataLoader(test_ds, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=2)
    metrics = evaluate(fold_model, test_loader, test_loss_fn, device)
    metrics["test_session"] = test_session
    metrics["val_WA"] = val_metrics["WA"]
    metrics["val_UA"] = val_metrics["UA"]
    all_fold_results.append(metrics)
    
    # Release this fold's model before the next one is built
    del fold_model
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Aggregate (held-out-session scores)
wa_scores = [r["WA"] for r in all_fold_results]
ua_scores = [r["UA"] for r in all_fold_results]

print(f"\n{'='*60}")
print("LOSO CROSS-VALIDATION RESULTS")
print(f"{'='*60}")
print(f"Average WA: {np.mean(wa_scores)*100:.2f}% ± {np.std(wa_scores)*100:.2f}%")
print(f"Average UA: {np.mean(ua_scores)*100:.2f}% ± {np.std(ua_scores)*100:.2f}%")
print(f"Per-fold WA: {[f'{wa*100:.2f}%' for wa in wa_scores]}")
"""
print("Full LOSO code is commented out. Uncomment to run all 5 folds.")

In [None]:
# =============================================================================
# Visualize Confusion Matrix
# =============================================================================
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, labels, title="Confusion Matrix"):
    """Draw ``cm`` as an annotated heatmap and return the figure.

    Rows are labelled as true classes and columns as predicted classes. Each
    cell shows its integer count, in white on cells above half the maximum
    value and in black otherwise.
    """
    n_labels = len(labels)
    tick_positions = np.arange(n_labels)
    
    fig, ax = plt.subplots(figsize=(8, 6))
    heatmap = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    ax.figure.colorbar(heatmap, ax=ax)
    
    ax.set(xticks=tick_positions,
           yticks=tick_positions,
           xticklabels=labels, yticklabels=labels,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    
    # Text annotations, visited row by row
    thresh = cm.max() / 2.
    for row, col in np.ndindex(n_labels, n_labels):
        cell = cm[row, col]
        ax.text(col, row, format(cell, 'd'),
                ha="center", va="center",
                color="white" if cell > thresh else "black")
    
    fig.tight_layout()
    return fig

# Plot test confusion matrix (skipped when no test metrics with a matrix exist)
if 'confusion_matrix' in globals().get('test_metrics', ()):
    cm = np.array(test_metrics['confusion_matrix'])
    fig = plot_confusion_matrix(cm, LABEL_NAMES, f"Session {TEST_SESSION} Test Set")
    plt.savefig(f"{config.OUT_DIR}/confusion_matrix_fold{TEST_SESSION}.png", dpi=150, bbox_inches='tight')
    plt.show()
    
    # Per-class recall: diagonal count over the row (true-class) total
    print("\nPer-class Recall:")
    for i, name in enumerate(LABEL_NAMES):
        recall = 0 if cm[i].sum() <= 0 else cm[i, i] / cm[i].sum()
        print(f"  {name}: {recall*100:.1f}%")

In [None]:
# =============================================================================
# Save Results
# =============================================================================

# Assemble the summary key by key so the JSON keeps a fixed key order.
results = {
    "method": "RoBERTa + Attention Pooling",
    "backbone": config.BACKBONE,
    "pooling": config.POOLING,
    "dataset": "IEMOCAP 4-class",
}

# Single-fold runs define TEST_SESSION; otherwise describe the run as 5-fold.
if 'TEST_SESSION' in globals():
    results["protocol"] = "LOSO (single fold)"
    results["test_session"] = TEST_SESSION
else:
    results["protocol"] = "LOSO 5-fold"
    results["test_session"] = "all"

# Test scores are left as None when no test evaluation has been run.
if 'test_metrics' in globals():
    results["test_WA"] = test_metrics["WA"]
    results["test_UA"] = test_metrics["UA"]
    results["test_F1"] = test_metrics["F1_macro"]
else:
    results["test_WA"] = None
    results["test_UA"] = None
    results["test_F1"] = None

results["config"] = {
    "epochs": config.EPOCHS,
    "batch_size": config.BATCH_SIZE,
    "lr": config.LR,
    "dropout": config.DROPOUT,
    "label_smoothing": config.LABEL_SMOOTHING,
    "max_length": config.MAX_LENGTH,
}
results["timestamp"] = datetime.now().isoformat()

# Save JSON
results_path = f"{config.OUT_DIR}/results.json"
with open(results_path, "w") as f:
    f.write(json.dumps(results, indent=2))
print(f"Results saved to: {results_path}")

# Print benchmark comparison: published text-only numbers, then this run's
# test scores when they exist.
print(f"\n{'='*60}")
print("BENCHMARK COMPARISON (Text-only on IEMOCAP 4-class)")
print(f"{'='*60}")
print("  RobinNet (2024):        71.1% WA, 70.6% UA")
print("  TSIN (2022):            68.7% WA")
print("  ISSA-BiGRU-MHA (2024):  66.1% WA, 66.5% UA")
print("  ─────────────────────────────────────────")
if 'test_metrics' in globals():
    print(f"  Our result:             {test_metrics['WA']*100:.1f}% WA, {test_metrics['UA']*100:.1f}% UA")

In [None]:
# =============================================================================
# Download Artifacts (for Kaggle)
# =============================================================================
import shutil

# Create zip of all artifacts
# The archive is written to /kaggle/working, outside the default OUT_DIR, so
# it does not end up inside itself.
shutil.make_archive("/kaggle/working/text_branch_artifacts", "zip", config.OUT_DIR)
print(f"Artifacts zipped to: /kaggle/working/text_branch_artifacts.zip")

# List saved files (top level of OUT_DIR only), with sizes in KB (bytes / 1024)
print("\nSaved files:")
for f in Path(config.OUT_DIR).glob("*"):
    size = f.stat().st_size / 1024
    print(f"  {f.name}: {size:.1f} KB")