In [None]:
# =========================================================
# STEP 5 — CNN + Temporal Transformer + Behavioral Fusion
# (WITH TRAINING LOGS)
# =========================================================

import torch
import torch.nn as nn
import numpy as np
import time
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix

# -----------------------------
# Load Step-3 engineered data
# -----------------------------
# Read the Step-3 feature archive. Indexing an .npz archive materialises the
# array in memory, so the file can be closed as soon as all keys are read;
# the context manager guarantees that the handle is released.
with np.load("features_S2_w60.npz") as data:
    EDA  = data["EDA_windows"]
    BVP  = data["BVP_windows"]
    ACC  = data["ACC_windows"]
    TEMP = data["TEMP_windows"]
    BEH  = data["behavior_features"]
    labels = data["labels"]

print("Loaded:")
print("  EDA :", EDA.shape)
print("  BEH :", BEH.shape)
print("  Labels:", labels.shape)

# -----------------------------
# Label remap
# -----------------------------
# Shift the stored labels down by one so they can be used directly as class
# indices by CrossEntropyLoss, then verify the result spans exactly 0..2.
# An explicit raise is used because `assert` is stripped under `python -O`.
labels = labels - 1
if labels.min() != 0 or labels.max() != 2:
    raise ValueError(
        f"Expected remapped labels to span 0..2, got "
        f"{labels.min()}..{labels.max()}"
    )
print("âœ“ Labels remapped to:", np.unique(labels))

# -----------------------------
# Normalization
# -----------------------------
def z_norm_windowwise(x):
    """Z-score every window of ``x`` independently.

    The mean and standard deviation are taken along axis 1 (kept as a
    broadcastable axis), so each entry along axis 0 is normalised with its
    own statistics. ``1e-6`` is added to the standard deviation so that a
    constant window yields zeros instead of a division by zero.
    """
    centre = x.mean(axis=1, keepdims=True)
    spread = x.std(axis=1, keepdims=True)
    return (x - centre) / (spread + 1e-6)

# Physiological channels: each window is standardised with its own statistics.
EDA, BVP, ACC, TEMP = (
    z_norm_windowwise(signal) for signal in (EDA, BVP, ACC, TEMP)
)

# Behavioural features: column-wise z-score over all rows.
# NOTE(review): these statistics are computed before the train/test split,
# so held-out rows contribute to the mean/std — confirm this is acceptable.
BEH = np.divide(BEH - BEH.mean(axis=0), BEH.std(axis=0) + 1e-6)

print(" Normalization complete")

# -----------------------------
# Dataset
# -----------------------------
class WESADDataset(Dataset):
    """In-memory dataset of four signal window arrays, behaviour features and labels.

    The four signal arrays are converted to float32 tensors with a trailing
    singleton axis appended; behaviour features become a float32 tensor and
    labels an int64 tensor. Indexing returns the six per-sample tensors in
    the order (EDA, BVP, ACC, TEMP, BEH, y).
    """

    def __init__(self, EDA, BVP, ACC, TEMP, BEH, y):
        def _signal(windows):
            # float32 copy with one extra trailing axis
            return torch.tensor(windows, dtype=torch.float32).unsqueeze(-1)

        self.EDA = _signal(EDA)
        self.BVP = _signal(BVP)
        self.ACC = _signal(ACC)
        self.TEMP = _signal(TEMP)
        self.BEH = torch.tensor(BEH, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        # The label tensor defines the number of samples.
        return len(self.y)

    def __getitem__(self, idx):
        columns = (self.EDA, self.BVP, self.ACC, self.TEMP, self.BEH, self.y)
        return tuple(column[idx] for column in columns)

# -----------------------------
# Train / Test split
# -----------------------------
# Stratified 80/20 split over window indices; the fixed seed makes the
# partition repeatable.
idx = np.arange(len(labels))
train_idx, test_idx = train_test_split(
    idx,
    stratify=labels,
    test_size=0.2,
    random_state=42,
)

# Every array is sliced with the same index set so samples stay aligned.
train_ds = WESADDataset(
    *(arr[train_idx] for arr in (EDA, BVP, ACC, TEMP, BEH, labels))
)
test_ds = WESADDataset(
    *(arr[test_idx] for arr in (EDA, BVP, ACC, TEMP, BEH, labels))
)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=32)
test_loader = DataLoader(test_ds, shuffle=False, batch_size=32)

print(f"Train batches: {len(train_loader)} | Test batches: {len(test_loader)}")

# -----------------------------
# Model components
# -----------------------------
class CNNEncoder(nn.Module):
    """Two-stage 1-D convolutional feature extractor.

    Stage one maps 1 input channel to 32 (kernel 5, padding 2); stage two
    maps 32 to ``dim`` channels (kernel 3, padding 1). Each convolution is
    followed by batch normalisation and ReLU. The paddings keep the sequence
    length unchanged.
    """

    def __init__(self, dim=128):
        super().__init__()
        first_stage = [
            nn.Conv1d(1, 32, kernel_size=5, padding=2),
            nn.BatchNorm1d(32),
            nn.ReLU(),
        ]
        second_stage = [
            nn.Conv1d(32, dim, kernel_size=3, padding=1),
            nn.BatchNorm1d(dim),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*first_stage, *second_stage)

    def forward(self, x):
        # Conv1d convolves over the last axis, so axes 1 and 2 are swapped
        # on the way in and swapped back on the way out.
        channels_first = x.permute(0, 2, 1)
        features = self.net(channels_first)
        return features.permute(0, 2, 1)

class TemporalTransformer(nn.Module):
    """Single-layer Transformer encoder (4 heads, batch-first) of width ``dim``."""

    def __init__(self, dim):
        super().__init__()
        # One layer only, to keep training fast.
        self.enc = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=dim, nhead=4, batch_first=True),
            num_layers=1,
        )

    def forward(self, x):
        encoded = self.enc(x)
        return encoded

class BehaviorEncoder(nn.Module):
    """Two-layer MLP mapping ``in_dim`` behaviour features to a ``dim``-wide embedding.

    The hidden layer has 128 units with a ReLU non-linearity.
    """

    def __init__(self, in_dim, dim=128):
        super().__init__()
        hidden = 128
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, dim),
        )

    def forward(self, x):
        embedding = self.net(x)
        return embedding

class FusionTransformer(nn.Module):
    """Single-layer Transformer encoder (4 heads, batch-first) of width ``dim``.

    Structurally the same as ``TemporalTransformer``; it is a separate class
    so the fusion stage owns its own parameters.
    """

    def __init__(self, dim):
        super().__init__()
        # One layer only, to keep training fast.
        self.enc = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=dim, nhead=4, batch_first=True),
            num_layers=1,
        )

    def forward(self, x):
        mixed = self.enc(x)
        return mixed

class EmotionModel(nn.Module):
    """Fuses four signal streams and a behaviour vector into 3 class logits.

    One shared ``CNNEncoder`` + ``TemporalTransformer`` pair embeds each of
    the four signals; ``BehaviorEncoder`` embeds the behaviour features. The
    five embeddings are stacked as a 5-token sequence, passed through
    ``FusionTransformer``, averaged over the tokens and classified by a small
    MLP head with dropout 0.4.
    """

    def __init__(self, beh_dim, dim=128):
        super().__init__()
        # Submodule creation order determines the order in which parameter
        # initialisation draws from the RNG; keep it stable.
        self.cnn = CNNEncoder(dim)
        self.temp_tf = TemporalTransformer(dim)
        self.beh_enc = BehaviorEncoder(beh_dim, dim)
        self.fuse_tf = FusionTransformer(dim)

        hidden, n_classes = 128, 3
        self.cls = nn.Sequential(
            nn.Linear(dim, hidden),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden, n_classes),
        )

    def encode(self, x):
        """Embed one signal: CNN, then transformer, then mean over axis 1."""
        tokens = self.cnn(x)
        tokens = self.temp_tf(tokens)
        return tokens.mean(dim=1)

    def forward(self, EDA, BVP, ACC, TEMP, BEH):
        # The same encoder weights are applied to every signal.
        embeddings = [self.encode(signal) for signal in (EDA, BVP, ACC, TEMP)]
        embeddings.append(self.beh_enc(BEH))

        # Stack as (batch, 5, dim), fuse, then pool over the 5 tokens.
        token_seq = torch.stack(embeddings, dim=1)
        pooled = self.fuse_tf(token_seq).mean(dim=1)
        return self.cls(pooled)

# -----------------------------
# Training setup
# -----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EmotionModel(BEH.shape[1]).to(device)

# Balanced class weights for the loss. They are estimated from the training
# labels only, so the held-out split does not influence training.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1, 2]),
    y=labels[train_idx],
)
criterion = nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights, dtype=torch.float32).to(device)
)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

# Freeze encoders: for the first epochs only the behaviour encoder, the
# fusion transformer and the classifier head receive gradient updates.
# NOTE(review): model.train() still lets the BatchNorm layers inside the
# frozen CNN update their running statistics — confirm this is intended.
for p in model.cnn.parameters():
    p.requires_grad = False
for p in model.temp_tf.parameters():
    p.requires_grad = False

# -----------------------------
# Training loop (LOGGED)
# -----------------------------
EPOCHS = 30
LOG_INTERVAL = 5

for epoch in range(EPOCHS):
    start_time = time.time()
    model.train()
    epoch_loss = 0.0

    if epoch == 10:
        for p in model.cnn.parameters():
            p.requires_grad = True
        for p in model.temp_tf.parameters():
            p.requires_grad = True

        # Rebuild the optimizer exactly once, at the unfreeze point, with the
        # lower fine-tuning learning rate. Doing this on every epoch would
        # discard Adam's moment estimates each time and would apply the lower
        # rate from the very first epoch.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        print("Encoders unfrozen")

    # Batch tensors get their own lower-case names so the module-level numpy
    # arrays (EDA, BVP, ACC, TEMP, BEH) are not overwritten.
    for i, batch in enumerate(train_loader):
        eda_b, bvp_b, acc_b, temp_b, beh_b, y_b = (t.to(device) for t in batch)

        optimizer.zero_grad()
        loss = criterion(model(eda_b, bvp_b, acc_b, temp_b, beh_b), y_b)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        if (i + 1) % LOG_INTERVAL == 0:
            print(f"  Batch {i+1}/{len(train_loader)} | Loss: {loss.item():.4f}")

    # The reported epoch loss is the sum of the per-batch losses.
    print(
        f"Epoch {epoch+1}/{EPOCHS} | "
        f"Loss: {epoch_loss:.4f} | "
        f"Time: {(time.time()-start_time):.1f}s"
    )

# -----------------------------
# Evaluation
# -----------------------------
model.eval()
preds, gts = [], []

with torch.no_grad():
    for batch in test_loader:
        *inputs_b, y_b = batch
        out = model(*(t.to(device) for t in inputs_b))
        preds.extend(torch.argmax(out, 1).cpu().numpy())
        gts.extend(y_b.numpy())

print("Evaluation Results")
print(classification_report(gts, preds, zero_division=0))
print(confusion_matrix(gts, preds))


Loaded:
  EDA : (281, 60)
  BEH : (281, 5)
  Labels: (281,)
âœ“ Labels remapped to: [0 1 2]
âœ“ Normalization complete
Train batches: 7 | Test batches: 2
  Batch 5/7 | Loss: 1.0385
Epoch 1/30 | Loss: 7.2972 | Time: 2.4s
  Batch 5/7 | Loss: 0.9896
Epoch 2/30 | Loss: 6.6079 | Time: 2.4s
  Batch 5/7 | Loss: 0.7194
Epoch 3/30 | Loss: 5.7154 | Time: 2.5s
  Batch 5/7 | Loss: 0.6667
Epoch 4/30 | Loss: 5.2556 | Time: 2.5s
  Batch 5/7 | Loss: 0.6536
Epoch 5/30 | Loss: 4.4212 | Time: 2.4s
  Batch 5/7 | Loss: 0.4402
Epoch 6/30 | Loss: 3.8225 | Time: 2.6s
  Batch 5/7 | Loss: 0.4109
Epoch 7/30 | Loss: 3.2188 | Time: 3.4s
  Batch 5/7 | Loss: 0.4134
Epoch 8/30 | Loss: 2.8382 | Time: 3.3s
  Batch 5/7 | Loss: 0.4399
Epoch 9/30 | Loss: 2.5538 | Time: 4.1s
  Batch 5/7 | Loss: 0.4142
Epoch 10/30 | Loss: 2.2546 | Time: 3.8s
ðŸ”“ Encoders unfrozen
  Batch 5/7 | Loss: 0.4990
Epoch 11/30 | Loss: 3.3907 | Time: 5.2s
  Batch 5/7 | Loss: 0.3281
Epoch 12/30 | Loss: 2.3510 | Time: 5.1s
  Batch 5/7 | Loss: 0.4226
E

In [None]:
# ============================================================
# STEP-6 — Multi-Subject Training (Physio + Behavioral Fusion)
# ============================================================

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import time

# -----------------------------
# CONFIG
# -----------------------------
# Per-subject feature archives: subjects 2-15 and 17, in ascending order.
# NOTE(review): the list contains S12 but not S16 — confirm this matches the
# subject IDs actually available in the dataset.
SUBJECT_FILES = [
    f"features_S{subject_id}_w60.npz"
    for subject_id in (*range(2, 16), 17)
]

BATCH_SIZE = 32
EPOCHS = 30

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -----------------------------
# LOAD & MERGE SUBJECT DATA
# -----------------------------
# One list per array kind; each subject appends its block, and the lists are
# concatenated along axis 0 once every file has been read.
EDA_all, BVP_all, ACC_all, TEMP_all, BEH_all, Y_all = [], [], [], [], [], []

for path in SUBJECT_FILES:
    # The context manager closes each archive after its arrays are read;
    # otherwise one file handle per subject would stay open.
    with np.load(path) as d:
        EDA_all.append(d["EDA_windows"])
        BVP_all.append(d["BVP_windows"])
        ACC_all.append(d["ACC_windows"])
        TEMP_all.append(d["TEMP_windows"])
        BEH_all.append(d["behavior_features"])

        # Label fix: {1,2,3} -> {0,1,2}
        y = d["labels"] - 1

    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if y.min() != 0 or y.max() != 2:
        raise ValueError(
            f"{path}: expected remapped labels to span 0..2, got "
            f"{y.min()}..{y.max()}"
        )
    Y_all.append(y)

EDA_all  = np.concatenate(EDA_all)
BVP_all  = np.concatenate(BVP_all)
ACC_all  = np.concatenate(ACC_all)
TEMP_all = np.concatenate(TEMP_all)
BEH_all  = np.concatenate(BEH_all)
Y_all    = np.concatenate(Y_all)

print("âœ“ Multi-subject data loaded")
print("Total samples:", len(Y_all))
print("Label distribution:", np.unique(Y_all, return_counts=True))

# -----------------------------
# NORMALIZATION
# -----------------------------
def z_norm_windowwise(x):
    """Z-score each window of ``x`` along axis 1.

    Every entry along axis 0 is centred on its own mean and divided by its
    own standard deviation plus ``1e-6``; the offset prevents a division by
    zero for constant windows.
    """
    return (x - x.mean(axis=1, keepdims=True)) / (
        x.std(axis=1, keepdims=True) + 1e-6
    )

# Physiological channels: each window is standardised with its own statistics.
EDA_all, BVP_all, ACC_all, TEMP_all = (
    z_norm_windowwise(signal)
    for signal in (EDA_all, BVP_all, ACC_all, TEMP_all)
)

# Behavioural features: column-wise z-score over the merged rows.
# NOTE(review): the statistics include rows that later land in the test
# split — confirm this is acceptable.
BEH_all = np.divide(BEH_all - BEH_all.mean(axis=0), BEH_all.std(axis=0) + 1e-6)

print("âœ“ Normalization complete")
print("Behavior feature dim:", BEH_all.shape[1])

# -----------------------------
# DATASET
# -----------------------------
class WESADDataset(Dataset):
    """In-memory dataset of four signal window arrays, behaviour features and labels.

    Signal arrays are stored as float32 tensors with a trailing singleton
    axis, behaviour features as a float32 tensor and labels as an int64
    tensor. Items are 6-tuples ordered (EDA, BVP, ACC, TEMP, BEH, y).
    """

    def __init__(self, EDA, BVP, ACC, TEMP, BEH, labels):
        signals = {"EDA": EDA, "BVP": BVP, "ACC": ACC, "TEMP": TEMP}
        for attr, windows in signals.items():
            tensor = torch.tensor(windows, dtype=torch.float32)
            setattr(self, attr, tensor.unsqueeze(-1))

        self.BEH = torch.tensor(BEH, dtype=torch.float32)
        self.y = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        # The label tensor defines the number of samples.
        return len(self.y)

    def __getitem__(self, idx):
        eda, bvp = self.EDA[idx], self.BVP[idx]
        acc, temp = self.ACC[idx], self.TEMP[idx]
        beh, target = self.BEH[idx], self.y[idx]
        return eda, bvp, acc, temp, beh, target

# -----------------------------
# TRAIN / TEST SPLIT
# -----------------------------
# Stratified 80/20 split over sample indices with a fixed seed.
idx = np.arange(len(Y_all))
train_idx, test_idx = train_test_split(
    idx,
    stratify=Y_all,
    test_size=0.2,
    random_state=42,
)

# Every array is sliced with the same index set so samples stay aligned.
# Only the training loader shuffles.
train_loader = DataLoader(
    WESADDataset(
        *(arr[train_idx]
          for arr in (EDA_all, BVP_all, ACC_all, TEMP_all, BEH_all, Y_all))
    ),
    shuffle=True,
    batch_size=BATCH_SIZE,
)

test_loader = DataLoader(
    WESADDataset(
        *(arr[test_idx]
          for arr in (EDA_all, BVP_all, ACC_all, TEMP_all, BEH_all, Y_all))
    ),
    shuffle=False,
    batch_size=BATCH_SIZE,
)

print(f"Train batches: {len(train_loader)} | Test batches: {len(test_loader)}")

# -----------------------------
# MODEL DEFINITIONS
# -----------------------------
class CNNEncoder(nn.Module):
    """Two-stage 1-D convolutional feature extractor.

    ``in_ch`` input channels are mapped to 32 (kernel 5, padding 2) and then
    to ``dim`` (kernel 3, padding 1); each convolution is followed by batch
    normalisation and ReLU. The paddings keep the sequence length unchanged.
    """

    def __init__(self, in_ch=1, dim=128):
        super().__init__()
        layers = []
        for c_in, c_out, kernel in ((in_ch, 32, 5), (32, dim, 3)):
            # padding = kernel // 2 gives 2 for kernel 5 and 1 for kernel 3
            layers.append(nn.Conv1d(c_in, c_out, kernel, padding=kernel // 2))
            layers.append(nn.BatchNorm1d(c_out))
            layers.append(nn.ReLU())
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # Conv1d convolves over the last axis: swap axes 1 and 2 on the way
        # in and restore the original layout on the way out.
        features = self.net(x.permute(0, 2, 1))
        return features.permute(0, 2, 1)

class TemporalTransformer(nn.Module):
    """Two-layer Transformer encoder (4 heads, batch-first) of width ``dim``."""

    def __init__(self, dim):
        super().__init__()
        self.enc = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(dim, nhead=4, batch_first=True),
            num_layers=2,
        )

    def forward(self, x):
        encoded = self.enc(x)
        return encoded

class BehaviorEncoder(nn.Module):
    """Two-layer MLP mapping ``in_dim`` behaviour features to ``out_dim`` values.

    The hidden layer has 128 units with a ReLU non-linearity.
    """

    def __init__(self, in_dim, out_dim=128):
        super().__init__()
        hidden = 128
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, out_dim),
        )

    def forward(self, x):
        embedding = self.net(x)
        return embedding

class FusionTransformer(nn.Module):
    """Two-layer Transformer encoder (4 heads, batch-first) of width ``dim``.

    Structurally the same as ``TemporalTransformer``; it is a separate class
    so the fusion stage owns its own parameters.
    """

    def __init__(self, dim):
        super().__init__()
        self.enc = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(dim, nhead=4, batch_first=True),
            num_layers=2,
        )

    def forward(self, x):
        mixed = self.enc(x)
        return mixed

class EmotionModel(nn.Module):
    """Fuses four signal streams and a behaviour vector into ``classes`` logits.

    One shared ``CNNEncoder`` + ``TemporalTransformer`` pair embeds each of
    the four signals; ``BehaviorEncoder`` embeds the behaviour features. The
    five embeddings form a 5-token sequence that is passed through
    ``FusionTransformer``, averaged over the tokens and classified by an MLP
    head with dropout 0.4.
    """

    def __init__(self, beh_dim, dim=128, classes=3):
        super().__init__()

        # Submodule creation order determines the order in which parameter
        # initialisation draws from the RNG; keep it stable.
        self.cnn = CNNEncoder(1, dim)
        self.temp_tf = TemporalTransformer(dim)
        self.beh_enc = BehaviorEncoder(beh_dim, dim)
        self.fuse_tf = FusionTransformer(dim)

        head = [
            nn.Linear(dim, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, classes),
        ]
        self.cls = nn.Sequential(*head)

    def encode_signal(self, x):
        """Embed one signal: CNN, then transformer, then mean over axis 1."""
        tokens = self.cnn(x)
        tokens = self.temp_tf(tokens)
        return tokens.mean(dim=1)

    def forward(self, EDA, BVP, ACC, TEMP, BEH):
        # The same encoder weights are applied to every signal.
        tokens = [self.encode_signal(signal) for signal in (EDA, BVP, ACC, TEMP)]
        tokens.append(self.beh_enc(BEH))

        # Stack as (batch, 5, dim), fuse, then pool over the 5 tokens.
        stacked = torch.stack(tokens, dim=1)
        pooled = self.fuse_tf(stacked).mean(dim=1)
        return self.cls(pooled)

# -----------------------------
# TRAINING SETUP 
# -----------------------------
beh_dim = BEH_all.shape[1]
model = EmotionModel(beh_dim=beh_dim).to(device)

# Balanced class weights for the loss, estimated from the training labels
# only so the held-out split does not influence training.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1, 2]),
    y=Y_all[train_idx],
)

criterion = nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights, dtype=torch.float32).to(device)
)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

# Freeze encoders initially: only the behaviour encoder, the fusion
# transformer and the classifier head are updated during the first epochs.
# NOTE(review): model.train() still lets the BatchNorm layers inside the
# frozen CNN update their running statistics — confirm this is intended.
for p in model.cnn.parameters():
    p.requires_grad = False
for p in model.temp_tf.parameters():
    p.requires_grad = False

# -----------------------------
# TRAINING LOOP
# -----------------------------
for epoch in range(EPOCHS):
    model.train()
    start = time.time()
    total_loss = 0.0

    if epoch == 10:
        # Unfreeze the encoders and restart Adam with a lower learning rate
        # for joint fine-tuning.
        for p in model.cnn.parameters():
            p.requires_grad = True
        for p in model.temp_tf.parameters():
            p.requires_grad = True
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        print("ðŸ”“ Encoders unfrozen")

    # Batch tensors use lower-case names so notebook-level arrays that share
    # the upper-case names (EDA, BVP, ACC, TEMP, BEH) are not overwritten.
    for i, batch in enumerate(train_loader, 1):
        eda_b, bvp_b, acc_b, temp_b, beh_b, y_b = (t.to(device) for t in batch)

        optimizer.zero_grad()
        loss = criterion(model(eda_b, bvp_b, acc_b, temp_b, beh_b), y_b)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if i % 5 == 0:
            print(f"  Batch {i}/{len(train_loader)} | Loss: {loss.item():.4f}")

    # The reported epoch loss is the sum of the per-batch losses.
    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss:.4f} | Time: {time.time()-start:.1f}s")

# -----------------------------
# EVALUATION
# -----------------------------
model.eval()
preds, gts = [], []

with torch.no_grad():
    for batch in test_loader:
        *inputs_b, y_b = batch
        out = model(*(t.to(device) for t in inputs_b))
        preds.extend(out.argmax(1).cpu().numpy())
        gts.extend(y_b.numpy())

print("\nðŸ“Š Multi-Subject Evaluation")
print(classification_report(gts, preds, zero_division=0))
print(confusion_matrix(gts, preds))


âœ“ Multi-subject data loaded
Total samples: 1440
Label distribution: (array([0, 1, 2]), array([774, 423, 243]))
âœ“ Normalization complete
Behavior feature dim: 4
Train batches: 36 | Test batches: 9
  Batch 5/36 | Loss: 1.1017
  Batch 10/36 | Loss: 1.2869
  Batch 15/36 | Loss: 1.0733
  Batch 20/36 | Loss: 1.0201
  Batch 25/36 | Loss: 0.9568
  Batch 30/36 | Loss: 0.9126
  Batch 35/36 | Loss: 0.8698
Epoch 1/30 | Loss: 36.4103 | Time: 35.6s
  Batch 5/36 | Loss: 0.8306
  Batch 10/36 | Loss: 0.7050
  Batch 15/36 | Loss: 0.6046
  Batch 20/36 | Loss: 0.8174
  Batch 25/36 | Loss: 0.8776
  Batch 30/36 | Loss: 0.7501
  Batch 35/36 | Loss: 0.8499
Epoch 2/30 | Loss: 27.9356 | Time: 32.0s
  Batch 5/36 | Loss: 0.9751
  Batch 10/36 | Loss: 0.6803
  Batch 15/36 | Loss: 0.6046
  Batch 20/36 | Loss: 0.6073
  Batch 25/36 | Loss: 0.5842
  Batch 30/36 | Loss: 0.7182
  Batch 35/36 | Loss: 0.7018
Epoch 3/30 | Loss: 26.1420 | Time: 29.0s
  Batch 5/36 | Loss: 0.7398
  Batch 10/36 | Loss: 0.5630
  Batch 15/36 

In [None]:
# ============================================================
# STEP-7 — LOSO VALIDATION (Physio + Behavioral Fusion)
# ============================================================

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import time

# -----------------------------
# CONFIG
# -----------------------------
# Per-subject feature archives: subjects 2-15 and 17, in ascending order.
# Each file serves once as the held-out test subject in the LOSO loop.
# NOTE(review): the list contains S12 but not S16 — confirm this matches the
# subject IDs actually available in the dataset.
SUBJECT_FILES = [
    "features_S%d_w60.npz" % subject_id
    for subject_id in list(range(2, 16)) + [17]
]

BATCH_SIZE = 32
EPOCHS = 20

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -----------------------------
# DATASET
# -----------------------------
class WESADDataset(torch.utils.data.Dataset):
    """In-memory dataset of four signal window arrays, behaviour features and labels.

    Signal arrays are stored as float32 tensors with a trailing singleton
    axis, behaviour features as a float32 tensor and labels as an int64
    tensor. Items are 6-tuples ordered (EDA, BVP, ACC, TEMP, BEH, y).
    """

    def __init__(self, EDA, BVP, ACC, TEMP, BEH, labels):
        def _as_float(array):
            return torch.tensor(array, dtype=torch.float32)

        # Signals get one extra trailing axis.
        self.EDA = torch.unsqueeze(_as_float(EDA), -1)
        self.BVP = torch.unsqueeze(_as_float(BVP), -1)
        self.ACC = torch.unsqueeze(_as_float(ACC), -1)
        self.TEMP = torch.unsqueeze(_as_float(TEMP), -1)
        self.BEH = _as_float(BEH)
        self.y = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        # The label tensor defines the number of samples.
        return len(self.y)

    def __getitem__(self, idx):
        fields = (self.EDA, self.BVP, self.ACC, self.TEMP, self.BEH, self.y)
        return tuple(field[idx] for field in fields)

# -----------------------------
# MODEL DEFINITIONS
# -----------------------------
class CNNEncoder(nn.Module):
    """Two-stage 1-D convolutional feature extractor.

    ``in_ch`` input channels are mapped to 32 (kernel 5, padding 2) and then
    to ``dim`` (kernel 3, padding 1); each convolution is followed by batch
    normalisation and ReLU. The paddings keep the sequence length unchanged.
    """

    def __init__(self, in_ch=1, dim=128):
        super().__init__()
        mid_channels = 32
        self.net = nn.Sequential(
            nn.Conv1d(in_ch, mid_channels, kernel_size=5, padding=2),
            nn.BatchNorm1d(mid_channels),
            nn.ReLU(),
            nn.Conv1d(mid_channels, dim, kernel_size=3, padding=1),
            nn.BatchNorm1d(dim),
            nn.ReLU(),
        )

    def forward(self, x):
        # Conv1d convolves over the last axis: swap axes 1 and 2 on the way
        # in and restore the original layout on the way out.
        channels_first = x.permute(0, 2, 1)
        features = self.net(channels_first)
        return features.permute(0, 2, 1)

class TemporalTransformer(nn.Module):
    """Two-layer Transformer encoder (4 heads, batch-first) of width ``dim``."""

    def __init__(self, dim):
        super().__init__()
        block = nn.TransformerEncoderLayer(dim, 4, batch_first=True)
        self.enc = nn.TransformerEncoder(block, 2)

    def forward(self, x):
        encoded = self.enc(x)
        return encoded

class BehaviorEncoder(nn.Module):
    """Two-layer MLP mapping ``in_dim`` behaviour features to ``out_dim`` values.

    The hidden layer has 128 units with a ReLU non-linearity.
    """

    def __init__(self, in_dim, out_dim=128):
        super().__init__()
        project_in = nn.Linear(in_dim, 128)
        project_out = nn.Linear(128, out_dim)
        self.net = nn.Sequential(project_in, nn.ReLU(), project_out)

    def forward(self, x):
        embedding = self.net(x)
        return embedding

class FusionTransformer(nn.Module):
    """Two-layer Transformer encoder (4 heads, batch-first) of width ``dim``.

    Structurally the same as ``TemporalTransformer``; it is a separate class
    so the fusion stage owns its own parameters.
    """

    def __init__(self, dim):
        super().__init__()
        block = nn.TransformerEncoderLayer(dim, 4, batch_first=True)
        self.enc = nn.TransformerEncoder(block, 2)

    def forward(self, x):
        mixed = self.enc(x)
        return mixed

class EmotionModel(nn.Module):
    """Fuses four signal streams and a behaviour vector into ``classes`` logits.

    One shared ``CNNEncoder`` + ``TemporalTransformer`` pair embeds each of
    the four signals; ``BehaviorEncoder`` embeds the behaviour features. The
    five embeddings form a 5-token sequence that is passed through
    ``FusionTransformer``, averaged over the tokens and classified by an MLP
    head with dropout 0.4.
    """

    def __init__(self, beh_dim, dim=128, classes=3):
        super().__init__()
        # Submodule creation order determines the order in which parameter
        # initialisation draws from the RNG; keep it stable.
        self.cnn = CNNEncoder(1, dim)
        self.temp_tf = TemporalTransformer(dim)
        self.beh_enc = BehaviorEncoder(beh_dim, dim)
        self.fuse_tf = FusionTransformer(dim)

        hidden = 128
        self.cls = nn.Sequential(
            nn.Linear(dim, hidden),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden, classes),
        )

    def encode_signal(self, x):
        """Embed one signal: CNN, then transformer, then mean over axis 1."""
        sequence = self.temp_tf(self.cnn(x))
        return sequence.mean(dim=1)

    def forward(self, EDA, BVP, ACC, TEMP, BEH):
        # The same encoder weights are applied to every signal, in the fixed
        # order EDA, BVP, ACC, TEMP; the behaviour embedding is token five.
        modality_tokens = []
        for signal in (EDA, BVP, ACC, TEMP):
            modality_tokens.append(self.encode_signal(signal))
        modality_tokens.append(self.beh_enc(BEH))

        fused = torch.stack(modality_tokens, dim=1)
        fused = self.fuse_tf(fused)
        return self.cls(fused.mean(dim=1))

# -----------------------------
# LOSO LOOP
# -----------------------------
def _z_norm_windowwise(x):
    """Z-score each window of ``x`` along axis 1 (same formula as steps 5/6)."""
    mean = x.mean(axis=1, keepdims=True)
    std = x.std(axis=1, keepdims=True) + 1e-6
    return (x - mean) / std


def _load_subject(path):
    """Read one subject archive and return (EDA, BVP, ACC, TEMP, BEH, y).

    The four signal arrays are window-wise z-normalised. Behaviour features
    are returned raw, because their scaling statistics must come from the
    training subjects of each fold. Labels are shifted by -1.

    Raises:
        ValueError: if the shifted labels fall outside 0..2, which would make
            them invalid class indices for the 3-class loss.
    """
    # The context manager closes the archive once the arrays are in memory.
    with np.load(path) as d:
        signals = [
            _z_norm_windowwise(d[key])
            for key in ("EDA_windows", "BVP_windows", "ACC_windows", "TEMP_windows")
        ]
        beh = d["behavior_features"]
        y = d["labels"] - 1

    if y.min() < 0 or y.max() > 2:
        raise ValueError(
            f"{path}: remapped labels must lie in 0..2, got {y.min()}..{y.max()}"
        )
    return (*signals, beh, y)


if len(SUBJECT_FILES) < 2:
    raise ValueError("LOSO validation needs at least two subject files")

all_reports = []

# Read every subject once up front instead of re-opening each file per fold.
subject_data = {path: _load_subject(path) for path in SUBJECT_FILES}

for test_subject in SUBJECT_FILES:
    print(f"\nðŸ§ª LOSO Test Subject: {test_subject}")

    # Held-out subject
    EDA_test, BVP_test, ACC_test, TEMP_test, BEH_test, y_test = subject_data[test_subject]

    # All remaining subjects form the training set; zip(*...) regroups the
    # per-subject tuples into one group per array kind before concatenation.
    train_parts = [subject_data[p] for p in SUBJECT_FILES if p != test_subject]
    EDA_tr, BVP_tr, ACC_tr, TEMP_tr, BEH_tr, y_tr = (
        np.concatenate(group) for group in zip(*train_parts)
    )

    # Standardise behaviour features with training-subject statistics only
    # and apply the same transform to the held-out subject, so no test
    # information enters the fold.
    beh_mean = BEH_tr.mean(axis=0)
    beh_std = BEH_tr.std(axis=0) + 1e-6
    BEH_tr = (BEH_tr - beh_mean) / beh_std
    BEH_test = (BEH_test - beh_mean) / beh_std

    # Dataloaders
    train_loader = DataLoader(
        WESADDataset(EDA_tr, BVP_tr, ACC_tr, TEMP_tr, BEH_tr, y_tr),
        batch_size=BATCH_SIZE,
        shuffle=True
    )

    test_loader = DataLoader(
        WESADDataset(EDA_test, BVP_test, ACC_test, TEMP_test, BEH_test, y_test),
        batch_size=BATCH_SIZE,
        shuffle=False
    )

    # A fresh model, loss and optimizer per fold, so folds do not share state.
    model = EmotionModel(beh_dim=BEH_tr.shape[1]).to(device)

    class_weights = compute_class_weight(
        class_weight="balanced",
        classes=np.array([0, 1, 2]),
        y=y_tr
    )

    criterion = nn.CrossEntropyLoss(
        weight=torch.tensor(class_weights, dtype=torch.float32).to(device)
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

    # Train
    for epoch in range(EPOCHS):
        model.train()
        for batch in train_loader:
            eda_b, bvp_b, acc_b, temp_b, beh_b, y_b = (t.to(device) for t in batch)
            optimizer.zero_grad()
            loss = criterion(model(eda_b, bvp_b, acc_b, temp_b, beh_b), y_b)
            loss.backward()
            optimizer.step()

    # Evaluate
    model.eval()
    preds, gts = [], []

    with torch.no_grad():
        for batch in test_loader:
            *inputs_b, y_b = batch
            out = model(*(t.to(device) for t in inputs_b))
            preds.extend(out.argmax(1).cpu().numpy())
            gts.extend(y_b.numpy())

    # zero_division=0 is passed to both calls so classes that are never
    # predicted do not trigger undefined-metric warnings.
    print(classification_report(gts, preds, zero_division=0))
    all_reports.append(
        classification_report(gts, preds, output_dict=True, zero_division=0)
    )

print("\nâœ… LOSO Validation Completed")



ðŸ§ª LOSO Test Subject: features_S2_w60.npz
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       152
           1       0.29      1.00      0.45        82
           2       0.00      0.00      0.00        47

    accuracy                           0.29       281
   macro avg       0.10      0.33      0.15       281
weighted avg       0.09      0.29      0.13       281


ðŸ§ª LOSO Test Subject: features_S3_w60.npz


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0       0.54      0.95      0.69       152
           1       0.00      0.00      0.00        85
           2       0.00      0.00      0.00        49

    accuracy                           0.51       286
   macro avg       0.18      0.32      0.23       286
weighted avg       0.29      0.51      0.37       286


ðŸ§ª LOSO Test Subject: features_S4_w60.npz
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       154
           1       0.29      1.00      0.45        84
           2       0.00      0.00      0.00        49

    accuracy                           0.29       287
   macro avg       0.10      0.33      0.15       287
weighted avg       0.09      0.29      0.13       287


ðŸ§ª LOSO Test Subject: features_S5_w60.npz


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       159
           1       0.29      1.00      0.45        85
           2       0.00      0.00      0.00        50

    accuracy                           0.29       294
   macro avg       0.10      0.33      0.15       294
weighted avg       0.08      0.29      0.13       294


ðŸ§ª LOSO Test Subject: features_S6_w60.npz


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0       0.59      0.70      0.64       157
           1       0.11      0.14      0.12        87
           2       0.00      0.00      0.00        48

    accuracy                           0.42       292
   macro avg       0.24      0.28      0.26       292
weighted avg       0.35      0.42      0.38       292


âœ… LOSO Validation Completed
