In [None]:
import sys, subprocess, pathlib, re

def sh(cmd):
    """Echo a shell command line, then execute it through the shell.

    Args:
        cmd: Command line passed unchanged to the shell (``shell=True``), so
            shell operators such as ``||`` are interpreted by the shell.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    # Show the command first so a failing step is easy to spot in the output.
    print(f">> {cmd}")
    subprocess.check_call(cmd, shell=True)

# Bootstrap the environment: packaging tools first, then the runtime
# dependencies, then a patched source install of torcheeg.
sh(f"{sys.executable} -m pip -q install --upgrade pip setuptools wheel")
sh(f"{sys.executable} -m pip -q install synapseclient scikit-learn matplotlib tqdm")
sh(f"{sys.executable} -m pip -q install mne mne-connectivity xmltodict numpy scipy pandas joblib")
# `|| true` makes this step succeed regardless of pip's exit status.
sh(f"{sys.executable} -m pip -q uninstall -y torcheeg || true")
# Start from a clean checkout so re-running the cell is repeatable.
sh("rm -rf torcheeg_src")
sh("git clone --depth 1 --branch v1.1.3 https://github.com/torcheeg/torcheeg.git torcheeg_src")

setup_py = pathlib.Path("torcheeg_src/setup.py")
txt = setup_py.read_text()

# Remove scipy<=1.10.1 constraint
txt2 = re.sub(r"scipy>=1\.7\.3\s*,\s*<=\s*1\.10\.1", "scipy>=1.7.3", txt)
if txt2 == txt:
    # The patch is purely textual; if upstream changed the requirement string
    # the substitution is a no-op and the pin would silently stay in place.
    print("WARNING: scipy constraint pattern not found in torcheeg_src/setup.py; "
          "setup.py left unpatched.")
setup_py.write_text(txt2)

# now install with deps
sh(f"{sys.executable} -m pip -q install ./torcheeg_src")

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from pathlib import Path

# Seed the NumPy and PyTorch (CPU + CUDA) generators with one shared value.
SEED = 0
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)

# Run on the GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("device:", device)

# Working directories: raw downloads and unpacked / cached data.
ROOT = Path("./FACED_torcheeg")
DL = ROOT / "downloads"
DATA = ROOT / "data"
for folder in (DL, DATA):
    folder.mkdir(parents=True, exist_ok=True)

# Class names; the evaluation cell uses this order as the label-index -> name
# mapping. NOTE(review): presumably matches the dataset's "emotion" label
# encoding - confirm against the dataset documentation.
EMO = [
    "anger",
    "disgust",
    "fear",
    "sadness",
    "neutral",
    "amusement",
    "inspiration",
    "joy",
    "tenderness",
]

device: cuda


In [None]:
import zipfile, getpass, synapseclient, os

# Synapse entity id of the feature archive to download.
SYN_EEG_FEATURES = "syn52368847"

# Take the token from the environment when set; otherwise prompt for it so it
# never has to be written into the notebook.
token = os.environ.get("SYNAPSE_AUTH_TOKEN") or getpass.getpass("Synapse Personal Access Token: ").strip()

syn = synapseclient.Synapse()
syn.login(authToken=token, silent=True)

# Fetch the archive into the downloads directory.
ent = syn.get(SYN_EEG_FEATURES, downloadLocation=str(DL))
feat_zip = Path(ent.path)
print("Downloaded:", feat_zip)

feat_dir = DATA / "EEG_Features_unzipped"
marker = feat_dir / ".unzipped_ok"
feat_dir.mkdir(parents=True, exist_ok=True)

# The marker file is written only after extraction finishes, so its presence
# means the archive does not need to be unpacked again.
if not marker.exists():
    with zipfile.ZipFile(feat_zip, "r") as z:
        z.extractall(feat_dir)
    marker.write_text("ok")

# The DE folder is looked up in two places: nested under "EEG_Features" or
# directly inside the extraction directory. The first existing one wins.
cands = [
    feat_dir / "EEG_Features" / "DE",
    feat_dir / "DE",
]
DE_path = None
for cand in cands:
    if cand.exists():
        DE_path = cand
        break
print("DE_path:", DE_path)
assert DE_path is not None, "Couldn't find EEG_Features/DE after unzip."

In [None]:
from torcheeg.datasets import FACEDFeatureDataset
from torcheeg import transforms
from torcheeg.datasets.constants import FACED_CHANNEL_LOCATION_DICT

# Absolute location handed to the dataset as its `io_path`.
IO_PATH = str((DATA / "io_faced_de").resolve())

# Build the dataset from the extracted DE features. Per the recorded notebook
# output, each sample comes out as a 5 x 8 x 9 tensor; the label is the
# "emotion" field selected by the label transform.
dataset = FACEDFeatureDataset(
    io_path=IO_PATH,
    io_mode="pickle",
    root_path=str(DE_path),
    offline_transform=transforms.ToGrid(FACED_CHANNEL_LOCATION_DICT),
    online_transform=transforms.ToTensor(),
    label_transform=transforms.Select("emotion"),
    verbose=True,
)

print("len(dataset) =", len(dataset))

# Sanity-check the first sample: tuple length, tensor type/shape, and label.
item = dataset[0]
first_x = item[0]
print("len(dataset[0]) =", len(item))
print("type(dataset[0][0]) =", type(first_x), "shape =", tuple(first_x.shape))
print("label =", int(item[1]))

In [None]:
from sklearn.model_selection import train_test_split

N = len(dataset)

# Collect one integer label per sample. The metadata route (`read_info`) is
# tried first; if it fails for any reason, fall back to reading the label by
# indexing the dataset itself.
try:
    labels = np.array(
        [int(dataset.read_info(i)["emotion"]) for i in range(N)],
        dtype=np.int64,
    )
except Exception as e:
    print("read_info failed, falling back to dataset[i][1] labels:", e)
    labels = np.array(
        [int(dataset[i][1]) for i in range(N)],
        dtype=np.int64,
    )

idx = np.arange(N)

# Stratified 80 / 10 / 10 split: hold out 20%, then halve the holdout into
# validation and test.
# NOTE(review): the split is over individual samples; if several samples come
# from the same recording or subject they can land in different splits -
# confirm this is the intended evaluation protocol.
idx_train, idx_tmp, y_train, y_tmp = train_test_split(
    idx,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=SEED,
)
idx_val, idx_test, y_val, y_test = train_test_split(
    idx_tmp,
    y_tmp,
    test_size=0.5,
    stratify=y_tmp,
    random_state=SEED,
)

print("splits:", len(idx_train), len(idx_val), len(idx_test))
print("train class counts:", np.bincount(y_train, minlength=9))

class Wrap(Dataset):
    """Index-subset view of a base dataset with per-sample normalization.

    Each item is fetched from ``base`` through ``indices``, cast to float, and
    standardized separately for every slice along dim 0, using the mean and
    standard deviation taken over dims 1 and 2.
    """

    def __init__(self, base, indices):
        """Store the base dataset and the positions (into it) to expose."""
        self.base = base
        self.indices = np.asarray(indices)

    def __len__(self):
        """Number of exposed samples."""
        return len(self.indices)

    def __getitem__(self, i):
        """Return ``(normalized_tensor, int_label)`` for the i-th exposed sample."""
        sample, label = self.base[int(self.indices[i])]
        sample = sample.float()

        # per-sample normalization
        reduce_dims = (1, 2)
        center = sample.mean(dim=reduce_dims, keepdim=True)
        # Clamp so a constant slice divides by 1e-6 instead of zero.
        scale = sample.std(dim=reduce_dims, keepdim=True).clamp_min(1e-6)
        return (sample - center) / scale, int(label)

# Only the training loader shuffles; validation and test keep a fixed order
# and use a larger batch size.
train_loader = DataLoader(
    Wrap(dataset, idx_train),
    batch_size=256,
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    Wrap(dataset, idx_val),
    batch_size=512,
    shuffle=False,
    num_workers=0,
)
test_loader = DataLoader(
    Wrap(dataset, idx_test),
    batch_size=512,
    shuffle=False,
    num_workers=0,
)

# Pull one training batch; its shape is reused later to size the model.
xb, yb = next(iter(train_loader))
print("batch:", xb.shape, yb.shape)

splits: 82656 10332 10332
train class counts: [ 8856  8856  8856  8856 11808  8856  8856  8856  8856]
batch: torch.Size([256, 5, 8, 9]) torch.Size([256])


In [None]:
import torch.nn as nn
import torch

# Read the model's input geometry off the sample batch: dim 1 is the channel
# count, the last two dims are the grid height and width.
batch_shape = tuple(xb.shape)
in_ch = batch_shape[1]
H, W = batch_shape[-2], batch_shape[-1]

class SmallCNN(nn.Module):
    """Three-convolution CNN classifier with a global-average-pooled head.

    Args:
        in_channels: Number of channels in the input tensor.
        num_classes: Number of output logits (defaults to 9).
    """

    def __init__(self, in_channels, num_classes=9):
        super().__init__()
        # Layers are listed in execution order; their positions define the
        # `net.<idx>` keys of the state dict.
        layers = [
            # Stage 1: conv -> batch norm -> ReLU -> 2x2 max-pool.
            nn.Conv2d(in_channels, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 2: same pattern, 64 -> 128 channels.
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 3: conv + ReLU only (no batch norm, no pooling).
            nn.Conv2d(128, 128, 3, padding=1),
            nn.ReLU(),
            # Head: collapse the spatial dims to 1x1, flatten, classify.
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(128, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a batch ``x``."""
        logits = self.net(x)
        return logits

model = SmallCNN(in_channels=in_ch, num_classes=9).to(device)

# Class weights for the loss: inverse training-set frequency, rescaled so the
# weights average to 1. The 1e-6 keeps an empty class from dividing by zero.
counts = np.bincount(y_train, minlength=9).astype(np.float32)
inv_freq = counts.sum() / (counts + 1e-6)
weights = inv_freq / inv_freq.mean()
class_w = torch.tensor(weights, dtype=torch.float32, device=device)

criterion = nn.CrossEntropyLoss(weight=class_w)
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-3,
    weight_decay=1e-4,
)

print("model ok | in_ch:", in_ch, "| grid:", (H, W))

model ok | in_ch: 5 | grid: (8, 9)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn

def run_epoch(loader, train: bool):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Operates on the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        train: When True the model is put in training mode and the optimizer
            is stepped on every batch (with gradient-norm clipping at 1.0).
            When False the model is put in eval mode and no gradients are
            tracked.

    Returns:
        Tuple ``(loss, acc)``: the batch-size-weighted mean of the per-batch
        criterion values, and the fraction of correctly classified samples.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train(train)
    total_loss, total_correct, total_n = 0.0, 0, 0

    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        # Track gradients only when they will be used: evaluation passes would
        # otherwise build an autograd graph per batch that is never consumed.
        with torch.set_grad_enabled(train):
            logits = model(xb)
            loss = criterion(logits, yb)

        if train:
            optimizer.zero_grad(set_to_none=True)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

        # Weight each batch by its size so a smaller final batch is not
        # over-represented in the epoch average.
        total_loss += float(loss.item()) * len(yb)
        total_correct += int((logits.argmax(1) == yb).sum().item())
        total_n += len(yb)

    return total_loss / total_n, total_correct / total_n


# Early-stopping configuration.
MAX_EPOCHS = 200   # hard cap on the number of epochs
MIN_EPOCHS = 10    # never stop before this many epochs have run
PATIENCE = 12      # stop after this many epochs w/o val improvement
MIN_DELTA = 1e-3   # val loss must drop by more than this to count as improved

# Per-epoch history (train / val loss and accuracy).
trL, trA = [], []
vaL, vaA = [], []

# Best checkpoint seen so far, selected by validation loss.
best_val_loss = float("inf")
best_epoch = -1
best_state = None
bad_epochs = 0

for e in range(1, MAX_EPOCHS + 1):
    tl, ta = run_epoch(train_loader, True)
    vl, va = run_epoch(val_loader, False)

    trL.append(tl)
    vaL.append(vl)
    trA.append(ta)
    vaA.append(va)

    improved = vl < best_val_loss - MIN_DELTA
    if not improved:
        bad_epochs += 1
    else:
        best_val_loss, best_epoch, bad_epochs = vl, e, 0
        # Snapshot the weights on the CPU as independent copies so later
        # optimizer steps cannot modify the saved state.
        best_state = {
            key: value.detach().cpu().clone()
            for key, value in model.state_dict().items()
        }

    print(f"epoch {e:03d} | train loss {tl:.4f} acc {ta:.3f} | val loss {vl:.4f} acc {va:.3f} "
          f"| best val loss {best_val_loss:.4f} @ {best_epoch:03d} | patience {bad_epochs}/{PATIENCE}")

    # Stop only once patience is exhausted and the minimum epoch count is met.
    if bad_epochs >= PATIENCE and e >= MIN_EPOCHS:
        print(f"Early stopping: no val loss improvement for {PATIENCE} epochs. Stopping at epoch {e}.")
        break

# restore best (lowest val loss)
# `best_state` stays None when the validation loss never improved on its
# initial value of +inf (e.g. it was NaN or inf in every epoch). In that case
# keep the current weights instead of failing inside load_state_dict, and skip
# the best-epoch marker and lookup, which would point at a meaningless epoch.
has_best = best_state is not None
if has_best:
    model.load_state_dict(best_state)
else:
    print("WARNING: validation loss never improved; keeping last-epoch weights.")

plt.figure()
plt.plot(trL, label="train loss")
plt.plot(vaL, label="val loss")
if has_best:
    # Histories are 0-indexed while epochs count from 1, hence the -1.
    plt.axvline(best_epoch-1, linestyle="--", label="best epoch (val loss)")
plt.legend()
plt.title("Loss curves")
plt.show()

plt.figure()
plt.plot(trA, label="train acc")
plt.plot(vaA, label="val acc")
if has_best:
    plt.axvline(best_epoch-1, linestyle="--", label="best epoch (val loss)")
plt.legend()
plt.title("Accuracy curves")
plt.show()

print("best val loss:", best_val_loss, "at epoch", best_epoch)
if has_best:
    print("val acc at best loss epoch:", vaA[best_epoch-1])

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Score the held-out test split with the current model weights.
model.eval()
preds, trues = [], []

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        logits = model(xb)
        batch_pred = logits.argmax(1).cpu().numpy()
        preds.append(batch_pred)
        trues.append(np.asarray(yb))

y_true = np.concatenate(trues)
y_pred = np.concatenate(preds)

print("Test acc:", accuracy_score(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=EMO, digits=3, zero_division=0))

# Confusion matrix over all 9 class indices: rows are true labels, columns
# are predictions (matching the axis labels set below).
cm = confusion_matrix(y_true, y_pred, labels=np.arange(9))
plt.figure(figsize=(7,6))
plt.imshow(cm)
plt.colorbar()
plt.title("Confusion matrix (test)")
plt.xlabel("pred")
plt.ylabel("true")
plt.xticks(range(9), EMO, rotation=45, ha="right")
plt.yticks(range(9), EMO)
plt.tight_layout()
plt.show()

In [None]:
import json, time, hashlib
from pathlib import Path
import torch

# Re-apply the best weights when they are available, so the export does not
# depend on which cells ran before this one.
if globals().get("best_state") is not None:
    model.load_state_dict(best_state)

# Metadata saved both inside the checkpoint and as a JSON sidecar.
has_emo = "EMO" in globals()
meta = {
    "saved_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    "task": "EEG emotion classification (FACED DE -> 2D scalp grid -> CNN)",
    "classes": EMO if has_emo else None,
    "num_classes": len(EMO) if has_emo else None,
    "input_tensor_shape": [1, 5, 8, 9],   # (B, BANDS, H, W) for inference
    "bands": 5,
    "grid_hw": [8, 9],
    "notes": "Checkpoint contains model.state_dict only; recreate architecture in inference code before loading."
}

out_dir = Path("export_ckpt")
out_dir.mkdir(exist_ok=True)

ckpt_path = out_dir / "cnn_faced_best.pt"
meta_path = out_dir / "cnn_faced_best.meta.json"
sha_path = out_dir / "sha256.txt"

# The checkpoint holds the weights plus a copy of the metadata.
payload = {
    "state_dict": model.state_dict(),
    "meta": meta,
}
torch.save(payload, ckpt_path)

meta_path.write_text(json.dumps(meta, indent=2))

# Record a checksum of the checkpoint file so a downloaded copy can be verified.
hasher = hashlib.sha256()
hasher.update(ckpt_path.read_bytes())
sha256 = hasher.hexdigest()
sha_path.write_text(f"{sha256}\n")

print("Saved checkpoint:", ckpt_path)
print("Saved metadata:", meta_path)
print("SHA256:", sha256)

import zipfile
# Bundle the three artifacts flat (no directory prefix) into one archive.
zip_path = Path("cnn_faced_export_loss_stop.zip")
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
    for member in (ckpt_path, meta_path, sha_path):
        z.write(member, arcname=member.name)

print("Zipped:", zip_path)

# Offer the archive as a browser download when running in Google Colab.
# Outside Colab the `google.colab` package is not importable; the archive has
# already been written to disk, so report its location instead of failing.
try:
    from google.colab import files
except ImportError:
    print("google.colab not available; archive left at:", zip_path.resolve())
else:
    files.download(str(zip_path))