In [8]:
import torch
import mne
import numpy as np
import glob
import pandas as pd

# Load metadata DataFrame
df_info = pd.read_pickle("data/FG_overview_df_v2.pkl")  # Update with actual path

# Define event IDs
event_labels = {'T1P': 301, 'T1Pn': 302, 'T3P': 303, 'T3Pn': 304,
                'T12P': 305, 'T12Pn': 306, 'T13P': 307, 'T13Pn': 308,
                'T23P': 309, 'T23Pn': 310}

file_paths = glob.glob("data/*_FG_preprocessed-epo.fif")  # Update with actual data path
print(f"Found {len(file_paths)} EEG files.")

all_eeg_data, all_labels = [], []

for file_path in file_paths:
    # Extract filename (e.g., "301A")
    file_name = file_path.split("/")[-1].split("_")[0]  # Extract "301A"

    # Extract Experiment ID (e.g., "301")
    exp_id = file_name[:4]

    # Get participants for this experiment
    experiment_participants = df_info[df_info["Exp_id"] == exp_id]

    if experiment_participants.empty:
        print(f"Skipping {file_name}: No participants found for {exp_id}.")
        continue

    # Load EEG file
    epochs = mne.read_epochs(file_path, preload=True)
    eeg_data = epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)
    labels = epochs.events[:, -1]  # Extract event labels

    # Process each participant in the experiment
    for _, row in experiment_participants.iterrows():
        subject_id = row["Subject_id"]
        eeg_device = row["EEG_device"]  # 1, 2, or 3

        # Map EEG device to the corresponding event labels
        device_event_labels = {
            1: {301, 302, 305, 306, 307, 308},  # T1 labels
            2: {303, 304, 305, 306, 309, 310},  # T2 labels
            3: {307, 308, 309, 310, 303, 304},  # T3 labels
        }

        valid_events = device_event_labels[eeg_device]

        # Filter trials for this subject
        valid_trials = [i for i, label in enumerate(labels) if label in valid_events]

        if len(valid_trials) == 0:
            print(f"Skipping subject {subject_id} in {exp_id}: No relevant trials for EEG device {eeg_device}.")
            continue

        # Keep only the relevant trials
        eeg_subject_data = eeg_data[valid_trials]
        labels_subject = labels[valid_trials]

        # Normalize per file
        eeg_subject_data = (eeg_subject_data - eeg_subject_data.mean()) / eeg_subject_data.std()

        # Convert labels to binary classification (feedback vs. no feedback)
        binary_labels = np.array([1 if label in {301, 303, 305, 307, 309} else 0 for label in labels_subject])

        all_eeg_data.append(eeg_subject_data)
        all_labels.append(binary_labels)

# Convert to PyTorch tensors
eeg_tensor = torch.tensor(np.concatenate(all_eeg_data, axis=0), dtype=torch.float32)
labels_tensor = torch.tensor(np.concatenate(all_labels, axis=0), dtype=torch.long)

print(f"Final EEG Tensor Shape: {eeg_tensor.shape}")  


Found 15 EEG files.
Final EEG Tensor Shape: torch.Size([2674, 64, 3000])


In [38]:
for i in range(len(all_labels)):
    print(all_labels[i].shape)


(180,)
(175,)
(177,)
(180,)
(180,)
(179,)
(180,)
(180,)
(176,)
(180,)
(173,)
(176,)
(180,)
(178,)
(180,)


In [9]:
df_info

Unnamed: 0,Subject_id,Exp_id,Friend_status,EEG_device,Force_device,Force_port,Age,Gender,Class_friends,Class_close_friends,Friends,Close_friends,Triad_id,Participant,tFriends,tClose_friends,All_friends,tClass,rClass
0,1049,301A,Yes,1,1,1,22.239562,F,3,6,9,14,301,P1,12,20,32,9,0.666667
1,1029,301B,No,2,3,2,24.331280,M,3,1,8,5,301,P2,11,6,17,4,0.250000
2,1028,301C,Yes,3,4,3,21.670089,M,8,4,9,4,301,P3,17,8,25,12,0.333333
3,1064,302A,Yes,1,1,1,22.009582,M,14,8,11,5,302,P1,25,13,38,22,0.363636
4,1024,302B,Yes,2,3,2,21.355236,M,5,7,6,10,302,P2,11,17,28,12,0.583333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,6069,330B,No,2,3,2,19.373032,F,14,3,17,17,330,P2,31,20,51,17,0.176471
88,6048,330C,Yes,3,4,3,19.841205,M,13,11,25,26,330,P3,38,37,75,24,0.458333
89,6015,331A,Yes,1,1,1,20.052019,M,9,4,14,13,331,P1,23,17,40,13,0.307692
90,6183,331B,Yes,2,3,2,18.863792,M,22,6,25,12,331,P2,47,18,65,28,0.214286


In [10]:
4443 / 300


14.81

In [11]:
print(f"EEG Data Shape: {eeg_tensor.shape}")  
print(f"Number of Unique Labels: {len(set(labels_tensor.numpy()))}")  
print(f"Total Files Processed: {len(file_path)}")

EEG Data Shape: torch.Size([2674, 64, 3000])
Number of Unique Labels: 2
Total Files Processed: 33


In [12]:
epochs

Unnamed: 0,General,General.1
,Filename(s),301A_FG_preprocessed-epo.fif
,MNE object type,EpochsFIF
,Measurement date,2023-10-11 at 14:54:47 UTC
,Participant,
,Experimenter,Unknown
,Acquisition,Acquisition
,Total number of events,300
,Events counts,T12P: 30  T12Pn: 30  T13P: 30  T13Pn: 30  T1P: 30  T1Pn: 30  T23P: 30  T23Pn: 30  T3P: 30  T3Pn: 30
,Time range,-0.500 – 5.498 s
,Baseline,off


In [13]:
import torch
import mne
import numpy as np
from torcheeg.models import LaBraM

# Load EEG data
file_path = "data/301A_FG_preprocessed-epo.fif"  # Update with your actual file
epochs = mne.read_epochs(file_path, preload=True)

# Extract EEG data and labels
eeg_data = epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)
labels = epochs.events[:, -1]  # Assuming the last column contains event labels

# Normalize EEG data
eeg_data = (eeg_data - eeg_data.mean()) / eeg_data.std()

# Convert to PyTorch tensors
eeg_tensor = torch.tensor(eeg_data, dtype=torch.float32)

no_feedback_labels = {301, 303, 305, 307, 309}  # Feedback events
binary_labels = np.array([0 if label in no_feedback_labels else 1 for label in labels])

labels_tensor = torch.tensor(binary_labels, dtype=torch.long)

# Extract actual electrode names
electrode_names = [ch.upper() for ch in epochs.ch_names]  # Convert to uppercase

PATCH_SIZE = 500  

# Compute number of patches
num_time_steps = eeg_tensor.shape[2]
num_patches = num_time_steps // PATCH_SIZE  # Truncate remainder

# Reshape data into patches
eeg_tensor = eeg_tensor[:, :, :num_patches * PATCH_SIZE]  # Trim to fit patches
eeg_tensor = eeg_tensor.view(eeg_tensor.shape[0], eeg_tensor.shape[1], num_patches, PATCH_SIZE)

print(f"New EEG Tensor Shape: {eeg_tensor.shape}")  # Should be (batch_size, num_channels, num_patches, patch_size)

New EEG Tensor Shape: torch.Size([300, 64, 6, 500])


In [14]:
eeg_tensor.shape

torch.Size([300, 64, 6, 500])

In [15]:
epochs

Unnamed: 0,General,General.1
,Filename(s),301A_FG_preprocessed-epo.fif
,MNE object type,EpochsFIF
,Measurement date,2023-10-11 at 14:54:47 UTC
,Participant,
,Experimenter,Unknown
,Acquisition,Acquisition
,Total number of events,300
,Events counts,T12P: 30  T12Pn: 30  T13P: 30  T13Pn: 30  T1P: 30  T1Pn: 30  T23P: 30  T23Pn: 30  T3P: 30  T3Pn: 30
,Time range,-0.500 – 5.498 s
,Baseline,off


In [16]:
print(epochs.event_id)


{'T1P': 301, 'T1Pn': 302, 'T3P': 303, 'T3Pn': 304, 'T12P': 305, 'T12Pn': 306, 'T13P': 307, 'T13Pn': 308, 'T23P': 309, 'T23Pn': 310}


In [17]:
model = LaBraM(num_electrodes=len(electrode_names), electrodes=electrode_names)

# Load pre-trained weights (if available)
# model.load_state_dict(torch.load("path_to_pretrained_labram.pth"))

model.train()


LaBraM(
  (patch_embed): TemporalConv(
    (conv1): Conv2d(1, 8, kernel_size=(1, 15), stride=(1, 8), padding=(0, 7))
    (gelu1): GELU(approximate='none')
    (norm1): GroupNorm(4, 8, eps=1e-05, affine=True)
    (conv2): Conv2d(8, 8, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (gelu2): GELU(approximate='none')
    (norm2): GroupNorm(4, 8, eps=1e-05, affine=True)
    (conv3): Conv2d(8, 8, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
    (norm3): GroupNorm(4, 8, eps=1e-05, affine=True)
    (gelu3): GELU(approximate='none')
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0-11): 12 x Block(
      (norm1): LayerNorm((200,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=200, out_features=600, bias=False)
        (q_norm): LayerNorm((20,), eps=1e-06, elementwise_affine=True)
        (k_norm): LayerNorm((20,), eps=1e-06, elementwise_affine=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
    

In [18]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create DataLoader
dataset = TensorDataset(eeg_tensor, labels_tensor)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X, electrodes=electrode_names)  # Pass electrodes explicitly
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 200 but got size 504 for tensor number 1 in the list.