In [1]:
import torch
import pandas as pd 
from tbparse import SummaryReader

In [11]:
LOG_DIR_1 = "logs/finetune_challenge2_1762157150/events.out.tfevents.1762157150.93588e1400d5.17321.0"

reader = SummaryReader(LOG_DIR_1)
df = reader.scalars
df.head()

Unnamed: 0,step,tag,value
0,1,Val/backbone/MSE,0.639654
1,2,Val/backbone/MSE,0.601093
2,1,Val/backbone/RMSE,0.79975
3,2,Val/backbone/RMSE,0.775275
4,1,backbone/Active_Mixtures,2.993335


In [12]:
columns = df['tag'].unique().tolist()
index = df['step'].unique().tolist()

data = {col: [] for col in columns}
for step in index:
    step_data = df[df['step'] == step]
    for col in columns:

        value = step_data[step_data['tag'] == col]['value']
        if not value.empty:
            data[col].append(value.values[0])
        else:
            data[col].append(None)
# ensure 'Epoch' is the first column
data.pop('Epoch', None)
data = {'Epoch': index, **data}

dataframe = pd.DataFrame(data)
dataframe.head()

Unnamed: 0,Epoch,Val/backbone/MSE,Val/backbone/RMSE,backbone/Active_Mixtures,backbone/LR,backbone/MDN_Loss,backbone/MSE,backbone/RMSE,backbone/π_Entropy,backbone/π_Max
0,1,0.639654,0.79975,2.993335,2e-05,1.168489,0.545115,0.738296,1.036734,0.463129
1,2,0.601093,0.775275,2.881944,4e-05,0.962732,0.475892,0.689637,0.927527,0.582579


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Epoch                     2 non-null      int64  
 1   Val/backbone/MSE          2 non-null      float64
 2   Val/backbone/RMSE         2 non-null      float64
 3   backbone/Active_Mixtures  2 non-null      float64
 4   backbone/LR               2 non-null      float64
 5   backbone/MDN_Loss         2 non-null      float64
 6   backbone/MSE              2 non-null      float64
 7   backbone/RMSE             2 non-null      float64
 8   backbone/π_Entropy        2 non-null      float64
 9   backbone/π_Max            2 non-null      float64
dtypes: float64(9), int64(1)
memory usage: 292.0 bytes


## EDA the lenghth of dataset

In [1]:
from pathlib import Path
from eegdash.dataset import EEGChallengeDataset
from braindecode.datasets.base import EEGWindowsDataset, BaseConcatDataset, BaseDataset
from braindecode.preprocessing import Preprocessor, preprocess, create_windows_from_events
from braindecode.preprocessing import create_fixed_length_windows

import torch
from model.meta_encoder import MetaEncoder
import numpy as np
import joblib
from tqdm.notebook import tqdm

In [2]:
SFREQ = 100
CROP_SEC = 2
WINDOW_SEC = 4
STRIDE_SEC = 2
DESCRIPTION_FILEDS = [
    "subject", "session", "run", "task", "age", "gender", "sex", "p_factor"
]
TASK_NAMES = [
    "RestingState", "DespicableMe", "DiaryOfAWimpyKid", "FunwithFractals",
    "ThePresent", "contrastChangeDetection", "seqLearning6target",
    "seqLearning8target", "surroundSupp", "symbolSearch"
]

CropWrapper Dataset

In [3]:
class CropMetaWrapper(BaseDataset):
    def __init__(self, windows_ds, 
                        crop_samples, 
                        meta_encoder, 
                        target_name="externalizing"):
        
        self.windows_ds = windows_ds
        self.crop_samples = crop_samples
        self.meta_encoder = meta_encoder
        self.target_name = target_name
        self.rng = np.random.default_rng(2025)  # fixed seed

    def __len__(self):
        return len(self.windows_ds)

    def __getitem__(self, idx):
        X, _, crop_inds = self.windows_ds[idx]  # X: (C, 4*SFREQ)

        # Target
        target = float(self.windows_ds.description[self.target_name])

        # Meta
        desc = self.windows_ds.description
        meta_dict = {
            "task": desc["task"],
            "sex": desc["sex"],
            "age": float(desc["age"]),
        }
        meta_vec = self.meta_encoder.transform(meta_dict)

        # Random 2s crop
        i_win, i_start, i_stop = crop_inds


        assert i_stop - i_start >= self.crop_samples

        # FIXED: .integers instead of .randint
        offset = self.rng.integers(0, i_stop - i_start - self.crop_samples + 1)
        i_start = i_start + offset
        i_stop = i_start + self.crop_samples
        X_crop = X[:, offset : offset + self.crop_samples]  # (C, 2*SFREQ)

        # Infos
        infos = {
            "subject": desc["subject"],
            "session": desc.get("session", ""),
            "run": desc.get("run", ""),
            "task": desc["task"],
            "sex": desc["sex"],
            "age": float(desc["age"]),
        }

        return torch.tensor(X_crop), meta_vec, target, (i_win, i_start, i_stop), infos


In [4]:
all_subdatasets = []
meta_encoder = []

RELEASE = [f"R{i}" for i in range(1, 12)]
data_root = Path('MyEEGData_full')

Load the joblib

In [5]:
release = "R5"
task = "RestingState"

preproc_root = Path('preprocess_data/challenge2')
preproc_root.mkdir(parents=True, exist_ok=True)

load_path = preproc_root / f"{release}_windows_task[{task}].pkl"
print(load_path)

list_windows = []
try: 
    windows_ds = joblib.load(load_path)
    list_windows.append(windows_ds)
    print(f"  -> Successfully loaded windows from {load_path}")
except Exception as e:
    print(f"  -> FAILED to load windows from {load_path}: {e}")

preprocess_data/challenge2/R5_windows_task[RestingState].pkl


  -> Successfully loaded windows from preprocess_data/challenge2/R5_windows_task[RestingState].pkl


Uploading the preprocessed data

In [7]:
list_windows = []

RELEASE = ['R5']
total = len(RELEASE) * len(TASK_NAMES)
pbar = tqdm(total=total, desc = "Loading preprocessed windows")

for release in RELEASE:
    for task in TASK_NAMES:

        load_path = preproc_root / f"{release}_windows_task[{task}].pkl"
        try:
            windows_ds = joblib.load(load_path)
            list_windows.append(windows_ds)
        except Exception as e:
            print(f"  -> FAILED to load windows from {load_path}: {e}")
        finally:
            pbar.update(1)
            
pbar.close()
all_windows_ds = BaseConcatDataset(list_windows)
print(f"Total windows dataset size: {len(all_windows_ds)}")

Loading preprocessed windows:   0%|          | 0/10 [00:00<?, ?it/s]

Total windows dataset size: 447063


In [11]:
list_subjects = []
for i in range(len(all_windows_ds.datasets)):
    subject = all_windows_ds.datasets[i].windows_ds.description['subject']
    list_subjects.append(subject)

In [None]:
# SPlit the subject 
unique_subjects = list(set(list_subjects))

total_length = len(unique_subjects)
train_size = int(0.8 * total_length)
val_size = total_length - train_size

train_subjects = unique_subjects[: train_size]
val_subjects = unique_subjects[train_size : ]

print(f"Total subjects: {len(unique_subjects)}")
print(f"Train subjects: {len(train_subjects)}")
print(f"Validation subjects: {len(val_subjects)}")

train_datasets = []
val_datasets = []

for ds in all_windows_ds.datasets:
    subject = ds.windows_ds.description['subject']
    if subject in train_subjects:
        train_datasets.append(ds)
    elif subject in val_subjects:
        val_datasets.append(ds)

train_window_data = BaseConcatDataset(train_datasets)
val_window_data = BaseConcatDataset(val_datasets)
len_train = len(train_window_data)
len_val = len(val_window_data)

print(f"Train windows: {len_train}")
print(f"Validation windows: {len_val}")
assert len_train + len_val == len(all_windows_ds)

In [23]:
# SPlit the subject 
from sklearn.model_selection import train_test_split

unique_subjects = list(set(list_subjects))

total_length = len(unique_subjects)
train_size = int(0.8 * total_length)
val_size = int(0.1 * total_length)
test_size = total_length - train_size - val_size
seed = 2005

train_subj, val_test_subj = train_test_split(
    unique_subjects, train_size=train_size, test_size=val_size + test_size, random_state=seed
)

val_subj, test_subj = train_test_split(
    val_test_subj, train_size=val_size, test_size=test_size, random_state=seed
)

def split_by_subjects(dataset, subject_list):
    indices = []
    for ds in dataset.datasets:
        subject = ds.windows_ds.description['subject']
        if subject in subject_list:
            indices.append(ds)

    return BaseConcatDataset(indices)

train_set = split_by_subjects(all_windows_ds, train_subj)
val_set = split_by_subjects(all_windows_ds, val_subj)
test_set = split_by_subjects(all_windows_ds, test_subj)

In [25]:
print(f"Train windows: {len(train_set)}")
print(f"Validation windows: {len(val_set)}")
print(f"Test windows: {len(test_set)}")

print(f"Total windows: {len(train_set) + len(val_set) + len(test_set)}")

Train windows: 355181
Validation windows: 45101
Test windows: 46781
Total windows: 447063


In [26]:
xtrain_sample1 = train_window_data[0]
X, meta_vec, target, crop_inds, infos = xtrain_sample1

In [33]:
train_window_data = BaseConcatDataset(train_datasets)
val_window_data = BaseConcatDataset(val_datasets)

print(f"Train windows: {len(train_window_data)}")
print(f"Validation windows: {len(val_window_data)}")

Train windows: 52885
Validation windows: 13422


In [19]:
train_sample1 = train_window_data[0]
X, meta_vec, target, crop_inds, infos = train_sample1