<a href="https://colab.research.google.com/github/Acor-Kyudou/Neuro_Motion/blob/main/process_simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install mne numpy scipy torch

Collecting mne
  Downloading mne-1.9.0-py3-none-any.whl.metadata (20 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cur

In [2]:
import mne
import os
import numpy as np
from scipy import signal
import torch
import torch.utils.data as data
from google.colab import drive

In [3]:
# Mount Google Drive so the dataset and output folders below are reachable.
drive.mount('/content/drive')
# Root folder of the raw EDF recordings; get_edf_paths expects one S### sub-folder per subject in it.
DATASET_PATH = "/content/drive/MyDrive/dataset/raw_data/files"
# Folder that receives the preprocessed .npy arrays.
SAVE_PATH = "/content/drive/MyDrive/dataset/preprocessed_data"
# Run numbers loaded for every subject.
# NOTE(review): presumably the "open/close left or right fist" runs of the dataset - confirm against the dataset documentation.
OPEN_CLOSE_LEFT_RIGHT_FIST = [3, 7, 11]
# Class names in label order (label 0 = left, label 1 = right).
CLASSES = ["left", "right"]

Mounted at /content/drive


In [4]:
# Make sure the output directory is present before anything is written to it.
os.makedirs(SAVE_PATH, exist_ok=True)

# Stop right away when the raw dataset folder cannot be found.
if os.path.exists(DATASET_PATH):
    print(f"Dataset path exists: {DATASET_PATH}")
else:
    raise ValueError(f"Dataset path does not exist: {DATASET_PATH}")

def get_edf_paths(subject_ids, run_numbers, dataset_path=None):
    """Collect the EDF files that exist for the given subjects and runs.

    Files are looked up at
    ``<dataset_path>/S<subject:03d>/S<subject:03d>R<run:02d>.edf``.
    Missing subject folders and missing run files are reported with
    ``print`` and skipped; they never raise.

    Args:
        subject_ids: Iterable of integer subject numbers.
        run_numbers: Iterable of integer run numbers. It is materialised
            once, so a one-shot iterator/generator is applied to every
            subject instead of being exhausted by the first one.
        dataset_path: Root folder containing the ``S###`` sub-folders.
            Defaults to the module-level ``DATASET_PATH``.

    Returns:
        List of existing file paths, ordered by subject and then by run.
    """
    root = DATASET_PATH if dataset_path is None else dataset_path
    # The runs are re-used for every subject, so they must be re-iterable.
    runs = tuple(run_numbers)

    physionet_paths = []
    for subject_id in subject_ids:
        subject_folder = f"S{subject_id:03d}"
        subject_path = os.path.join(root, subject_folder)
        if not os.path.exists(subject_path):
            print(f"Subject path does not exist: {subject_path}")
            continue
        for run in runs:
            run_file = f"{subject_folder}R{run:02d}.edf"
            file_path = os.path.join(subject_path, run_file)
            if os.path.exists(file_path):
                physionet_paths.append(file_path)
            else:
                print(f"File does not exist: {file_path}")
    return physionet_paths

# Collect the EDF files of subjects 1-79 (the range end is exclusive),
# restricted to the runs listed in OPEN_CLOSE_LEFT_RIGHT_FIST.
print("Loading test EDF files...")
test_paths = get_edf_paths(range(1, 80), OPEN_CLOSE_LEFT_RIGHT_FIST)
print(f"Found {len(test_paths)} EDF files")

Dataset path exists: /content/drive/MyDrive/dataset/raw_data/files
Loading test EDF files...
Found 237 EDF files


In [5]:
# Without any input file there is nothing to preprocess.
if not test_paths:
    raise ValueError("No EDF files found.")

# Read each recording fully into memory. Any failure while reading or
# resampling a file is reported and that file is skipped, so one bad
# recording does not abort the whole run.
parts = []
for path in test_paths:
    try:
        raw = mne.io.read_raw_edf(
            path,
            preload=True,
            stim_channel='auto',
            verbose='WARNING',
        )
        sfreq = raw.info['sfreq']
        print(f"Sampling rate for {path}: {sfreq} Hz")
        # Bring every recording to a common 160 Hz rate before they are merged.
        if sfreq != 160:
            print(f"Resampling {path} from {sfreq} Hz to 160 Hz")
            raw.resample(160)
        parts.append(raw)
    except Exception as e:
        print(f"Error loading {path}: {str(e)}")

# Every single file failed to load.
if not parts:
    raise ValueError("No EDF files were successfully loaded.")

Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S001/S001R03.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S001/S001R07.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S001/S001R11.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S002/S002R03.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S002/S002R07.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S002/S002R11.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S003/S003R03.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S003/S003R07.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S003/S003R11.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S004/S004R03.edf: 160.0 Hz
Sampling rate for /content/drive/MyDrive/dataset/raw_data/files/S004/S004R07.edf: 160.0 Hz

In [6]:
# mne.concatenate_raws appends everything to its FIRST element in place.
# Concatenating onto a copy of parts[0] leaves `parts` untouched, so
# re-running this cell does not append the recordings a second time.
raw = mne.concatenate_raws([parts[0].copy(), *parts[1:]])

# Turn the annotations into an events array; `annot` maps each annotation
# description to the integer code used in the last column of `events`.
events, annot = mne.events_from_annotations(raw)
print(f"Annotations found: {list(annot.keys())}")

# Keep only the EEG channels that are not marked as bad.
eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False, exclude='bads')
EEG_CHANNEL = len(eeg_channel_inds)
print(f"Number of EEG channels: {EEG_CHANNEL}")

# Create epochs: event code 2 is treated as "left" and 3 as "right", each
# cut from 1 s to 4.1 s after the event, without baseline correction.
# NOTE(review): this relies on events_from_annotations assigning T1 -> 2
# and T2 -> 3; check the printed `annot` mapping if the inputs change.
epoched = mne.Epochs(
    raw, events, dict(left=2, right=3), tmin=1, tmax=4.1,
    proj=False, picks=eeg_channel_inds, baseline=None, preload=True, verbose=True
)
X_test = epoched.get_data() * 1e3  # Convert to mV
y_test = epoched.events[:, 2] - 2  # Labels: 0=left, 1=right

Used Annotations descriptions: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
Annotations found: [np.str_('T0'), np.str_('T1'), np.str_('T2')]
Number of EEG channels: 64
Not setting metadata
3555 matching events found
No baseline correction applied
Using data from preloaded Raw for 3555 events and 497 original time points ...
189 bad epochs dropped


In [7]:
# Target number of time samples per epoch. round() instead of plain int()
# truncation keeps the result stable if the float product lands just below
# the intended integer.
expected_time_samples = int(round(4.1 * 160))  # 656 samples
current_time_samples = X_test.shape[-1]
print(f"Original test X shape: {X_test.shape}, Test y shape: {y_test.shape}")

# NOTE(review): the epochs were cut with tmin=1, tmax=4.1 (3.1 s long), so
# resampling them to 656 samples stretches the time axis rather than adding
# signal - confirm this is what the consumer of X_test expects.
if current_time_samples != expected_time_samples:
    print(f"Resampling epochs from {current_time_samples} to {expected_time_samples} samples")
    X_test_resampled = np.zeros((X_test.shape[0], X_test.shape[1], expected_time_samples), dtype=np.float32)
    for i in range(X_test.shape[0]):
        # Resample all channels of one epoch in a single call along the
        # time axis; looping per epoch keeps peak memory low while avoiding
        # one Python-level call per channel.
        X_test_resampled[i] = signal.resample(X_test[i], expected_time_samples, axis=-1)
    X_test = X_test_resampled
    print(f"Resampled test X shape: {X_test.shape}")

# Fixed dtypes for the saved arrays: float32 data, int64 labels.
X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)
print(f"Final test X shape: {X_test.shape}, Test y shape: {y_test.shape}")


Original test X shape: (3366, 64, 497), Test y shape: (3366,)
Resampling epochs from 497 to 656 samples
Resampled test X shape: (3366, 64, 656)
Final test X shape: (3366, 64, 656), Test y shape: (3366,)


In [8]:
# Count the epochs per label (0 = left, 1 = right).
left_count = (y_test == 0).sum()
right_count = (y_test == 1).sum()
print(f"Class distribution: Left={left_count}, Right={right_count}")

# Write the data and label arrays as .npy files into SAVE_PATH.
for file_name, array in (('X_test.npy', X_test), ('y_test.npy', y_test)):
    np.save(os.path.join(SAVE_PATH, file_name), array)
print(f"Preprocessed data saved to: {os.path.join(SAVE_PATH, 'X_test.npy')}")
print(f"Labels saved to: {os.path.join(SAVE_PATH, 'y_test.npy')}")

Class distribution: Left=1688, Right=1678
Preprocessed data saved to: /content/drive/MyDrive/dataset/preprocessed_data/X_test.npy
Labels saved to: /content/drive/MyDrive/dataset/preprocessed_data/y_test.npy


In [9]:
class EEGDataset(data.Dataset):
    """Map-style dataset over in-memory samples and optional labels.

    Args:
        x: Indexable collection of samples; ``x[idx]`` must be convertible
            with ``torch.tensor``.
        y: Indexable collection of labels aligned with ``x``. Required
            unless ``inference`` is true; ignored when it is.
        inference: When true, the dataset holds no labels and
            ``__getitem__`` returns only the sample.

    Raises:
        ValueError: If labels are required but ``y`` is ``None``, or if
            ``x`` and ``y`` differ in length.
    """

    def __init__(self, x, y=None, inference=False):
        super().__init__()
        # Fail at construction time instead of on the first __getitem__.
        if not inference:
            if y is None:
                raise ValueError("y is required unless inference=True")
            if len(x) != len(y):
                raise ValueError(
                    f"x and y must have the same length, got {len(x)} and {len(y)}"
                )
        self.__split = "test" if not inference else "inference"
        self.dataset = {'x': x, 'y': y} if not inference else {'x': x}

    def __len__(self):
        """Return the number of samples."""
        return len(self.dataset['x'])

    def __getitem__(self, idx):
        """Return sample ``idx`` as a float tensor, with its label unless in inference mode.

        The label is returned as a float tensor with one trailing
        dimension added by ``unsqueeze(-1)``.
        """
        x = self.dataset['x'][idx]
        x = torch.tensor(x).float()
        if self.__split != "inference":
            y = self.dataset['y'][idx]
            y = torch.tensor(y).unsqueeze(-1).float()
            return x, y
        return x

# Wrap the preprocessed arrays and serve them one sample per batch, in stored order.
test_dataset = EEGDataset(x=X_test, y=y_test)
test_loader = data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
)