<a href="https://colab.research.google.com/github/AyeshaAnzerBCIT/Multisource/blob/main/Fusion_preprocess_(EEG).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import scipy.io
import h5py
import pandas as pd
import matplotlib.pyplot as plt
import torchaudio.transforms as T
from scipy.stats import entropy
from tqdm import tqdm
import scipy.signal
import numpy as np
import gc
from scipy.signal import welch  #  Import Welch's method for PSD computation
from scipy.stats import entropy

In [None]:
# --- SET DEVICE ---
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU so
# the notebook still runs on machines without CUDA. The previous expression
# returned "cuda:0" from both branches of its conditional, so it never selected
# a different GPU and never fell back.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(f" Using device: {device}")

 Using device: cuda:0


In [None]:
# --- STEP 1: FIND ALL EEG FILES ---
def find_mat_files(root_dir, modality="EEG"):
    """Collect every ``.mat`` file below ``root_dir`` that belongs to a modality.

    A directory is searched only when its full path contains both the
    ``modality`` string and the literal ``"mat_format"``.

    Args:
        root_dir: Directory at which the recursive walk starts.
        modality: Substring that must occur in the directory path.

    Returns:
        List of file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(folder, name)
        for folder, _dirs, names in os.walk(root_dir)
        if modality in folder and "mat_format" in folder
        for name in names
        if name.endswith(".mat")
    ]

# Root directory that is walked recursively for EEG recordings; only folders
# whose path contains both "EEG" and "mat_format" contribute files.
root_dataset_path = "/data/ayesha/PhD/dataset/EEG"
# Full paths of every matching .mat file found below the root.
mat_files = find_mat_files(root_dataset_path)
print(f"Total .mat files found: {len(mat_files)}")

Total .mat files found: 1392


In [None]:
#Step 2
def load_mat_file(file_path):
    """Read the ``EEG`` group of a MATLAB v7.3 (HDF5) file into torch tensors.

    Args:
        file_path: Path of the ``.mat`` file to open with ``h5py``.

    Returns:
        ``None`` when the file cannot be read or has no ``EEG/data`` dataset.
        Otherwise a dict with:

        * ``"EEG"``   - float32 tensor on the module-level ``device``. A 2-D
          array is transposed when its second dimension is the larger one, so
          the longer axis always comes first.
        * ``"srate"`` - float32 tensor holding ``EEG/srate`` (a ``(1, 1)``
          dataset is unwrapped to a scalar), or ``256.0`` when the file does
          not store a sampling rate.

    Any exception raised while reading is reported on stdout and turned into
    a ``None`` result.
    """
    try:
        with h5py.File(file_path, 'r') as mat:
            loaded = {}
            eeg_group = mat['EEG'] if 'EEG' in mat else None

            if eeg_group is not None:
                if 'data' in eeg_group:
                    signal = torch.tensor(eeg_group['data'][()], dtype=torch.float32).to(device)

                    # Put the longer axis first.
                    if signal.shape[1] > signal.shape[0]:
                        signal = signal.T

                    loaded['EEG'] = signal
                    print(f" Loaded EEG data from {file_path} with shape {signal.shape}")  # Debug print

                if 'srate' in eeg_group:
                    rate = eeg_group['srate'][()]
                    # MATLAB scalars arrive as 1x1 arrays; unwrap them.
                    rate = rate[0, 0] if rate.shape == (1, 1) else rate
                    loaded['srate'] = torch.tensor(rate, dtype=torch.float32).to(device)
                else:
                    loaded['srate'] = torch.tensor(256.0, dtype=torch.float32).to(device)

            return loaded if 'EEG' in loaded else None
    except Exception as e:
        print(f"⚠ Error loading {file_path}: {e}")
        return None

In [None]:
# --- STEP 3: CHECK IF EEG DATA IS PREPROCESSED ---
def is_preprocessed(file_path):
    """Return True when the path contains "preprocessed" (case-insensitive)."""
    lowered_path = file_path.lower()
    return lowered_path.find("preprocessed") != -1

In [None]:
# --- STEP 4: TORCH-BASED EEG SIGNAL PREPROCESSING ---

def bandpass_filter_torch(data, lowcut=0.5, highcut=50.0, fs=256, order=5):
    """Zero-phase Butterworth band-pass filter for a torch tensor.

    The filtering itself runs on the CPU with SciPy; the result is returned as
    a float32 tensor on the module-level ``device``.

    Args:
        data: Tensor to filter. Inputs with two or more dimensions are
            filtered along axis 1 (so a 2-D input must be laid out as
            ``(channels, samples)``); a 1-D input is filtered along its only
            axis.
        lowcut: Lower pass-band edge in Hz.
        highcut: Upper pass-band edge in Hz.
        fs: Sampling rate in Hz.
        order: Butterworth filter order.

    Returns:
        Filtered float32 tensor with the same shape as ``data``.

    Raises:
        ValueError: If the band edges do not satisfy
            ``0 < lowcut < highcut < fs / 2``.
    """
    nyquist = 0.5 * fs
    if not 0 < lowcut < highcut < nyquist:
        raise ValueError(
            f"Band edges must satisfy 0 < lowcut < highcut < fs/2; "
            f"got lowcut={lowcut}, highcut={highcut}, fs={fs}"
        )

    # Second-order sections stay numerically stable for narrow low edges
    # (0.5 Hz at 256 Hz); the (b, a) polynomial form is fragile at this order.
    sos = scipy.signal.butter(
        order, [lowcut / nyquist, highcut / nyquist], btype='band', output='sos'
    )

    signal_np = data.detach().cpu().numpy()
    time_axis = 1 if signal_np.ndim > 1 else -1
    filtered_np = scipy.signal.sosfiltfilt(sos, signal_np, axis=time_axis)
    # The forward-backward pass can leave negative strides; torch needs a
    # contiguous buffer.
    filtered_np = np.ascontiguousarray(filtered_np)

    return torch.tensor(filtered_np, dtype=torch.float32, device=device)

def normalize_signal_torch(data):
    """Min-max scale a tensor to the range [-1, 1].

    The minimum and maximum are taken over the whole tensor (not per row), so
    relative amplitudes between rows are preserved.

    Args:
        data: Non-empty tensor to rescale.

    Returns:
        Tensor of the same shape on the module-level ``device``. A constant
        input (max == min) has no defined scale and is mapped to all zeros
        instead of the NaNs a division by zero would produce.
    """
    lowest = data.min()
    span = data.max() - lowest

    if span == 0:
        # Keep the floating dtype that true division would have produced.
        out_dtype = data.dtype if data.is_floating_point() else torch.get_default_dtype()
        return torch.zeros_like(data, dtype=out_dtype).to(device)

    return ((data - lowest) / span * 2 - 1).to(device)


In [None]:
# --- STEP 5: PREPROCESS EEG ---


def preprocess_eeg(mat_files, save_path="/data/ayesha/PhD/dataset/EEG/preprocessed"):
    """Band-pass filter and normalise EEG recordings, saving one ``.pt`` per file.

    Args:
        mat_files: Iterable of ``.mat`` paths readable by ``load_mat_file``.
        save_path: Output directory; created when missing.

    Returns:
        Dict mapping each subject id (file name up to its first ``.``) to the
        path of the saved ``<subject_id>_preprocessed.pt`` tensor. Files that
        fail to load are skipped and do not appear in the dict.
    """
    os.makedirs(save_path, exist_ok=True)
    preprocessed_data = {}

    print(f"Processing {len(mat_files)} EEG files...")

    for file_path in tqdm(mat_files, desc=" Processing EEG", unit="file"):
        data = load_mat_file(file_path)
        if not (data and "EEG" in data):
            continue

        eeg_signal = data["EEG"].squeeze()
        srate = data.get("srate", torch.tensor(256)).item()

        # load_mat_file puts the longer (time) axis first, but the band-pass
        # filter runs along axis 1. Filter with time last so it acts on
        # samples rather than across channels, then restore the stored layout.
        samples_first = eeg_signal.ndim == 2 and eeg_signal.shape[0] > eeg_signal.shape[1]
        if samples_first:
            eeg_signal = eeg_signal.T

        filtered_signal = bandpass_filter_torch(eeg_signal, fs=srate)
        normalized_signal = normalize_signal_torch(filtered_signal)
        if samples_first:
            normalized_signal = normalized_signal.T.contiguous()

        subject_id = os.path.basename(file_path).split(".")[0]
        out_path = os.path.join(save_path, f"{subject_id}_preprocessed.pt")
        torch.save(normalized_signal, out_path)
        # Record the output so callers can hand the mapping to save_features.
        preprocessed_data[subject_id] = out_path

        # Free memory after each file
        del eeg_signal, filtered_signal, normalized_signal, data
        torch.cuda.empty_cache()  # Release GPU memory
        gc.collect()  # Free Python memory

    print(" Preprocessing Complete!")
    return preprocessed_data


In [None]:
def compute_psd_torch(data, fs=256):
    """Compute the Power Spectral Density (PSD) of each channel with Welch's method.

    Args:
        data: Torch tensor, NumPy array, or sequence of per-channel
            tensors/arrays laid out as ``(channels, samples)``. A 1-D input is
            treated as a single channel.
        fs: Sampling rate in Hz; may be an int or a float.

    Returns:
        ``(psd, freqs)`` - two arrays of shape ``(channels, n_freqs)``. Every
        row of ``freqs`` holds the same frequency vector. Two empty arrays are
        returned when ``data`` holds no samples.
    """
    if isinstance(data, torch.Tensor):
        signals = data.detach().cpu().numpy()
    else:
        signals = np.asarray([
            ch.detach().cpu().numpy() if isinstance(ch, torch.Tensor) else ch
            for ch in data
        ])

    if signals.size == 0:
        return np.array([]), np.array([])
    signals = np.atleast_2d(signals)

    # Welch needs an integer segment length. Clamping it to the signal length
    # matches SciPy's own fallback for short inputs but avoids its warning.
    nperseg = min(int(fs), signals.shape[-1])

    # One vectorised call over all channels instead of one call per row.
    freqs, psd = welch(signals, fs=fs, nperseg=nperseg, axis=-1)

    return psd, np.tile(freqs, (psd.shape[0], 1))

def hjorth_parameters(eeg_signal):
    """Return the Hjorth mobility and complexity of every row.

    Args:
        eeg_signal: 2-D array; statistics are taken along axis 1, so each row
            is treated as one signal.

    Returns:
        ``(mobility, complexity)`` - two 1-D arrays with one value per row.
    """
    slope = np.diff(eeg_signal, axis=1)   # first difference
    curvature = np.diff(slope, axis=1)    # second difference

    activity = np.var(eeg_signal, axis=1)
    slope_power = np.var(slope, axis=1)
    curvature_power = np.var(curvature, axis=1)

    mobility = np.sqrt(slope_power / activity)
    # Complexity is the mobility of the first difference relative to the
    # mobility of the signal itself.
    complexity = np.sqrt(curvature_power / slope_power) / mobility

    return mobility, complexity

def spectral_entropy(eeg_signal, fs=256):
    """Shannon entropy (in bits) of each row's normalised power spectrum.

    Args:
        eeg_signal: Signal passed straight to ``compute_psd_torch``.
        fs: Sampling rate in Hz, forwarded to ``compute_psd_torch``.

    Returns:
        1-D array with one entropy value per PSD row.
    """
    spectra = compute_psd_torch(eeg_signal, fs)[0]
    # Scale every spectrum so it sums to one, turning it into a distribution.
    row_totals = np.sum(spectra, axis=1, keepdims=True)
    distributions = spectra / row_totals
    return np.array([entropy(row, base=2) for row in distributions])


In [None]:
# --- STEP 7: SAVE FEATURES ---
def save_features(preprocessed_data, save_path="/data/ayesha/PhD/dataset/EEG/features"):
    """Extract per-channel EEG features from saved tensors and write one CSV each.

    Subjects that already have a ``<subject_id>_features.csv`` in
    ``save_path`` are skipped, so the function can be re-run after an
    interruption.

    Args:
        preprocessed_data: Dict mapping subject id to the path of a ``.pt``
            file holding a single 2-D tensor.
        save_path: Output directory; created when missing.

    Returns:
        None. Each CSV has one row per channel and the columns
        ``Hjorth_Mobility``, ``Hjorth_Complexity`` and ``Spectral_Entropy``.
        A failure on one subject is printed and the loop continues.
    """
    os.makedirs(save_path, exist_ok=True)

    # Subjects that already have a feature file on disk.
    processed_files = {f.replace("_features.csv", "") for f in os.listdir(save_path) if f.endswith(".csv")}

    # Keep only the subjects that still need processing.
    unprocessed_data = {subj: path for subj, path in preprocessed_data.items() if subj not in processed_files}

    if not unprocessed_data:
        print("? All EEG files have already been processed! No new processing required.")
        return

    print(f"?? Extracting features for {len(unprocessed_data)} NEW subjects...")

    for subject_id, file_path in tqdm(unprocessed_data.items(), desc=" Extracting Features", unit="subject"):
        try:
            # weights_only=True restricts unpickling to tensors and plain
            # containers, so a tampered file cannot execute code.
            eeg_data = torch.load(file_path, map_location="cpu", weights_only=True)

            if not isinstance(eeg_data, torch.Tensor):
                print(f"?? Warning: Data in {file_path} is not a tensor.")
                continue

            print(f"?? Processing {subject_id} - EEG Shape: {eeg_data.shape}")  # Debugging

            # The feature helpers treat axis 0 as channels and axis 1 as time,
            # while the stored tensors are (samples, channels). Put the longer
            # (time) axis last so features are computed per channel.
            if eeg_data.ndim == 2 and eeg_data.shape[0] > eeg_data.shape[1]:
                eeg_data = eeg_data.T
            signal = eeg_data.contiguous().numpy()

            # Compute EEG features (spectral_entropy computes the PSD itself).
            mobility, complexity = hjorth_parameters(signal)
            entropy_values = spectral_entropy(signal)

            # Organize features: one row per channel.
            feature_dict = {
                "Hjorth_Mobility": mobility.tolist(),
                "Hjorth_Complexity": complexity.tolist(),
                "Spectral_Entropy": entropy_values.tolist()
            }

            df_features = pd.DataFrame(feature_dict)
            df_features.to_csv(os.path.join(save_path, f"{subject_id}_features.csv"), index=False)

        except Exception as e:
            print(f"? Error processing {subject_id}: {e}")

    print("? Feature Extraction Complete!")


In [None]:
# ------------------------------ #
# Run the Updated Pipeline
# ------------------------------ #
# Feature extraction only: this cell reads tensors that were preprocessed
# earlier and are already on disk as .pt files.

print("?? Starting PyTorch-Based EEG Preprocessing & Feature Extraction...\n")

# Directory holding the already-preprocessed tensors.
unzipped_path = "/data/ayesha/PhD/dataset/EEG/preprocessed_unzipped"
# Map subject id (file name without the "_preprocessed.pt" suffix) to the
# tensor's full path; this is the dict shape save_features consumes.
preprocessed_data = {
    f.replace("_preprocessed.pt", ""): os.path.join(unzipped_path, f)
    for f in os.listdir(unzipped_path) if f.endswith(".pt")
}

if not preprocessed_data:
    print("?? No EEG .pt files found! Check dataset path and structure.")
else:
    save_features(preprocessed_data)  # Process only new files

?? Starting PyTorch-Based EEG Preprocessing & Feature Extraction...

?? Extracting features for 201 NEW subjects...


  eeg_data = torch.load(file_path, map_location="cpu")


?? Processing gip_A00053990019 - EEG Shape: torch.Size([149903, 111])


  freqs, _, Pxy = _spectral_helper(x, y, fs, window, nperseg, noverlap,
 Extracting Features:   0%|              | 1/201 [00:48<2:40:19, 48.10s/subject]

?? Processing gp_A00055837001 - EEG Shape: torch.Size([200521, 111])


 Extracting Features:   1%|▏             | 2/201 [01:51<3:08:39, 56.88s/subject]

?? Processing gp_A00054535007 - EEG Shape: torch.Size([111576, 111])


 Extracting Features:   1%|▏             | 3/201 [02:26<2:35:15, 47.05s/subject]

?? Processing oip_A00055024014 - EEG Shape: torch.Size([160203, 111])


 Extracting Features:   2%|▎             | 4/201 [03:15<2:37:33, 47.99s/subject]

?? Processing gp_A00054766016 - EEG Shape: torch.Size([19401, 111])


 Extracting Features:   2%|▎             | 5/201 [03:22<1:48:17, 33.15s/subject]

?? Processing gip_A00057630007 - EEG Shape: torch.Size([102378, 111])


 Extracting Features:   3%|▍             | 6/201 [03:54<1:46:08, 32.66s/subject]

?? Processing oip_A00055628016 - EEG Shape: torch.Size([77676, 111])


 Extracting Features:   3%|▍             | 7/201 [04:18<1:36:18, 29.79s/subject]

?? Processing gp_A00055682007 - EEG Shape: torch.Size([122327, 111])


 Extracting Features:   4%|▌             | 8/201 [04:58<1:46:05, 32.98s/subject]

?? Processing oip_A00057630011 - EEG Shape: torch.Size([161452, 111])


 Extracting Features:   4%|▋             | 9/201 [05:50<2:04:25, 38.89s/subject]

?? Processing oip_A00053375005 - EEG Shape: torch.Size([141628, 111])


 Extracting Features:   5%|▋            | 10/201 [06:34<2:09:18, 40.62s/subject]

?? Processing gip_A00054369008 - EEG Shape: torch.Size([166429, 111])


 Extracting Features:   5%|▋            | 11/201 [07:27<2:20:15, 44.29s/subject]

?? Processing op_A00055662002 - EEG Shape: torch.Size([197481, 111])


 Extracting Features:   6%|▊            | 12/201 [08:29<2:36:22, 49.64s/subject]

?? Processing gip_A00055893002 - EEG Shape: torch.Size([169553, 111])


 Extracting Features:   6%|▊            | 13/201 [09:22<2:39:06, 50.78s/subject]

?? Processing gip_A00054894016 - EEG Shape: torch.Size([72977, 111])


 Extracting Features:   7%|▉            | 14/201 [09:45<2:12:17, 42.45s/subject]

?? Processing gip_A00054517005 - EEG Shape: torch.Size([109803, 111])


 Extracting Features:   7%|▉            | 15/201 [10:19<2:03:33, 39.86s/subject]

?? Processing gip_A00055296003 - EEG Shape: torch.Size([88177, 111])


 Extracting Features:   8%|█            | 16/201 [10:47<1:51:58, 36.32s/subject]

?? Processing gip_A00056166010 - EEG Shape: torch.Size([159003, 111])


 Extracting Features:   8%|█            | 17/201 [11:37<2:04:07, 40.48s/subject]

?? Processing gip_A00055956008 - EEG Shape: torch.Size([112501, 111])


 Extracting Features:   9%|█▏           | 18/201 [12:12<1:58:14, 38.77s/subject]

?? Processing bip_A00056693010 - EEG Shape: torch.Size([194930, 111])


 Extracting Features:   9%|█▏           | 19/201 [13:15<2:19:26, 45.97s/subject]

?? Processing oip_A00054647014 - EEG Shape: torch.Size([174753, 111])


 Extracting Features:  10%|█▎           | 20/201 [14:12<2:29:00, 49.39s/subject]

?? Processing oip_A00055424010 - EEG Shape: torch.Size([245855, 111])


 Extracting Features:  10%|█▎           | 21/201 [15:32<2:55:28, 58.49s/subject]

?? Processing bp_A00054694011 - EEG Shape: torch.Size([126829, 111])


 Extracting Features:  11%|█▍           | 22/201 [16:13<2:38:43, 53.20s/subject]

?? Processing gp_A00063558014 - EEG Shape: torch.Size([21725, 111])


 Extracting Features:  11%|█▍           | 23/201 [16:20<1:56:42, 39.34s/subject]

?? Processing gip_A00054359009 - EEG Shape: torch.Size([93352, 111])


 Extracting Features:  12%|█▌           | 24/201 [16:50<1:48:05, 36.64s/subject]

?? Processing gp_A00051826003 - EEG Shape: torch.Size([79477, 111])


 Extracting Features:  12%|█▌           | 25/201 [17:15<1:37:23, 33.20s/subject]

?? Processing gip_A00055649004 - EEG Shape: torch.Size([125353, 111])


 Extracting Features:  13%|█▋           | 26/201 [17:55<1:43:01, 35.32s/subject]

?? Processing bip_A00055038013 - EEG Shape: torch.Size([201355, 111])


 Extracting Features:  13%|█▋           | 27/201 [18:59<2:07:05, 43.82s/subject]

?? Processing oip_A00063558013 - EEG Shape: torch.Size([88502, 111])


 Extracting Features:  14%|█▊           | 28/201 [19:27<1:52:38, 39.07s/subject]

?? Processing oip_A00056723001 - EEG Shape: torch.Size([196104, 111])


 Extracting Features:  14%|█▉           | 29/201 [20:30<2:12:03, 46.07s/subject]

?? Processing gip_A00055540013 - EEG Shape: torch.Size([170280, 111])


 Extracting Features:  15%|█▉           | 30/201 [21:24<2:18:07, 48.46s/subject]

?? Processing bp_A00054721004 - EEG Shape: torch.Size([97275, 111])


 Extracting Features:  15%|██           | 31/201 [21:54<2:01:46, 42.98s/subject]

?? Processing gp_A00058596002 - EEG Shape: torch.Size([205029, 111])


 Extracting Features:  16%|██           | 32/201 [22:57<2:18:35, 49.20s/subject]

?? Processing gp_A00062125009 - EEG Shape: torch.Size([157679, 111])


 Extracting Features:  16%|██▏          | 33/201 [23:48<2:18:39, 49.52s/subject]

?? Processing bp_A00057092010 - EEG Shape: torch.Size([120354, 111])


 Extracting Features:  17%|██▏          | 34/201 [24:25<2:07:47, 45.91s/subject]

?? Processing gp_A00054239008 - EEG Shape: torch.Size([184653, 111])


 Extracting Features:  17%|██▎          | 35/201 [25:24<2:17:27, 49.68s/subject]

?? Processing gip_A00059578003 - EEG Shape: torch.Size([142303, 111])


 Extracting Features:  18%|██▎          | 36/201 [26:08<2:12:07, 48.05s/subject]

?? Processing oip_A00053480005 - EEG Shape: torch.Size([120552, 111])


 Extracting Features:  18%|██▍          | 37/201 [26:45<2:02:38, 44.87s/subject]

?? Processing gip_A00054866 005 - EEG Shape: torch.Size([135503, 111])


 Extracting Features:  19%|██▍          | 38/201 [27:28<2:00:03, 44.19s/subject]

?? Processing oip_A00063051 007 - EEG Shape: torch.Size([250930, 111])


 Extracting Features:  19%|██▌          | 39/201 [28:47<2:27:13, 54.53s/subject]

?? Processing op_A00058596004 - EEG Shape: torch.Size([139803, 111])


 Extracting Features:  20%|██▌          | 40/201 [29:30<2:17:41, 51.31s/subject]

?? Processing gp_A00054239004 - EEG Shape: torch.Size([133604, 111])


 Extracting Features:  20%|██▋          | 41/201 [30:12<2:09:21, 48.51s/subject]

?? Processing bip_A00054215010 - EEG Shape: torch.Size([167279, 111])


 Extracting Features:  21%|██▋          | 42/201 [31:05<2:11:48, 49.74s/subject]

?? Processing gp_A00055085003 - EEG Shape: torch.Size([85802, 111])


 Extracting Features:  21%|██▊          | 43/201 [31:32<1:52:37, 42.77s/subject]

?? Processing gip_A00054836003 - EEG Shape: torch.Size([78826, 111])


 Extracting Features:  22%|██▊          | 44/201 [31:57<1:38:03, 37.48s/subject]

?? Processing bp_A00056428009 - EEG Shape: torch.Size([201379, 111])


 Extracting Features:  22%|██▉          | 45/201 [33:01<1:58:11, 45.46s/subject]

?? Processing gip_A00058775008 - EEG Shape: torch.Size([157152, 111])


 Extracting Features:  23%|██▉          | 46/201 [33:50<2:00:25, 46.62s/subject]

?? Processing gip_A00054907002 - EEG Shape: torch.Size([153928, 111])


 Extracting Features:  23%|███          | 47/201 [34:39<2:01:44, 47.43s/subject]

?? Processing gip_A00055956006 - EEG Shape: torch.Size([117827, 111])


 Extracting Features:  24%|███          | 48/201 [35:17<1:53:47, 44.63s/subject]

?? Processing oip_A00054659006 - EEG Shape: torch.Size([147278, 111])


 Extracting Features:  24%|███▏         | 49/201 [36:05<1:55:34, 45.62s/subject]

?? Processing oip_A00059904003 - EEG Shape: torch.Size([71877, 111])


 Extracting Features:  25%|███▏         | 50/201 [36:28<1:37:17, 38.66s/subject]

?? Processing oip_A00056257009 - EEG Shape: torch.Size([160255, 111])


 Extracting Features:  25%|███▎         | 51/201 [37:18<1:45:08, 42.05s/subject]

?? Processing oip_A00055893013 - EEG Shape: torch.Size([263680, 111])


 Extracting Features:  26%|███▎         | 52/201 [38:43<2:16:12, 54.85s/subject]

?? Processing gip_A00054659013 - EEG Shape: torch.Size([161279, 111])


 Extracting Features:  26%|███▍         | 53/201 [39:35<2:13:21, 54.06s/subject]

?? Processing gp_A00055024011 - EEG Shape: torch.Size([129902, 111])


 Extracting Features:  27%|███▍         | 54/201 [40:17<2:03:48, 50.54s/subject]

?? Processing gp_A00055085005 - EEG Shape: torch.Size([104378, 111])


 Extracting Features:  27%|███▌         | 55/201 [40:50<1:50:27, 45.39s/subject]

?? Processing gip_A00051886005 - EEG Shape: torch.Size([113577, 111])


 Extracting Features:  28%|███▌         | 56/201 [41:26<1:42:31, 42.43s/subject]

?? Processing gip_A00055065013 - EEG Shape: torch.Size([159854, 111])


 Extracting Features:  28%|███▋         | 57/201 [42:16<1:47:40, 44.86s/subject]

?? Processing bip_A00055055010 - EEG Shape: torch.Size([159628, 111])


 Extracting Features:  29%|███▊         | 58/201 [43:08<1:51:41, 46.86s/subject]

?? Processing gip_A00056716008 - EEG Shape: torch.Size([187728, 111])


 Extracting Features:  29%|███▊         | 59/201 [44:10<2:01:22, 51.28s/subject]

?? Processing gp_A00058596001 - EEG Shape: torch.Size([225531, 111])


 Extracting Features:  30%|███▉         | 60/201 [45:22<2:15:37, 57.71s/subject]

?? Processing oip_A00053375007 - EEG Shape: torch.Size([188778, 111])


 Extracting Features:  30%|███▉         | 61/201 [46:23<2:16:35, 58.54s/subject]

?? Processing oip_A00054917004 - EEG Shape: torch.Size([149052, 111])


 Extracting Features:  31%|████         | 62/201 [47:09<2:07:02, 54.83s/subject]

?? Processing gip_A00054743014 - EEG Shape: torch.Size([154004, 111])


 Extracting Features:  31%|████         | 63/201 [47:57<2:01:16, 52.73s/subject]

?? Processing gip_A00054743007 - EEG Shape: torch.Size([103275, 111])


 Extracting Features:  32%|████▏        | 64/201 [48:29<1:46:34, 46.67s/subject]

?? Processing gp_A00054469005 - EEG Shape: torch.Size([107603, 111])


 Extracting Features:  32%|████▏        | 65/201 [49:02<1:36:30, 42.58s/subject]

?? Processing gip_A00054894010 - EEG Shape: torch.Size([160329, 111])


 Extracting Features:  33%|████▎        | 66/201 [49:52<1:40:35, 44.71s/subject]

?? Processing oip_A00054517006 - EEG Shape: torch.Size([124552, 111])


 Extracting Features:  33%|████▎        | 67/201 [50:31<1:36:04, 43.02s/subject]

?? Processing gip_A00062842005 - EEG Shape: torch.Size([145974, 111])


 Extracting Features:  34%|████▍        | 68/201 [51:17<1:37:07, 43.82s/subject]

?? Processing gip_A00054866 011 - EEG Shape: torch.Size([159804, 111])


 Extracting Features:  34%|████▍        | 69/201 [52:08<1:40:58, 45.90s/subject]

?? Processing gp_A00056913010 - EEG Shape: torch.Size([126103, 111])


 Extracting Features:  35%|████▌        | 70/201 [52:47<1:36:13, 44.07s/subject]

?? Processing oip_A00062453007 - EEG Shape: torch.Size([200128, 111])


 Extracting Features:  35%|████▌        | 71/201 [53:50<1:47:22, 49.56s/subject]

?? Processing oip_A00057630004 - EEG Shape: torch.Size([145279, 111])


 Extracting Features:  36%|████▋        | 72/201 [54:35<1:43:57, 48.36s/subject]

?? Processing gip_A00062842003 - EEG Shape: torch.Size([252080, 111])


 Extracting Features:  36%|████▋        | 73/201 [55:55<2:03:00, 57.66s/subject]

?? Processing gip_A00056913009 - EEG Shape: torch.Size([122177, 111])


 Extracting Features:  37%|████▊        | 74/201 [56:34<1:50:26, 52.17s/subject]

?? Processing op_A00056116015 - EEG Shape: torch.Size([169729, 111])


 Extracting Features:  37%|████▊        | 75/201 [57:26<1:49:38, 52.21s/subject]

?? Processing gip_A00054469006 - EEG Shape: torch.Size([101028, 111])


 Extracting Features:  38%|████▉        | 76/201 [57:58<1:36:14, 46.19s/subject]

?? Processing gip_A00054387010 - EEG Shape: torch.Size([148428, 111])


 Extracting Features:  38%|████▉        | 77/201 [58:46<1:36:01, 46.46s/subject]

?? Processing oip_A00056716002 - EEG Shape: torch.Size([90127, 111])


 Extracting Features:  39%|█████        | 78/201 [59:14<1:23:58, 40.97s/subject]

?? Processing oip_A00057599008 - EEG Shape: torch.Size([146003, 111])


 Extracting Features:  39%|█████        | 79/201 [59:59<1:26:08, 42.37s/subject]

?? Processing bp_A00062329013 - EEG Shape: torch.Size([217029, 111])


 Extracting Features:  40%|████▍      | 80/201 [1:01:08<1:41:19, 50.25s/subject]

?? Processing gp_A00054469008 - EEG Shape: torch.Size([94277, 111])


 Extracting Features:  40%|████▍      | 81/201 [1:01:37<1:27:53, 43.94s/subject]

?? Processing gp_A00053440_part2005 - EEG Shape: torch.Size([123152, 111])


 Extracting Features:  41%|████▍      | 82/201 [1:02:16<1:24:10, 42.44s/subject]

?? Processing gip_A00055065012 - EEG Shape: torch.Size([161755, 111])


 Extracting Features:  41%|████▌      | 83/201 [1:03:07<1:28:13, 44.86s/subject]

?? Processing gip_A00051955003 - EEG Shape: torch.Size([88553, 111])


 Extracting Features:  42%|████▌      | 84/201 [1:03:34<1:17:07, 39.55s/subject]

?? Processing gip_A00054930010 - EEG Shape: torch.Size([156653, 111])


 Extracting Features:  42%|████▋      | 85/201 [1:04:23<1:21:55, 42.37s/subject]

?? Processing gp_A00054517017 - EEG Shape: torch.Size([72551, 111])


 Extracting Features:  43%|████▋      | 86/201 [1:04:45<1:09:48, 36.42s/subject]

?? Processing oip_A00055392006 - EEG Shape: torch.Size([124152, 111])


 Extracting Features:  43%|████▊      | 87/201 [1:05:23<1:09:56, 36.82s/subject]

?? Processing bip_A00054215011 - EEG Shape: torch.Size([125600, 111])


 Extracting Features:  44%|████▊      | 88/201 [1:06:02<1:10:47, 37.59s/subject]

?? Processing gip_A00057630002 - EEG Shape: torch.Size([137177, 111])


 Extracting Features:  44%|████▊      | 89/201 [1:06:45<1:13:13, 39.22s/subject]

?? Processing oip_A00055956010 - EEG Shape: torch.Size([164329, 111])


 Extracting Features:  45%|████▉      | 90/201 [1:07:36<1:18:56, 42.67s/subject]

?? Processing bp_A00056002010 - EEG Shape: torch.Size([182754, 111])


 Extracting Features:  45%|████▉      | 91/201 [1:08:33<1:25:43, 46.76s/subject]

?? Processing bp_A00056166007 - EEG Shape: torch.Size([115751, 111])


 Extracting Features:  46%|█████      | 92/201 [1:09:09<1:19:16, 43.64s/subject]

?? Processing oip_A00054666002 - EEG Shape: torch.Size([232579, 111])


 Extracting Features:  46%|█████      | 93/201 [1:10:22<1:34:29, 52.50s/subject]

?? Processing gip_A00058775002 - EEG Shape: torch.Size([123352, 111])


 Extracting Features:  47%|█████▏     | 94/201 [1:11:01<1:26:22, 48.43s/subject]

?? Processing oip_A00055077011 - EEG Shape: torch.Size([161978, 111])


 Extracting Features:  47%|█████▏     | 95/201 [1:11:52<1:26:46, 49.12s/subject]

?? Processing gip_A00055628014 - EEG Shape: torch.Size([198704, 111])


 Extracting Features:  48%|█████▎     | 96/201 [1:12:54<1:33:01, 53.15s/subject]

?? Processing gip_A00053990008 - EEG Shape: torch.Size([63901, 111])


 Extracting Features:  48%|█████▎     | 97/201 [1:13:14<1:14:47, 43.15s/subject]

?? Processing gip_A00059083017 - EEG Shape: torch.Size([77926, 111])


 Extracting Features:  49%|█████▎     | 98/201 [1:13:39<1:04:30, 37.57s/subject]

?? Processing gp_A00054239010 - EEG Shape: torch.Size([154778, 111])


 Extracting Features:  49%|█████▍     | 99/201 [1:14:27<1:09:21, 40.80s/subject]

?? Processing gp_A00053597011 - EEG Shape: torch.Size([124877, 111])


 Extracting Features:  50%|████▉     | 100/201 [1:15:06<1:08:02, 40.42s/subject]

?? Processing gp_A00056716007 - EEG Shape: torch.Size([120928, 111])


 Extracting Features:  50%|█████     | 101/201 [1:15:44<1:06:06, 39.67s/subject]

?? Processing gip_A00059063010 - EEG Shape: torch.Size([145804, 111])


 Extracting Features:  51%|█████     | 102/201 [1:16:31<1:08:51, 41.73s/subject]

?? Processing oip_A00056054013 - EEG Shape: torch.Size([180104, 111])


 Extracting Features:  51%|█████     | 103/201 [1:17:28<1:15:28, 46.21s/subject]

?? Processing gp_A00057630005 - EEG Shape: torch.Size([147228, 111])


 Extracting Features:  52%|█████▏    | 104/201 [1:18:14<1:14:40, 46.19s/subject]

?? Processing bp_A00063051 013 - EEG Shape: torch.Size([194654, 111])


 Extracting Features:  52%|█████▏    | 105/201 [1:19:16<1:21:45, 51.10s/subject]

?? Processing bip_A00055613010 - EEG Shape: torch.Size([157728, 111])


 Extracting Features:  53%|█████▎    | 106/201 [1:20:07<1:20:47, 51.03s/subject]

?? Processing gp_A00054930006 - EEG Shape: torch.Size([109102, 111])


 Extracting Features:  53%|█████▎    | 107/201 [1:20:42<1:12:11, 46.08s/subject]

?? Processing oip_A00055662013 - EEG Shape: torch.Size([148053, 111])


 Extracting Features:  54%|█████▎    | 108/201 [1:21:29<1:12:03, 46.49s/subject]

?? Processing gip_A00054359012 - EEG Shape: torch.Size([149603, 111])


 Extracting Features:  54%|█████▍    | 109/201 [1:22:19<1:12:52, 47.52s/subject]

?? Processing gip_A00053597006 - EEG Shape: torch.Size([177829, 111])


 Extracting Features:  55%|█████▍    | 110/201 [1:23:19<1:17:48, 51.30s/subject]

?? Processing op_A00054694010 - EEG Shape: torch.Size([172780, 111])


 Extracting Features:  55%|█████▌    | 111/201 [1:24:17<1:20:05, 53.40s/subject]

?? Processing oip_A00056166011 - EEG Shape: torch.Size([126304, 111])


 Extracting Features:  56%|█████▌    | 112/201 [1:24:59<1:14:02, 49.92s/subject]

?? Processing gip_A00054907013 - EEG Shape: torch.Size([131152, 111])


 Extracting Features:  56%|█████▌    | 113/201 [1:25:43<1:10:31, 48.08s/subject]

?? Processing gp_A00054930002 - EEG Shape: torch.Size([117402, 111])


 Extracting Features:  57%|█████▋    | 114/201 [1:26:22<1:05:46, 45.36s/subject]

?? Processing bip_A00053375004 - EEG Shape: torch.Size([153579, 111])


 Extracting Features:  57%|█████▋    | 115/201 [1:27:13<1:07:30, 47.10s/subject]

?? Processing oip_A00055628006 - EEG Shape: torch.Size([120578, 111])


 Extracting Features:  58%|█████▊    | 116/201 [1:27:53<1:03:38, 44.92s/subject]

?? Processing gip_A00055024005 - EEG Shape: torch.Size([104402, 111])


 Extracting Features:  58%|██████▉     | 117/201 [1:28:27<58:27, 41.75s/subject]

?? Processing gip_A00062125004 - EEG Shape: torch.Size([251330, 111])


 Extracting Features:  59%|█████▊    | 118/201 [1:29:49<1:14:09, 53.60s/subject]

?? Processing oip_A00054400010 - EEG Shape: torch.Size([194404, 111])


 Extracting Features:  59%|█████▉    | 119/201 [1:30:52<1:17:06, 56.42s/subject]

?? Processing gip_A00058775005 - EEG Shape: torch.Size([103851, 111])


 Extracting Features:  60%|█████▉    | 120/201 [1:31:26<1:07:17, 49.84s/subject]

?? Processing gp_A00054039004 - EEG Shape: torch.Size([123702, 111])


 Extracting Features:  60%|██████    | 121/201 [1:32:06<1:02:23, 46.80s/subject]

?? Processing gp_A00054647002 - EEG Shape: torch.Size([146854, 111])


 Extracting Features:  61%|██████    | 122/201 [1:32:54<1:01:58, 47.07s/subject]

?? Processing gp_A00055682004 - EEG Shape: torch.Size([162804, 111])


 Extracting Features:  61%|██████    | 123/201 [1:33:47<1:03:38, 48.95s/subject]

?? Processing gip_A00055077001 - EEG Shape: torch.Size([217955, 111])


 Extracting Features:  62%|██████▏   | 124/201 [1:34:57<1:10:57, 55.29s/subject]

?? Processing gp_A00062055013 - EEG Shape: torch.Size([61875, 111])


 Extracting Features:  62%|███████▍    | 125/201 [1:35:17<56:34, 44.67s/subject]

?? Processing bip_A00054694003 - EEG Shape: torch.Size([78327, 111])


 Extracting Features:  63%|███████▌    | 126/201 [1:35:43<48:44, 39.00s/subject]

?? Processing bp_A00054597011 - EEG Shape: torch.Size([137177, 111])


 Extracting Features:  63%|███████▌    | 127/201 [1:36:27<50:11, 40.69s/subject]

?? Processing gp_A00054852015 - EEG Shape: torch.Size([72675, 111])


 Extracting Features:  64%|███████▋    | 128/201 [1:36:51<43:18, 35.59s/subject]

?? Processing gp_A00054469013 - EEG Shape: torch.Size([174053, 111])


 Extracting Features:  64%|███████▋    | 129/201 [1:37:48<50:25, 42.01s/subject]

?? Processing oip_A00054852005 - EEG Shape: torch.Size([102877, 111])


 Extracting Features:  65%|███████▊    | 130/201 [1:38:21<46:29, 39.29s/subject]

?? Processing gip_A00053460009 - EEG Shape: torch.Size([146927, 111])


 Extracting Features:  65%|███████▊    | 131/201 [1:39:09<48:57, 41.96s/subject]

?? Processing gip_A00055745003 - EEG Shape: torch.Size([142651, 111])


 Extracting Features:  66%|███████▉    | 132/201 [1:39:56<49:48, 43.32s/subject]

?? Processing gp_A00056604008 - EEG Shape: torch.Size([105277, 111])


 Extracting Features:  66%|███████▉    | 133/201 [1:40:30<46:07, 40.71s/subject]

?? Processing oip_A00054923011 - EEG Shape: torch.Size([138902, 111])


 Extracting Features:  67%|████████    | 134/201 [1:41:15<46:42, 41.83s/subject]

?? Processing bip_A00053909002 - EEG Shape: torch.Size([121727, 111])


 Extracting Features:  67%|████████    | 135/201 [1:41:55<45:30, 41.37s/subject]

?? Processing gp_A00054930014 - EEG Shape: torch.Size([159227, 111])


 Extracting Features:  68%|████████    | 136/201 [1:42:47<48:07, 44.42s/subject]

?? Processing gip_A00056723003 - EEG Shape: torch.Size([75451, 111])


 Extracting Features:  68%|████████▏   | 137/201 [1:43:11<40:53, 38.34s/subject]

?? Processing gip_A00055055001 - EEG Shape: torch.Size([176391, 111])


 Extracting Features:  69%|████████▏   | 138/201 [1:44:08<46:05, 43.89s/subject]

?? Processing gp_A00055865003 - EEG Shape: torch.Size([137053, 111])


 Extracting Features:  69%|████████▎   | 139/201 [1:44:52<45:36, 44.13s/subject]

?? Processing oip_A00054215007 - EEG Shape: torch.Size([120377, 111])


 Extracting Features:  70%|████████▎   | 140/201 [1:45:30<43:04, 42.38s/subject]

?? Processing bp_A00056428003 - EEG Shape: torch.Size([75526, 111])


 Extracting Features:  70%|████████▍   | 141/201 [1:45:55<37:08, 37.15s/subject]

?? Processing gip_A00059063016 - EEG Shape: torch.Size([81952, 111])


 Extracting Features:  71%|████████▍   | 142/201 [1:46:22<33:20, 33.91s/subject]

?? Processing oip_A00055540012 - EEG Shape: torch.Size([157403, 111])


 Extracting Features:  71%|████████▌   | 143/201 [1:47:13<37:47, 39.09s/subject]

?? Processing bip_A00056002014 - EEG Shape: torch.Size([194554, 111])


 Extracting Features:  72%|████████▌   | 144/201 [1:48:15<43:45, 46.06s/subject]

?? Processing gip_A00054023007 - EEG Shape: torch.Size([115603, 111])


 Extracting Features:  72%|████████▋   | 145/201 [1:48:53<40:35, 43.49s/subject]

?? Processing bp_A00054647007 - EEG Shape: torch.Size([104600, 111])


 Extracting Features:  73%|████████▋   | 146/201 [1:49:26<37:00, 40.38s/subject]

?? Processing gip_A00054387005 - EEG Shape: torch.Size([105827, 111])


 Extracting Features:  73%|████████▊   | 147/201 [1:50:00<34:44, 38.60s/subject]

?? Processing gip_A00054743005 - EEG Shape: torch.Size([187103, 111])


 Extracting Features:  74%|████████▊   | 148/201 [1:51:00<39:46, 45.03s/subject]

?? Processing oip_A00062453009 - EEG Shape: torch.Size([130903, 111])


 Extracting Features:  74%|████████▉   | 149/201 [1:51:42<38:11, 44.06s/subject]

?? Processing gip_A00056166001 - EEG Shape: torch.Size([264956, 111])


 Extracting Features:  75%|████████▉   | 150/201 [1:53:07<47:56, 56.40s/subject]

?? Processing gp_A00054743010 - EEG Shape: torch.Size([147103, 111])


 Extracting Features:  75%|█████████   | 151/201 [1:53:55<44:54, 53.88s/subject]

?? Processing oip_A00055754010 - EEG Shape: torch.Size([152703, 111])


 Extracting Features:  76%|█████████   | 152/201 [1:54:45<42:54, 52.54s/subject]

?? Processing oip_A00055424005 - EEG Shape: torch.Size([103852, 111])


 Extracting Features:  76%|█████████▏  | 153/201 [1:55:19<37:35, 46.98s/subject]

?? Processing gip_A00054359006 - EEG Shape: torch.Size([117065, 111])


 Extracting Features:  77%|█████████▏  | 154/201 [1:55:57<34:42, 44.31s/subject]

?? Processing gip_A00054836005 - EEG Shape: torch.Size([105452, 111])


 Extracting Features:  77%|█████████▎  | 155/201 [1:56:31<31:34, 41.18s/subject]

?? Processing oip_A00056723004 - EEG Shape: torch.Size([146579, 111])


 Extracting Features:  78%|█████████▎  | 156/201 [1:57:19<32:33, 43.41s/subject]

?? Processing gip_A00054359010 - EEG Shape: torch.Size([139928, 111])


 Extracting Features:  78%|█████████▎  | 157/201 [1:58:04<32:12, 43.92s/subject]

?? Processing gp_A00054894005 - EEG Shape: torch.Size([170429, 111])


 Extracting Features:  79%|█████████▍  | 158/201 [1:58:59<33:45, 47.11s/subject]

?? Processing gip_A00063558001 - EEG Shape: torch.Size([191229, 111])


 Extracting Features:  79%|█████████▍  | 159/201 [2:00:00<35:54, 51.29s/subject]

?? Processing oip_A00059063004 - EEG Shape: torch.Size([137652, 111])


 Extracting Features:  80%|█████████▌  | 160/201 [2:00:45<33:41, 49.30s/subject]

?? Processing gip_A00053990003 - EEG Shape: torch.Size([153728, 111])


 Extracting Features:  80%|█████████▌  | 161/201 [2:01:35<32:59, 49.50s/subject]

?? Processing gip_A00054488004 - EEG Shape: torch.Size([94952, 111])


 Extracting Features:  81%|█████████▋  | 162/201 [2:02:06<28:33, 43.94s/subject]

?? Processing bp_A00054721008 - EEG Shape: torch.Size([96577, 111])


 Extracting Features:  81%|█████████▋  | 163/201 [2:02:36<25:20, 40.01s/subject]

?? Processing gip_A00062453003 - EEG Shape: torch.Size([233805, 111])


 Extracting Features:  82%|█████████▊  | 164/201 [2:03:52<31:17, 50.73s/subject]

?? Processing bp_A00057599006 - EEG Shape: torch.Size([116402, 111])


 Extracting Features:  82%|█████████▊  | 165/201 [2:04:30<28:05, 46.83s/subject]

?? Processing gp_A00054469001 - EEG Shape: torch.Size([268476, 111])


 Extracting Features:  83%|█████████▉  | 166/201 [2:05:56<34:08, 58.53s/subject]

?? Processing bp_A00055436005 - EEG Shape: torch.Size([109453, 111])


 Extracting Features:  83%|█████████▉  | 167/201 [2:06:31<29:15, 51.65s/subject]

?? Processing oip_A00055392005 - EEG Shape: torch.Size([103727, 111])


 Extracting Features:  84%|██████████  | 168/201 [2:07:06<25:31, 46.40s/subject]

?? Processing gip_A00054488009 - EEG Shape: torch.Size([221030, 111])


 Extracting Features:  84%|██████████  | 169/201 [2:08:16<28:37, 53.69s/subject]

?? Processing gip_A00055837009 - EEG Shape: torch.Size([103152, 111])


 Extracting Features:  85%|██████████▏ | 170/201 [2:08:50<24:35, 47.61s/subject]

?? Processing bip_A00056693014 - EEG Shape: torch.Size([195953, 111])


 Extracting Features:  85%|██████████▏ | 171/201 [2:09:53<26:10, 52.36s/subject]

?? Processing gip_A00053597010 - EEG Shape: torch.Size([159604, 111])


 Extracting Features:  86%|██████████▎ | 172/201 [2:10:45<25:11, 52.13s/subject]

?? Processing gip_A00054517009 - EEG Shape: torch.Size([152979, 111])


 Extracting Features:  86%|██████████▎ | 173/201 [2:11:34<23:56, 51.31s/subject]

?? Processing bip_A00055436007 - EEG Shape: torch.Size([113152, 111])


 Extracting Features:  87%|██████████▍ | 174/201 [2:12:11<21:07, 46.94s/subject]

?? Processing oip_A00055628015 - EEG Shape: torch.Size([172053, 111])


 Extracting Features:  87%|██████████▍ | 175/201 [2:13:06<21:25, 49.46s/subject]

?? Processing gp_A00054469002 - EEG Shape: torch.Size([119402, 111])


 Extracting Features:  88%|██████████▌ | 176/201 [2:13:45<19:16, 46.27s/subject]

?? Processing gip_A00055865005 - EEG Shape: torch.Size([107427, 111])


 Extracting Features:  88%|██████████▌ | 177/201 [2:14:20<17:10, 42.96s/subject]

?? Processing gip_A00057092004 - EEG Shape: torch.Size([74025, 111])


 Extracting Features:  89%|██████████▋ | 178/201 [2:14:44<14:14, 37.15s/subject]

?? Processing gp_A00053990014 - EEG Shape: torch.Size([157405, 111])


 Extracting Features:  89%|██████████▋ | 179/201 [2:15:35<15:10, 41.37s/subject]

?? Processing oip_A00055865007 - EEG Shape: torch.Size([143728, 111])


 Extracting Features:  90%|██████████▋ | 180/201 [2:16:22<15:03, 43.01s/subject]

?? Processing gp_A00063377 004 - EEG Shape: torch.Size([222004, 111])


 Extracting Features:  90%|██████████▊ | 181/201 [2:17:34<17:13, 51.69s/subject]

?? Processing gip_A00054597005 - EEG Shape: torch.Size([107251, 111])


 Extracting Features:  91%|██████████▊ | 182/201 [2:18:08<14:42, 46.47s/subject]

?? Processing bip_A00056054011 - EEG Shape: torch.Size([124253, 111])


 Extracting Features:  91%|██████████▉ | 183/201 [2:18:49<13:25, 44.75s/subject]

?? Processing oip_A00055628001 - EEG Shape: torch.Size([39752, 111])


 Extracting Features:  92%|██████████▉ | 184/201 [2:19:02<09:57, 35.15s/subject]

?? Processing gip_A00055623014 - EEG Shape: torch.Size([197578, 111])


 Extracting Features:  92%|███████████ | 185/201 [2:20:06<11:41, 43.83s/subject]

?? Processing gip_A00056166004 - EEG Shape: torch.Size([148128, 111])


 Extracting Features:  93%|███████████ | 186/201 [2:20:54<11:18, 45.26s/subject]

?? Processing oip_A00056054004 - EEG Shape: torch.Size([129628, 111])


 Extracting Features:  93%|███████████▏| 187/201 [2:21:37<10:23, 44.56s/subject]

?? Processing bip_A00053398_part2003 - EEG Shape: torch.Size([107527, 111])


 Extracting Features:  94%|███████████▏| 188/201 [2:22:12<09:01, 41.64s/subject]

?? Processing oip_A00055103007 - EEG Shape: torch.Size([106377, 111])


 Extracting Features:  94%|███████████▎| 189/201 [2:22:47<07:55, 39.59s/subject]

?? Processing gip_A00056166015 - EEG Shape: torch.Size([171852, 111])


 Extracting Features:  95%|███████████▎| 190/201 [2:23:43<08:08, 44.45s/subject]

?? Processing gip_A00054488014 - EEG Shape: torch.Size([233630, 111])


 Extracting Features:  95%|███████████▍| 191/201 [2:24:58<08:58, 53.85s/subject]

?? Processing oip_A00055024010 - EEG Shape: torch.Size([157454, 111])


 Extracting Features:  96%|███████████▍| 192/201 [2:25:50<07:58, 53.22s/subject]

?? Processing gip_A00062919 004 - EEG Shape: torch.Size([220204, 111])


 Extracting Features:  96%|███████████▌| 193/201 [2:27:04<07:54, 59.33s/subject]

?? Processing gip_A00054535003 - EEG Shape: torch.Size([83826, 111])


 Extracting Features:  97%|███████████▌| 194/201 [2:27:32<05:49, 49.94s/subject]

?? Processing gip_A00053990006 - EEG Shape: torch.Size([58525, 111])


 Extracting Features:  97%|███████████▋| 195/201 [2:27:51<04:04, 40.79s/subject]

?? Processing oip_A00055910001 - EEG Shape: torch.Size([211226, 111])


 Extracting Features:  98%|███████████▋| 196/201 [2:29:02<04:08, 49.68s/subject]

?? Processing oip_A00056257013 - EEG Shape: torch.Size([170702, 111])


 Extracting Features:  98%|███████████▊| 197/201 [2:29:58<03:26, 51.74s/subject]

?? Processing gip_A00054659003 - EEG Shape: torch.Size([375459, 111])


 Extracting Features:  99%|███████████▊| 198/201 [2:32:02<03:39, 73.27s/subject]

?? Processing gip_A00054894014 - EEG Shape: torch.Size([146426, 111])


 Extracting Features:  99%|███████████▉| 199/201 [2:32:51<02:12, 66.10s/subject]

?? Processing gip_A00054917003 - EEG Shape: torch.Size([334357, 111])


 Extracting Features: 100%|███████████▉| 200/201 [2:34:43<01:19, 79.82s/subject]

?? Processing gip_A00062329004 - EEG Shape: torch.Size([215630, 111])


 Extracting Features: 100%|████████████| 201/201 [2:35:55<00:00, 46.55s/subject]

? Feature Extraction Complete!





In [None]:
def unzip_pt_files(preprocessed_path):
    """Re-saves every .pt file of a directory in the legacy (non-zipfile) format.

    Each ``*.pt`` file found directly in ``preprocessed_path`` is loaded onto
    the CPU with ``torch.load`` and written, under the same file name, to a
    sibling directory named ``<preprocessed_path>_unzipped`` using
    ``_use_new_zipfile_serialization=False``.

    Files that fail to load or save are reported and skipped; they do not
    abort the run.

    Args:
        preprocessed_path: Directory containing the .pt files to convert.
            Trailing path separators are ignored.

    Returns:
        Path of the output directory (created if it does not already exist).
    """
    # Drop trailing separators first: "dir/" + "_unzipped" would otherwise put
    # the output directory *inside* the input directory instead of beside it.
    separators = os.sep + (os.altsep or "")
    base_path = preprocessed_path.rstrip(separators) or preprocessed_path
    unzipped_path = base_path + "_unzipped"
    os.makedirs(unzipped_path, exist_ok=True)

    # Filter up front so the progress bar counts only the files that are
    # actually converted; sort so the processing order is reproducible.
    pt_files = sorted(
        name for name in os.listdir(preprocessed_path) if name.endswith(".pt")
    )

    failed = []
    for file in tqdm(pt_files, desc=" Unzipping .pt files"):
        file_path = os.path.join(preprocessed_path, file)
        save_path = os.path.join(unzipped_path, file)

        try:
            # NOTE: torch.load unpickles the file, which can execute arbitrary
            # code. Only run this on files from a trusted source.
            data = torch.load(file_path, map_location="cpu")

            # Re-save with the legacy serialization format
            torch.save(data, save_path, _use_new_zipfile_serialization=False)

            # Release the loaded object before the next file is read so two
            # recordings are never held in memory at once.
            del data

            print(f" Unzipped and saved: {save_path}")

        except Exception as e:
            failed.append(file)
            print(f" Failed to unzip {file}: {e}")

    # Only claim full success when nothing was skipped.
    if failed:
        print(f" {len(failed)} of {len(pt_files)} .pt files failed to unzip: {failed}")
    else:
        print(f" All .pt files unzipped to: {unzipped_path}")
    return unzipped_path


In [None]:
# Directory holding the re-saved (unzipped) preprocessed EEG .pt files
unzipped_path = "/data/ayesha/PhD/dataset/EEG/preprocessed_unzipped"

# Map each file's name (with the "_preprocessed.pt" suffix removed) to its full path
preprocessed_data = dict(
    (pt_name.replace("_preprocessed.pt", ""), os.path.join(unzipped_path, pt_name))
    for pt_name in os.listdir(unzipped_path)
    if pt_name.endswith(".pt")
)

# Hand the name -> path mapping to the feature-extraction step
save_features(preprocessed_data)

? All EEG files have already been processed! No new processing required.
