# In [1]:
import numpy as np
import os
from typing import Tuple

def load_eeg_data(dataset_path: str, class_labels: list) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load EEG data and corresponding integer labels from the dataset directory.

    Each entry of ``class_labels`` names a sub-folder of ``dataset_path``. Every
    file directly inside that sub-folder whose name ends with ``.npy`` is loaded
    with ``np.load`` and labelled with the entry's index in ``class_labels``.
    Class folders that are missing (or are not directories) are skipped with a
    printed warning; their index is still reserved, so labels of the remaining
    classes do not shift.

    Files are visited in sorted name order so the sample order of the returned
    arrays is reproducible (``os.listdir`` itself gives no ordering guarantee).

    Parameters:
        dataset_path (str): Path to the dataset directory.
        class_labels (list): List of class label names corresponding to subfolders.

    Returns:
        Tuple[np.ndarray, np.ndarray]: A tuple containing data (X) and labels (y).
        ``y`` is an integer array with one entry per loaded file. ``X`` is built
        with ``np.array`` from the loaded arrays.
        NOTE(review): stacking assumes every file holds an array of the same
        shape -- confirm against the dataset.
    """
    X, y = [], []

    for label, class_name in enumerate(class_labels):
        class_folder = os.path.join(dataset_path, class_name)
        # isdir (not exists): a regular file with the class name would make
        # os.listdir raise NotADirectoryError further down.
        if not os.path.isdir(class_folder):
            print(f"Warning: Class folder {class_folder} does not exist.")
            continue

        # Sort for a deterministic sample order across runs and filesystems.
        for file_name in sorted(os.listdir(class_folder)):
            if not file_name.endswith(".npy"):
                continue
            X.append(np.load(os.path.join(class_folder, file_name)))
            y.append(label)

    # Convert lists to numpy arrays. The explicit integer dtype keeps the label
    # array integral even when nothing was loaded (an empty list would otherwise
    # become float64).
    return np.array(X), np.array(y, dtype=int)

def main():
    """Load the EEG training set from its Kaggle input path and print the array shapes."""
    # Sub-folder names of the dataset; a name's position in this list is the
    # integer label assigned to the files in that folder.
    class_labels = [
        "Normal",
        "Complex_Partial_Seizures",
        "Electrographic_Seizures",
        "Video_detected_Seizures_with_no_visual_change_over_EEG",
    ]

    samples, targets = load_eeg_data(
        "/kaggle/input/eeg-train/train_data",
        class_labels,
    )

    # Quick sanity check of what was loaded.
    print(f"Data shape: {samples.shape}")
    print(f"Labels shape: {targets.shape}")


if __name__ == "__main__":
    main()


# Output:
# Data shape: (5608, 19, 500)
# Labels shape: (5608,)
