In [1]:
# Install MNE into the *kernel's* environment (%pip rather than !pip) and pin
# the version this notebook was developed against so re-runs are reproducible.
%pip install -q mne==1.9.0

Collecting mne
  Downloading mne-1.9.0-py3-none-any.whl.metadata (20 kB)
Downloading mne-1.9.0-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mne
Successfully installed mne-1.9.0


In [7]:
# Mount Google Drive into the Colab filesystem; every data path used by this
# notebook lives underneath this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import numpy as np
import pandas as pd
import mne
from sklearn.preprocessing import StandardScaler

In [4]:
# --- Signal parameters -------------------------------------------------------
# Channels extracted from every subject CSV (column names in the raw files).
eeg_channels = ["Fz", "FCz", "Cz", "CPz", "Pz"]
# Sampling rate handed to MNE as `sfreq` (Hz).
sampling_freq = 250
# Every trial is truncated / zero-padded to this many samples
# (250 samples at 250 Hz = 1 second per trial).
desired_timepoints = 250

# --- Paths -------------------------------------------------------------------
# Project root on the mounted Drive; the trailing slash is relied on by the
# derived paths below.
drive_path = "/content/drive/MyDrive/FileStore/"
source_folder = f"{drive_path}eeg/"
# For a dry run, point this at a scratch location instead,
# e.g. drive_path + "testing/test/".
destination_folder = f"{drive_path}preprocessed_eeg/"
demographic_path = f"{drive_path}demographic.csv"

# Make sure the output folder exists before anything tries to write to it.
os.makedirs(destination_folder, exist_ok=True)

In [5]:
# Sanity-check the raw per-subject CSV files. The configured `source_folder`
# is interpolated (instead of repeating the literal path) so this listing can
# never drift from the folder the processing loop actually reads.
!ls {source_folder}


100.csv  129.csv  157.csv  185.csv  212.csv  240.csv  269.csv  297.csv	324.csv  47.csv  75.csv
101.csv  12.csv   158.csv  186.csv  213.csv  241.csv  26.csv   298.csv	325.csv  48.csv  76.csv
102.csv  130.csv  159.csv  187.csv  214.csv  242.csv  270.csv  299.csv	326.csv  49.csv  77.csv
103.csv  131.csv  15.csv   188.csv  215.csv  243.csv  271.csv  29.csv	327.csv  4.csv	 78.csv
104.csv  132.csv  160.csv  189.csv  216.csv  244.csv  272.csv  2.csv	328.csv  50.csv  79.csv
105.csv  133.csv  161.csv  18.csv   217.csv  245.csv  273.csv  300.csv	329.csv  51.csv  7.csv
106.csv  134.csv  162.csv  190.csv  218.csv  246.csv  274.csv  301.csv	32.csv	 52.csv  80.csv
107.csv  135.csv  163.csv  191.csv  219.csv  247.csv  275.csv  302.csv	330.csv  53.csv  81.csv
108.csv  136.csv  164.csv  192.csv  21.csv   248.csv  276.csv  303.csv	331.csv  54.csv  82.csv
109.csv  137.csv  165.csv  193.csv  220.csv  249.csv  277.csv  304.csv	332.csv  55.csv  83.csv
10.csv	 138.csv  166.csv  194.csv  221.csv  24.csv   278

In [None]:
# Preprocess every subject CSV in `source_folder` and write, per subject,
#   <subject>.npy        -> float array (n_trials, n_channels, desired_timepoints)
#   <subject>_label.npy  -> array (n_trials,) holding the subject's group label
# into `destination_folder`.
#
# Relies on the configuration cell for: demographic_path, source_folder,
# destination_folder, eeg_channels, desired_timepoints, sampling_freq.

demographic_df = pd.read_csv(demographic_path)
# Keys are subject ids as strings so they compare equal to the CSV file stems.
demographic_dict = dict(zip(demographic_df['subject'].astype(str), demographic_df['group']))

# Idempotent; kept so this cell still works if the output folder was removed.
os.makedirs(destination_folder, exist_ok=True)

csv_files = sorted(f for f in os.listdir(source_folder) if f.endswith(".csv"))

# The channel layout is the same for every trial of every subject, so the MNE
# info object is built once instead of once per file.
info = mne.create_info(ch_names=eeg_channels, sfreq=sampling_freq, ch_types="eeg")


def _fit_to_length(eeg_data, n_timepoints):
    """Return `eeg_data` (channels x samples) with exactly `n_timepoints` samples.

    Longer trials are truncated from the end; shorter trials are zero-padded
    at the end.
    """
    n_samples = eeg_data.shape[1]
    if n_samples >= n_timepoints:
        return eeg_data[:, :n_timepoints]
    padding = np.zeros((eeg_data.shape[0], n_timepoints - n_samples))
    return np.hstack((eeg_data, padding))


def _preprocess_trial(trial_data, trial_info):
    """Notch-filter (50 Hz), band-pass (1-30 Hz) and average-reference one trial.

    Parameters
    ----------
    trial_data : ndarray, shape (n_channels, n_timepoints)
    trial_info : mne.Info describing the channels of `trial_data`.

    Returns
    -------
    ndarray with the same shape as `trial_data`.
    """
    # verbose="WARNING" keeps MNE warnings visible but drops the per-call INFO
    # chatter, which otherwise floods the notebook (several lines per trial).
    # NOTE(review): with l_trans_bandwidth=0.5 the automatically chosen FIR
    # length is likely longer than a 250-sample trial, and the zero padding is
    # filtered as if it were signal - confirm this is acceptable.
    raw = mne.io.RawArray(trial_data, trial_info, verbose="WARNING")
    raw.notch_filter(freqs=50, trans_bandwidth=4, filter_length="auto", verbose="WARNING")
    raw.filter(
        1, 30,
        fir_design="firwin",
        l_trans_bandwidth=0.5,
        h_trans_bandwidth=8,
        filter_length="auto",
        verbose="WARNING",
    )
    raw.set_eeg_reference("average", verbose="WARNING")
    return raw.get_data()


def _process_subject_file(csv_path, label, channels, n_timepoints, trial_info):
    """Load one subject CSV and return `(X_preprocessed, y)`.

    The CSV must contain the columns "subject", "trial" and every name in
    `channels`; rows are grouped into trials by (subject, trial).
    Presumably rows inside a trial are already in time order - they are used
    in file order without sorting.

    Returns
    -------
    X_preprocessed : ndarray, shape (n_trials, len(channels), n_timepoints)
    y : ndarray, shape (n_trials,), every entry equal to `label`.

    Raises
    ------
    ValueError
        If the file contains no (subject, trial) groups.
    """
    data_df = pd.read_csv(csv_path)

    trials = [
        _fit_to_length(group[channels].to_numpy().T, n_timepoints)
        for _, group in data_df.groupby(["subject", "trial"])
    ]
    if not trials:
        # Explicit message instead of numpy's opaque "need at least one array".
        raise ValueError("file contains no (subject, trial) groups")

    X = np.stack(trials)
    y = np.array([label] * len(trials))

    X_preprocessed = np.array([_preprocess_trial(trial_data, trial_info) for trial_data in X])

    # Standardise each (channel, timepoint) feature across the trials of this
    # file: the array is flattened to (n_trials, n_features), scaled per
    # column, then restored to its 3-D shape.
    # NOTE(review): this removes the across-trial mean at every timepoint and
    # maps a file with a single trial to all zeros - confirm this is intended.
    scaler = StandardScaler()
    flat = X_preprocessed.reshape(len(X_preprocessed), -1)
    X_preprocessed = scaler.fit_transform(flat).reshape(X_preprocessed.shape)

    return X_preprocessed, y


processed_files, skipped_files, failed_files = [], [], []

for file_name in csv_files:
    subject_id = os.path.splitext(file_name)[0]

    if subject_id not in demographic_dict:
        print(f"⚠️ Skipping {file_name}: Subject ID not found in demographic.csv.")
        skipped_files.append(file_name)
        continue

    source_file_path = os.path.join(source_folder, file_name)
    destination_file_path = os.path.join(destination_folder, subject_id + ".npy")
    label_file_path = os.path.join(destination_folder, subject_id + "_label.npy")

    print(f"\n🔄 Processing: {file_name}")

    try:
        X_preprocessed, y = _process_subject_file(
            source_file_path,
            demographic_dict[subject_id],
            eeg_channels,
            desired_timepoints,
            info,
        )
        # Save each file separately
        np.save(destination_file_path, X_preprocessed)
        np.save(label_file_path, y)
    except Exception as e:
        # Deliberately best-effort: one bad file must not abort the batch, but
        # the failure is recorded so the final summary cannot claim success.
        print(f"❌ Error processing {file_name}: {type(e).__name__}: {e}")
        failed_files.append(file_name)
        continue

    print(f"✅ Processed & saved: {destination_file_path}")
    processed_files.append(file_name)

print(
    f"\nDone: {len(processed_files)} processed, "
    f"{len(skipped_files)} skipped, {len(failed_files)} failed."
)
if failed_files:
    print(f"❌ Failed files: {failed_files}")
else:
    print("✅ All Processed & saved !!!!")