In [None]:
import os
import mne
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from RESTutils import data_process

In [None]:
# ---------------------------------------------------------------------------
# Configuration: every value a user may need to adjust lives in this cell.
# ---------------------------------------------------------------------------

# Signal parameters
fs = 512           # presumably the sampling rate in Hz -- TODO confirm
epoch_length = 4   # seconds

# Channels to extract from each recording; change if needed.
EEG_channel_name = 'RF'
EMG_channel_name = 'EMG'

# Input folder holding both the EDF recordings and the score files.
# The files should be named like 'subject_1.edf' and 'subject_1.tsv'.
# NOTE(review): the scanning cell selects score files by the '.txt' extension,
# not '.tsv' -- confirm which extension is intended.
folder_path = r'M:\Alex\Python\REST\Test'

# Output folder and file name for the processed data.
# NOTE(review): the result is written with np.savez, which appends '.npz' when
# the path does not already end with it, so the file on disk will carry an
# extra '.npz' suffix after this name.
save_path = r'M:\Alex\Python\REST\Test'
file_name = "processed_data.npy"
full_path = os.path.join(save_path, file_name)

In [None]:
# Scan folder_path once and split its entries into score files ('.txt') and
# recordings ('.edf'). The listing is sorted by file name, so both lists end up
# in the same name order and can be paired by position.
folder_entries = sorted(os.listdir(folder_path))
fp_score = [os.path.join(folder_path, entry) for entry in folder_entries if entry.endswith('.txt')]
fp_edf = [os.path.join(folder_path, entry) for entry in folder_entries if entry.endswith('.edf')]

# Pairing is positional (zip), which silently stops at the shorter list. If the
# counts differ, a missing file can also shift every later pair, so warn loudly.
if len(fp_edf) != len(fp_score):
    print(
        f"Mismatch: found {len(fp_edf)} EDF file(s) but {len(fp_score)} score file(s) "
        f"in {folder_path}; files are paired by position, so some may be skipped or paired incorrectly"
    )

# Check if the pairs are correct: compare the part of each base name that
# precedes the last underscore.
# NOTE(review): for names like 'subject_1.edf' this compares only 'subject',
# so it cannot tell recording 1 from recording 2 -- confirm the intended
# naming convention.
for edf_file, tsv_file in zip(fp_edf, fp_score):
    edf_base = os.path.basename(edf_file).rsplit('_', 1)[0]
    tsv_base = os.path.basename(tsv_file).rsplit('_', 1)[0]
    if edf_base != tsv_base:
        print(f"Mismatch: {edf_file} and {tsv_file}")

In [None]:
# Build the dataset: for every (score file, EDF recording) pair, load the labels
# and the EEG/EMG traces, run them through data_process, trim everything to a
# common length, and finally stack all recordings and save them to full_path.
score_parts = []
EEG_parts = []
EMG_parts = []

for score_file, fp in tqdm(zip(fp_score, fp_edf), total=len(fp_score)):  # loop through all the edf files
    df = pd.read_csv(score_file, delimiter=',')
    # Labels come from the 4th column. copy=True guarantees a writable array:
    # under pandas Copy-on-Write, to_numpy() can return a read-only view and the
    # in-place relabelling below would fail.
    score = df.iloc[:, 3].to_numpy(copy=True)
    score[score > 3] = -100  # mark unscored as -100, just in case they exist (they shouldn't), those will be ignored during training
    score[score == 0] = 4  # move artifact from 0 to 4

    raw = mne.io.read_raw_edf(fp, preload=True)  # read the edf file
    channel_name = raw.info.ch_names  # get the channel names

    # Select channels by substring match against the names configured in the
    # parameters cell (instead of hard-coded literals).
    eeg_idx = [index for index, name in enumerate(channel_name) if EEG_channel_name in name]
    emg_idx = [index for index, name in enumerate(channel_name) if EMG_channel_name in name]
    if not eeg_idx or not emg_idx:
        raise ValueError(
            f"{fp}: could not find channels matching "
            f"{EEG_channel_name!r} and {EMG_channel_name!r} in {channel_name}"
        )

    EEG = raw.get_data(eeg_idx)  # get the EEG channel
    EMG = raw.get_data(emg_idx)  # get the EMG channel
    EEG, EMG = data_process(EEG, EMG)  # process the EEG and EMG signals

    # Compare lengths and cut the longer ones to match the shortest one
    # (lengths are measured along the first axis).
    min_length = min(len(EEG), len(EMG), len(score))
    EEG_parts.append(EEG[:min_length].astype(np.float32))
    EMG_parts.append(EMG[:min_length].astype(np.float32))
    score_parts.append(score[:min_length].astype(np.float32))

# Concatenate once at the end: calling np.concatenate inside the loop would
# re-copy the whole accumulated array for every file.
score_og = np.concatenate(score_parts, axis=0) if score_parts else np.array([], dtype=np.float32)
EEG_og = np.concatenate(EEG_parts, axis=0) if EEG_parts else np.array([], dtype=np.float32)
EMG_og = np.concatenate(EMG_parts, axis=0) if EMG_parts else np.array([], dtype=np.float32)

# NOTE: np.savez appends '.npz' to the path when it does not already end with it.
np.savez(full_path, EEG=EEG_og, EMG=EMG_og, score=score_og)