In [None]:
import os
import mne
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from RESTutils import find_data_start,data_process

In [None]:
# ---------------------------------------------------------------------------
# Configuration: everything a user is expected to tune lives in this cell.
# ---------------------------------------------------------------------------

# Signal parameters
fs = 512           # samples per second of the recordings
epoch_length = 4   # length of one scoring epoch, in seconds

# Channel selection (change if your recordings use different labels)
EEG_channel_name = 'RF'   # name of the EEG channel to use
EMG_channel_name = 'EMG'  # name of the EMG channel to use

# Input folder: must contain both the EDF recordings and their score files,
# named as matching pairs, e.g. 'subject_1.edf' and 'subject_1.tsv'.
folder_path = r'M:\Alex\Python\REST\Test'

# Output location for the processed data.
# NOTE(review): full_path is later passed to np.savez, which appends '.npz'
# when the name does not already end with it, so the file written to disk is
# 'processed_data.npy.npz' rather than 'processed_data.npy'.
save_path = r'M:\Alex\Python\REST\Test'
file_name = "processed_data.npy"
full_path = os.path.join(save_path, file_name)

In [None]:
# Collect the score (.tsv) and recording (.edf) files found in folder_path.
# Files are paired purely by their position in the sorted listings, so every
# recording must have a score file with the same stem
# (e.g. 'subject_1.edf' <-> 'subject_1.tsv').
fp_tsv = []
fp_edf = []

# Listing in sorted order keeps both lists sorted (all entries share the same
# folder prefix), so no second sort is required.
for file in sorted(os.listdir(folder_path)):
    if file.endswith('.tsv'):
        fp_tsv.append(os.path.join(folder_path, file))
    elif file.endswith('.edf'):
        fp_edf.append(os.path.join(folder_path, file))

# zip() below stops at the shorter list, so an unequal count means some files
# are silently ignored and the positional pairing may be shifted.
if len(fp_edf) != len(fp_tsv):
    print(f"Mismatch: found {len(fp_edf)} EDF files but {len(fp_tsv)} TSV files")

# Check that each positional pair really belongs together. The full stem
# (file name without extension) is compared; splitting on the last '_' would
# discard the subject number and let 'subject_1' pair with 'subject_2'.
for edf_file, tsv_file in zip(fp_edf, fp_tsv):
    edf_base = os.path.splitext(os.path.basename(edf_file))[0]
    tsv_base = os.path.splitext(os.path.basename(tsv_file))[0]
    if edf_base != tsv_base:
        print(f"Mismatch: {edf_file} and {tsv_file}")

# For every pair, record (number of scored epochs, number of whole epochs
# available in the recording).
pair_lengths = []
for tsv_file, edf_file in zip(fp_tsv, fp_edf):
    # find_data_start locates where the score table begins in the TSV file;
    # the rows before it (and the line at start_line itself) are skipped.
    start_line = find_data_start(tsv_file, sep='\t', expected_columns=5)
    df = pd.read_csv(tsv_file, sep='\t', skiprows=start_line + 1, header=None)
    score = df.iloc[:, 4].to_numpy()  # scores are read from the fifth column
    score_length = len(score)  # number of score epochs in this file
    # preload=False: the sample data is not loaded here, only the length is used.
    raw = mne.io.read_raw_edf(edf_file, preload=False)
    n_samples = raw.n_times
    # Use the configured epoch length rather than a hard-coded 4 seconds.
    eeg_epochs = n_samples // (fs * epoch_length)
    pair_lengths.append((score_length, eeg_epochs))

# %%
# Build one combined dataset from every (score file, recording) pair and save it.
# Per-file arrays are collected in lists and concatenated once at the end, which
# avoids re-copying the growing arrays on every iteration.
score_chunks = []
EEG_chunks = []
EMG_chunks = []

# Iterate over a sized list so the progress bar can show the total.
file_pairs = list(zip(fp_tsv, fp_edf))
for tsv_file, edf_file in tqdm(file_pairs, desc="Processing recordings"):
    # Read the TSV file for the scores. If your score file has a different
    # structure, you may need to adjust this part.
    start_line = find_data_start(tsv_file, sep='\t', expected_columns=5)
    df = pd.read_csv(tsv_file, sep='\t', skiprows=start_line + 1, header=None)  # questionable, may need to change
    score = df.iloc[:, 4].to_numpy()
    # Collapse score 0 and every score above 3 into class 1.
    score[score == 0] = 1
    score[score > 3] = 1

    # Read the EDF file and pick the channels whose names contain the
    # configured EEG / EMG labels (substring match).
    raw = mne.io.read_raw_edf(edf_file, preload=True)
    channel_name = raw.info.ch_names
    eeg_idx = [index for index, name in enumerate(channel_name) if EEG_channel_name in name]
    emg_idx = [index for index, name in enumerate(channel_name) if EMG_channel_name in name]
    # Fail with a message that names the file and the missing channel.
    if not eeg_idx:
        raise ValueError(f"No channel containing '{EEG_channel_name}' found in {edf_file}")
    if not emg_idx:
        raise ValueError(f"No channel containing '{EMG_channel_name}' found in {edf_file}")
    EEG = raw.get_data(eeg_idx)
    EMG = raw.get_data(emg_idx)

    # NOTE(review): data_process is defined in RESTutils; the slicing below
    # assumes it returns arrays with one entry per epoch along axis 0 — confirm.
    EEG, EMG = data_process(EEG, EMG)

    # Trim all three to the shortest so signals and scores stay aligned.
    min_length = min(len(EEG), len(EMG), len(score))
    EEG = EEG[:min_length].astype(np.float32)
    EMG = EMG[:min_length].astype(np.float32)
    score = score[:min_length].astype(np.float32)

    EEG_chunks.append(EEG)
    EMG_chunks.append(EMG)
    score_chunks.append(score)

if score_chunks:
    score_additional = np.concatenate(score_chunks, axis=0)
    EEG_additional = np.concatenate(EEG_chunks, axis=0)
    EMG_additional = np.concatenate(EMG_chunks, axis=0)
else:
    # No file pairs were found: keep the empty containers so the save below
    # still runs, but make the situation visible.
    print("Warning: no EDF/TSV pairs were processed; saving empty arrays")
    score_additional = []
    EEG_additional = []
    EMG_additional = []

# np.savez appends '.npz' to the file name if it does not already end with it.
np.savez(full_path, EEG=EEG_additional, EMG=EMG_additional, score=score_additional)