# Child Extraction

## Extract timings from trigger points in baseline EEG recordings

In [None]:
import mne
import pandas as pd
import os
import datetime

# File paths to EEG and ECG data directories (these paths need to be updated)
EEG_file_path = '...'
ECG_file_path = '...'

# Placeholder written to a timing column when the trigger code is absent
MISSING_TIME = 99999

# Columns describing the overall, first-half and second-half windows
WINDOW_COLUMNS = ['label_o', 'start_o', 'end_o',
                  'label_1', 'start_1', 'end_1',
                  'label_2', 'start_2', 'end_2']


def _find_baseline_files(directory, pid, extension):
    """List files in `directory` whose name contains `pid`, ends with
    `extension` and includes 'baseline' or 'Baseline'."""
    return [f for f in os.listdir(directory)
            if pid in f and f.endswith(extension)
            and ('baseline' in f or 'Baseline' in f)]


def _trigger_clock_time(raw, events, code):
    """Return the clock time ("%H:%M:%S.%f") of the first `code` trigger.

    Returns MISSING_TIME when `events` holds no row with that event code.
    """
    samples = events.loc[events['event_code'] == code, 'samples']
    if samples.empty:
        return MISSING_TIME
    # Event positions are sample indices (offset by raw.first_samp), so they
    # are converted with the recording's own sampling rate instead of a
    # hard-coded 1000 Hz.
    seconds = float(samples.iloc[0] - raw.first_samp) / raw.info['sfreq']
    trigger_time = raw.info['meas_date'] + datetime.timedelta(seconds=seconds)
    return trigger_time.strftime("%H:%M:%S.%f")


def _baseline_windows(ecg_name, start_clock, prefix='', tstyle=None):
    """Build one output row: windows 0-180 s, 0-90 s and 90-180 s after
    `start_clock`, truncated to whole seconds. `tstyle` is only included
    when it is not None."""
    start = datetime.datetime.strptime(start_clock, "%H:%M:%S.%f").replace(microsecond=0)
    t0, t90, t180 = [(start + datetime.timedelta(seconds=offset)).strftime("%H:%M:%S")
                     for offset in (0, 90, 180)]
    record = {'fname': ecg_name}
    if tstyle is not None:
        record['tstyle'] = tstyle
    record.update(zip(WINDOW_COLUMNS, [
        f'{prefix}baseline_overall', t0, t180,
        f'{prefix}baseline_1', t0, t90,
        f'{prefix}baseline_2', t90, t180,
    ]))
    return record


def _window_table(rows, time_column, prefix='', tstyle=None):
    """Convert every row of `rows` into a window row, reading the start time
    from `time_column`. Rows whose start time is the MISSING_TIME placeholder
    (i.e. not a string) are reported and skipped instead of crashing."""
    columns = ['fname'] + (['tstyle'] if tstyle is not None else []) + WINDOW_COLUMNS
    records = []
    for _, rec in rows.iterrows():
        start_clock = rec[time_column]
        if not isinstance(start_clock, str):
            print(f"{rec['PID']}: no {time_column} trigger found, skipped")
            continue
        records.append(_baseline_windows(rec['ECG_fname'], start_clock, prefix, tstyle))
    return pd.DataFrame(records, columns=columns)


# Load baseline data from an Excel file containing blink record alignments
baseline_df = pd.read_excel('blink_record_align.xlsx')

# One dictionary per participant with a readable EEG baseline recording
recording_rows = []

# Iterate over each row in the baseline DataFrame
for i, row in baseline_df.iterrows():

    # Extract the participant ID (PID) from the current row
    PID = str(row['PID'])

    # Locate the participant's baseline EEG header (.vhdr) and ECG (.edf) files
    eeg_f = _find_baseline_files(EEG_file_path, PID, '.vhdr')
    ecg_f = _find_baseline_files(ECG_file_path, PID, '.edf')

    # If no EEG file is found, skip this participant and print their PID
    if not eeg_f:
        print(PID)
        continue

    # Construct the full path for the EEG file and read it using MNE
    fpath = f'{EEG_file_path}/{eeg_f[0]}'
    raw = mne.io.read_raw_brainvision(fpath)

    # Extract the measurement date and time from the EEG data
    raw_datetime = raw.info['meas_date']

    # Use the first matching ECG file; if none exists, create one by
    # exporting the EEG data as EDF. The name is always kept as a string.
    if ecg_f:
        ecg_name = ecg_f[0]
    else:
        ecg_name = eeg_f[0].replace('vhdr', 'edf')
        ecg_export_fpath = f'{ECG_file_path}/{ecg_name}'
        mne.export.export_raw(ecg_export_fpath, raw)

    # Extract event annotations from the EEG data and convert them into a DataFrame
    events = mne.events_from_annotations(raw, verbose=False)
    events = pd.DataFrame(events[0], columns=['samples', 'N/A', 'event_code'])

    # Event code 15 marks the baseline start, event code 11 the CE baseline start
    recording_rows.append({
        'PID': PID,
        'EEG_fname': eeg_f[0],
        'rec_date': raw_datetime.strftime("%d/%m/%Y"),
        'rec_time': raw_datetime.strftime("%H:%M:%S.%f"),
        'ECG_fname': ecg_name,
        'baseline_start': _trigger_clock_time(raw, events, 15),
        'CE_baseline_start': _trigger_clock_time(raw, events, 11),
    })

# Build the table once; explicit columns keep the steps below working even
# when no recording was found
raw_output_df = pd.DataFrame(recording_rows, columns=[
    'PID', 'EEG_fname', 'rec_date', 'rec_time', 'ECG_fname',
    'baseline_start', 'CE_baseline_start'])

# Convert PID column to integer type and merge baseline data with the output DataFrame
raw_output_df = raw_output_df.astype({'PID': 'int32'})
raw_output_df = pd.merge(baseline_df, raw_output_df)

# Windows for rows where 'HR Baseline Record' is marked as 'Yes'
output_df = raw_output_df[raw_output_df['HR Baseline Record'] == 'Yes']
sample_output = _window_table(output_df, 'baseline_start', tstyle=0)

# Windows for rows where 'EEG Baseline Record' is marked as 'Yes' (no tstyle column)
CE_output_df = raw_output_df[raw_output_df['EEG Baseline Record'] == 'Yes']
CE_sample_output = _window_table(CE_output_df, 'CE_baseline_start', prefix='CE_')

# Merge the sample outputs from both baseline and CE baseline data
# NOTE(review): how="right" keeps only files that have a CE baseline row, and
# the merged table is not written to disk in this cell - confirm both are intended.
output_sample = pd.merge(sample_output, CE_sample_output, how="right", on='fname')


## Extract sample points from learn/test trial outputs

In [None]:
import pandas as pd
import datetime
import os

# File paths to behavioural (BEH) and ECG data directories (these paths need to be updated)
fpath = '...'
ECG_fpath = '...'


def _clock(session_start, seconds):
    """Return `session_start` + `seconds` formatted as H:M:S."""
    return (session_start + datetime.timedelta(seconds=seconds)).strftime("%H:%M:%S")


def _phase_columns(prefix, spans, session_start):
    """Build the output columns of one phase.

    `spans` is a list of (block, start_seconds, end_seconds) tuples in block
    order. Each block yields a label/start/stop triple; an overall triple
    (first block's start to last block's end) is added whenever at least one
    block exists, including the single-block case.
    """
    columns = {}
    if not spans:
        return columns
    for block, start_s, end_s in spans:
        columns[f'{prefix}_{block}'] = f'{prefix}_block_{block}'
        columns[f'{prefix}_{block}_start'] = _clock(session_start, start_s)
        columns[f'{prefix}_{block}_stop'] = _clock(session_start, end_s)
    columns[f'{prefix}_O'] = f'{prefix}_overall'
    columns[f'{prefix}_O_start'] = _clock(session_start, spans[0][1])
    columns[f'{prefix}_O_stop'] = _clock(session_start, spans[-1][2])
    return columns


# One single-row DataFrame per processed participant
sample_frames = []

# Load the list of participant IDs from an Excel file containing blink record alignments
pid_list = pd.read_excel('blink_record_align.xlsx')
pid_list = pid_list['PID']

# The directories are not modified in this cell, so list them once
beh_files = os.listdir(fpath)
ecg_files = os.listdir(ECG_fpath)

# Loop through each participant ID
for PID in pid_list:
    pid_prefix = str(PID)

    # Behavioural (csv) files of the participant
    fname = [f for f in beh_files
             if f.startswith(pid_prefix) and f.endswith('.csv')]

    # ECG (edf) files of the participant with 'test' or 'Test' in the filename
    ecg_fname = [f for f in ecg_files
                 if f.startswith(pid_prefix) and f.endswith('.edf')
                 and ('test' in f or 'Test' in f)]

    # Special handling for participant ID 161 where multiple ECG files exist:
    # drop the second match (guarded so a missing file cannot raise IndexError)
    if PID == 161 and len(ecg_fname) > 1:
        del ecg_fname[1]

    # Skip the participant when either the ECG or the behavioural file is missing
    if not ecg_fname or not fname:
        continue

    # If multiple ECG files exist, take the second one, otherwise take the first one
    # NOTE(review): os.listdir order is arbitrary, so "second" depends on the
    # file system - confirm the intended file is selected.
    ecg_fname = ecg_fname[1] if len(ecg_fname) > 1 else ecg_fname[0]
    fname = fname[0]

    # Read the behavioural data CSV file; os.path.join supplies the path
    # separator whether or not fpath ends with one
    df_raw = pd.read_csv(os.path.join(fpath, fname))

    # Extract the start time of the recording from the first row of the 'date' column
    start_time = df_raw['date'].iloc[0]
    date_format = "%Y-%m-%d_%Hh%M.%S.%f"
    start_datetime = datetime.datetime.strptime(start_time, date_format)

    # ECG filename and time style (0 by default) lead the output row
    temp_output = {'filename': ecg_fname, 'tstyle': 0}

    ### Learn Phase Processing ###
    # A learn block runs from its first fixation onset to the end of its last learn image
    df_Fix = df_raw[df_raw['trial_condition'] == 'Fixation']
    df_Learn = df_raw[df_raw['trial_condition'] == 'Learn']
    learn_spans = []
    for i in df_Learn['block_n'].dropna().unique():
        first_fix = df_Fix[df_Fix['block_n'] == i].iloc[0]
        last_learn = df_Learn[df_Learn['block_n'] == i].iloc[-1]
        learn_spans.append((i, first_fix['fixationImage_2.started'], last_learn['image.stopped']))
    temp_output.update(_phase_columns('learn', learn_spans, start_datetime))

    ### Test Phase Processing ###
    # A test block runs from its first button onset to the last button onset plus reaction time
    df_test = df_raw[df_raw['trial_condition'] == 'test']
    test_spans = []
    for j in df_test['block_n'].dropna().unique():
        block_rows = df_test[df_test['block_n'] == j]
        first_trial, last_trial = block_rows.iloc[0], block_rows.iloc[-1]
        test_spans.append((j, first_trial['test_button.started'],
                           last_trial['test_button.started'] + last_trial['test_button.rt']))
    temp_output.update(_phase_columns('test', test_spans, start_datetime))

    sample_frames.append(pd.DataFrame([temp_output]))

# Combine all participants in one step (columns are aligned by name)
sample_output = pd.concat(sample_frames, ignore_index=True) if sample_frames else pd.DataFrame()

# Save the final output to a CSV file (without headers and index)
sample_output.to_csv("child_test_samples.csv", header=False, index=False)


## Extract sample points from real number

In [None]:
import pandas as pd
import datetime
import os
import numpy as np

# File paths to behavioural (BEH) and ECG data directories (these paths need to be updated)
fpath = '...'
ECG_fpath = '...'


def _clock(session_start, seconds):
    """Return `session_start` + `seconds` formatted as H:M:S."""
    return (session_start + datetime.timedelta(seconds=seconds)).strftime("%H:%M:%S")


def _phase_columns(prefix, spans, session_start):
    """Build the output columns of one phase.

    `spans` is a list of (block, start_seconds, end_seconds) tuples in block
    order. Each block yields a label/start/stop triple; an overall triple
    (first block's start to last block's end) is added whenever at least one
    block exists, including the single-block case.
    """
    columns = {}
    if not spans:
        return columns
    for block, start_s, end_s in spans:
        columns[f'{prefix}_{block}'] = f'{prefix}_block_{block}'
        columns[f'{prefix}_{block}_start'] = _clock(session_start, start_s)
        columns[f'{prefix}_{block}_stop'] = _clock(session_start, end_s)
    columns[f'{prefix}_O'] = f'{prefix}_overall'
    columns[f'{prefix}_O_start'] = _clock(session_start, spans[0][1])
    columns[f'{prefix}_O_stop'] = _clock(session_start, spans[-1][2])
    return columns


# One single-row DataFrame per processed participant
sample_frames = []

# Load the list of participant IDs from an Excel file containing blink record alignments
pid_list = pd.read_excel('blink_record_align.xlsx')
pid_list = pid_list['PID']

# The directories are not modified in this cell, so list them once
beh_files = os.listdir(fpath)
ecg_files = os.listdir(ECG_fpath)

# Loop through each participant ID
for PID in pid_list:
    pid_prefix = str(PID)

    # Behavioural (csv) files of the participant
    fname = [f for f in beh_files
             if f.startswith(pid_prefix) and f.endswith('.csv')]

    # ECG (edf) files of the participant with 'numeral', 'numeric' or 'familiar' in the filename
    ecg_fname = [f for f in ecg_files
                 if f.startswith(pid_prefix) and f.endswith('.edf')
                 and ('numeral' in f or 'numeric' in f or 'familiar' in f)]

    # Skip the participant when either the ECG or the behavioural file is missing
    if not ecg_fname or not fname:
        continue

    # If multiple ECG files exist, take the second one, otherwise take the first one
    # NOTE(review): os.listdir order is arbitrary, so "second" depends on the
    # file system - confirm the intended file is selected.
    ecg_fname = ecg_fname[1] if len(ecg_fname) > 1 else ecg_fname[0]
    fname = fname[0]

    # Print the behavioural filename for debugging
    print(fname)

    # Read the behavioural data CSV file; os.path.join supplies the path
    # separator whether or not fpath ends with one
    df_raw = pd.read_csv(os.path.join(fpath, fname))

    # Extract the start time of the recording from the first row of the 'date' column
    start_time = df_raw['date'].iloc[0]
    date_format = "%Y-%m-%d_%Hh%M.%S.%f"
    start_datetime = datetime.datetime.strptime(start_time, date_format)

    # ECG filename and time style (0 by default) lead the output row
    temp_output = {'filename': ecg_fname, 'tstyle': 0}

    ### Learn Phase Processing ###
    # Keep the 'trial' rows, number them 1..n and split them into two blocks
    # (first half = block 1, second half = block 2)
    df_Learn = df_raw[df_raw['display'] == 'trial'].reset_index(drop=True)
    df_Learn['trial_n'] = range(1, len(df_Learn) + 1)
    df_Learn['block_n'] = np.where(df_Learn['trial_n'] <= (len(df_Learn) / 2), 1, 2)

    # A block runs from its first fixation onset to the last button onset plus reaction time
    learn_spans = []
    for i in df_Learn['block_n'].dropna().unique():
        block_rows = df_Learn[df_Learn['block_n'] == i]
        first_trial, last_trial = block_rows.iloc[0], block_rows.iloc[-1]
        learn_spans.append((i, first_trial['fixation.started'],
                            last_trial['test_button.started'] + last_trial['test_button.rt']))
    temp_output.update(_phase_columns('learn', learn_spans, start_datetime))

    sample_frames.append(pd.DataFrame([temp_output]))

# Combine all participants in one step (columns are aligned by name)
sample_output = pd.concat(sample_frames, ignore_index=True) if sample_frames else pd.DataFrame()

# Save the final output to a CSV file (without headers and index)
sample_output.to_csv("child_numeral_samples.csv", header=False, index=False)


# Adult Extraction

## Extract timings from trigger points in baseline EEG recordings

In [None]:
import mne
import pandas as pd
import os
import datetime

# File paths to EEG and ECG data directories (these paths need to be updated)
EEG_file_path = '...'
ECG_file_path = '...'

# Placeholder written to the timing column when the trigger code is absent
MISSING_TIME = 99999

# Column layout of the exported window table
SAMPLE_COLUMNS = ['fname', 'tstyle',
                  'label_o', 'start_o', 'end_o',
                  'label_1', 'start_1', 'end_1',
                  'label_2', 'start_2', 'end_2']


def _find_baseline_files(directory, pid, extension):
    """List files in `directory` whose name starts with `pid`, ends with
    `extension` and includes 'baseline' or 'Baseline'."""
    return [f for f in os.listdir(directory)
            if f.startswith(pid) and f.endswith(extension)
            and ('baseline' in f or 'Baseline' in f)]


def _trigger_clock_time(raw, events, code):
    """Return the clock time ("%H:%M:%S.%f") of the first `code` trigger.

    Returns MISSING_TIME when `events` holds no row with that event code.
    """
    samples = events.loc[events['event_code'] == code, 'samples']
    if samples.empty:
        return MISSING_TIME
    # Event positions are sample indices (offset by raw.first_samp), so they
    # are converted with the recording's own sampling rate instead of a
    # hard-coded 1000 Hz.
    seconds = float(samples.iloc[0] - raw.first_samp) / raw.info['sfreq']
    trigger_time = raw.info['meas_date'] + datetime.timedelta(seconds=seconds)
    return trigger_time.strftime("%H:%M:%S.%f")


def _baseline_windows(ecg_name, start_clock, tstyle):
    """Build one output row: windows 0-180 s, 0-90 s and 90-180 s after
    `start_clock`, truncated to whole seconds."""
    start = datetime.datetime.strptime(start_clock, "%H:%M:%S.%f").replace(microsecond=0)
    t0, t90, t180 = [(start + datetime.timedelta(seconds=offset)).strftime("%H:%M:%S")
                     for offset in (0, 90, 180)]
    return dict(zip(SAMPLE_COLUMNS, [
        ecg_name, tstyle,
        'baseline_overall', t0, t180,
        'baseline_1', t0, t90,
        'baseline_2', t90, t180,
    ]))


# Load baseline data from an Excel file containing blink record alignments for adults
baseline_df = pd.read_excel('adult_blink_record_align.xlsx')

# One dictionary per participant with a readable EEG baseline recording
recording_rows = []

# Loop through each row in the baseline DataFrame
for i, row in baseline_df.iterrows():

    # Extract the participant ID (PID) from the current row
    PID = str(row['PID'])

    # Locate the participant's baseline EEG header (.vhdr) and ECG (.edf) files
    eeg_f = _find_baseline_files(EEG_file_path, PID, '.vhdr')
    ecg_f = _find_baseline_files(ECG_file_path, PID, '.edf')

    # Skip to the next participant if no EEG file is found
    if not eeg_f:
        continue

    # Construct the full path for the EEG file and read it using MNE
    fpath = f'{EEG_file_path}/{eeg_f[0]}'
    raw = mne.io.read_raw_brainvision(fpath)

    # Extract the measurement date and time from the EEG data
    raw_datetime = raw.info['meas_date']

    # Use the first matching ECG file; if none exists, create one by
    # exporting the EEG data as EDF. The name is always kept as a string.
    if ecg_f:
        ecg_name = ecg_f[0]
    else:
        ecg_name = eeg_f[0].replace('vhdr', 'edf')
        ecg_export_fpath = f'{ECG_file_path}/{ecg_name}'
        mne.export.export_raw(ecg_export_fpath, raw)

    # Extract event annotations from the EEG data and convert them into a DataFrame
    events = mne.events_from_annotations(raw, verbose=False)
    events = pd.DataFrame(events[0], columns=['samples', 'N/A', 'event_code'])

    # Event code 15 marks the baseline start
    recording_rows.append({
        'PID': PID,
        'EEG_fname': eeg_f[0],
        'rec_date': raw_datetime.strftime("%d/%m/%Y"),
        'rec_time': raw_datetime.strftime("%H:%M:%S.%f"),
        'ECG_fname': ecg_name,
        'baseline_start': _trigger_clock_time(raw, events, 15),
    })

# Build the table once; explicit columns keep the merge working even when no
# recording was found
raw_output_df = pd.DataFrame(recording_rows, columns=[
    'PID', 'EEG_fname', 'rec_date', 'rec_time', 'ECG_fname', 'baseline_start'])

# Merge baseline DataFrame with output DataFrame
raw_output_df = pd.merge(baseline_df, raw_output_df)

# Filter the merged DataFrame to include only rows with 'HR Baseline Record' marked as 'yes' and exclude PID 'A003'
output_df = raw_output_df[raw_output_df['HR Baseline Record'] == 'yes']
output_df = output_df[output_df['PID'] != 'A003']

# Convert each remaining recording into its three baseline windows
sample_rows = []
for i, row in output_df.iterrows():
    start_clock = row['baseline_start']
    # Recordings without trigger 15 carry the MISSING_TIME placeholder, which
    # cannot be parsed as a time; report and skip them instead of crashing
    if not isinstance(start_clock, str):
        print(f"{row['PID']}: no baseline_start trigger found, skipped")
        continue
    sample_rows.append(_baseline_windows(row['ECG_fname'], start_clock, 0))

sample_output = pd.DataFrame(sample_rows, columns=SAMPLE_COLUMNS)

# Save the final output to a CSV file (without headers and index)
sample_output.to_csv("adult_baseline_samples.csv", header=False, index=False)


## Extract sample points from learn/test trial outputs

In [None]:
import pandas as pd
import datetime
import os

# File paths to behavioural (BEH) and ECG data directories (these paths need to be updated)
fpath = '...'
ECG_fpath = '...'


def _clock(session_start, seconds):
    """Return `session_start` + `seconds` formatted as H:M:S."""
    return (session_start + datetime.timedelta(seconds=seconds)).strftime("%H:%M:%S")


def _phase_columns(prefix, spans, session_start):
    """Build the output columns of one phase.

    `spans` is a list of (block, start_seconds, end_seconds) tuples in block
    order. Each block yields a label/start/stop triple; an overall triple
    (first block's start to last block's end) is added whenever at least one
    block exists, including the single-block case.
    """
    columns = {}
    if not spans:
        return columns
    for block, start_s, end_s in spans:
        columns[f'{prefix}_{block}'] = f'{prefix}_block_{block}'
        columns[f'{prefix}_{block}_start'] = _clock(session_start, start_s)
        columns[f'{prefix}_{block}_stop'] = _clock(session_start, end_s)
    columns[f'{prefix}_O'] = f'{prefix}_overall'
    columns[f'{prefix}_O_start'] = _clock(session_start, spans[0][1])
    columns[f'{prefix}_O_stop'] = _clock(session_start, spans[-1][2])
    return columns


# One single-row DataFrame per processed participant
sample_frames = []

# Load the list of participant IDs from an Excel file containing blink record alignments for adults
pid_list = pd.read_excel('adult_blink_record_align.xlsx')
pid_list = pid_list['PID']

# The directories are not modified in this cell, so list them once
beh_files = os.listdir(fpath)
ecg_files = os.listdir(ECG_fpath)

# Loop through each participant ID
for PID in pid_list:
    pid_prefix = str(PID)

    # Behavioural (csv) files of the participant, excluding files with 'blink_record'
    fname = [f for f in beh_files
             if f.startswith(pid_prefix) and f.endswith('.csv')
             and 'blink_record' not in f]

    # ECG (edf) files of the participant with 'test' or 'Test' in the filename
    ecg_fname = [f for f in ecg_files
                 if f.startswith(pid_prefix) and f.endswith('.edf')
                 and ('test' in f or 'Test' in f)]

    # Skip the participant when either the ECG or the behavioural file is missing
    if not ecg_fname or not fname:
        continue

    # If multiple ECG files exist, take the second one, otherwise take the first one
    # NOTE(review): os.listdir order is arbitrary, so "second" depends on the
    # file system - confirm the intended file is selected.
    ecg_fname = ecg_fname[1] if len(ecg_fname) > 1 else ecg_fname[0]
    fname = fname[0]

    # Read the behavioural data CSV file; os.path.join supplies the path
    # separator whether or not fpath ends with one
    df_raw = pd.read_csv(os.path.join(fpath, fname))

    # Extract the start time of the recording from the first row of the 'date' column
    start_time = df_raw['date'].iloc[0]
    date_format = "%Y-%m-%d_%Hh%M.%S.%f"
    start_datetime = datetime.datetime.strptime(start_time, date_format)

    # ECG filename and time style (0 by default) lead the output row
    temp_output = {'filename': ecg_fname, 'tstyle': 0}

    # Debugging output for the behavioural filename
    print(fname)

    ### Learn Phase Processing ###
    # A learn block runs from its first fixation onset to the end of its last learn image
    df_Fix = df_raw[df_raw['trial_condition'] == 'Fixation']
    df_Learn = df_raw[df_raw['trial_condition'] == 'Learn']
    learn_spans = []
    for i in df_Learn['block_n'].dropna().unique():
        first_fix = df_Fix[df_Fix['block_n'] == i].iloc[0]
        last_learn = df_Learn[df_Learn['block_n'] == i].iloc[-1]
        learn_spans.append((i, first_fix['fixationImage_2.started'], last_learn['image.stopped']))
    temp_output.update(_phase_columns('learn', learn_spans, start_datetime))

    ### Test Phase Processing ###
    # A test block runs from its first button onset to the last button onset plus reaction time
    df_test = df_raw[df_raw['trial_condition'] == 'test']
    test_spans = []
    for j in df_test['block_n'].dropna().unique():
        block_rows = df_test[df_test['block_n'] == j]
        first_trial, last_trial = block_rows.iloc[0], block_rows.iloc[-1]
        test_spans.append((j, first_trial['test_button.started'],
                           last_trial['test_button.started'] + last_trial['test_button.rt']))
    temp_output.update(_phase_columns('test', test_spans, start_datetime))

    sample_frames.append(pd.DataFrame([temp_output]))

# Combine all participants in one step (columns are aligned by name)
sample_output = pd.concat(sample_frames, ignore_index=True) if sample_frames else pd.DataFrame()

# Save the final output to a CSV file (without headers and index)
sample_output.to_csv("adult_test_samples.csv", header=False, index=False)


## Extract sample points from real number

In [None]:
import pandas as pd
import datetime
import os
import numpy as np

# File paths to behavioural (BEH) and ECG data directories (these paths need to be updated)
fpath = '...'
ECG_fpath = '...'


def _clock(session_start, seconds):
    """Return `session_start` + `seconds` formatted as H:M:S."""
    return (session_start + datetime.timedelta(seconds=seconds)).strftime("%H:%M:%S")


def _phase_columns(prefix, spans, session_start):
    """Build the output columns of one phase.

    `spans` is a list of (block, start_seconds, end_seconds) tuples in block
    order. Each block yields a label/start/stop triple; an overall triple
    (first block's start to last block's end) is added whenever at least one
    block exists, including the single-block case.
    """
    columns = {}
    if not spans:
        return columns
    for block, start_s, end_s in spans:
        columns[f'{prefix}_{block}'] = f'{prefix}_block_{block}'
        columns[f'{prefix}_{block}_start'] = _clock(session_start, start_s)
        columns[f'{prefix}_{block}_stop'] = _clock(session_start, end_s)
    columns[f'{prefix}_O'] = f'{prefix}_overall'
    columns[f'{prefix}_O_start'] = _clock(session_start, spans[0][1])
    columns[f'{prefix}_O_stop'] = _clock(session_start, spans[-1][2])
    return columns


# One single-row DataFrame per processed participant
sample_frames = []

# Load the list of participant IDs from an Excel file containing blink record alignments for adults
# NOTE(review): this cell previously read the child workbook
# ('blink_record_align.xlsx'); the adult workbook used by the other adult
# cells is read instead - confirm against the data.
pid_list = pd.read_excel('adult_blink_record_align.xlsx')
pid_list = pid_list['PID']

# The directories are not modified in this cell, so list them once
beh_files = os.listdir(fpath)
ecg_files = os.listdir(ECG_fpath)

# Loop through each participant ID
for PID in pid_list:
    pid_prefix = str(PID)

    # Behavioural (csv) files of the participant
    fname = [f for f in beh_files
             if f.startswith(pid_prefix) and f.endswith('.csv')]

    # ECG (edf) files of the participant with 'numeral', 'numeric' or 'familiar' in the filename
    ecg_fname = [f for f in ecg_files
                 if f.startswith(pid_prefix) and f.endswith('.edf')
                 and ('numeral' in f or 'numeric' in f or 'familiar' in f)]

    # Skip the participant when either the ECG or the behavioural file is missing
    if not ecg_fname or not fname:
        continue

    # If multiple ECG files exist, take the second one, otherwise take the first one
    # NOTE(review): os.listdir order is arbitrary, so "second" depends on the
    # file system - confirm the intended file is selected.
    ecg_fname = ecg_fname[1] if len(ecg_fname) > 1 else ecg_fname[0]
    fname = fname[0]

    # Debugging output for the behavioural filename
    print(fname)

    # Read the behavioural data CSV file; os.path.join supplies the path
    # separator whether or not fpath ends with one
    df_raw = pd.read_csv(os.path.join(fpath, fname))

    # Extract the start time of the recording from the first row of the 'date' column
    start_time = df_raw['date'].iloc[0]
    date_format = "%Y-%m-%d_%Hh%M.%S.%f"
    start_datetime = datetime.datetime.strptime(start_time, date_format)

    # ECG filename and time style (0 by default) lead the output row
    temp_output = {'filename': ecg_fname, 'tstyle': 0}

    ### Learn Phase Processing ###
    # Keep the 'trial' rows, number them 1..n and split them into two blocks
    # (first half = block 1, second half = block 2)
    df_Learn = df_raw[df_raw['display'] == 'trial'].reset_index(drop=True)
    df_Learn['trial_n'] = range(1, len(df_Learn) + 1)
    df_Learn['block_n'] = np.where(df_Learn['trial_n'] <= (len(df_Learn) / 2), 1, 2)

    # A block runs from its first fixation onset to the last button onset plus reaction time
    learn_spans = []
    for i in df_Learn['block_n'].dropna().unique():
        block_rows = df_Learn[df_Learn['block_n'] == i]
        first_trial, last_trial = block_rows.iloc[0], block_rows.iloc[-1]
        learn_spans.append((i, first_trial['fixation.started'],
                            last_trial['test_button.started'] + last_trial['test_button.rt']))
    temp_output.update(_phase_columns('learn', learn_spans, start_datetime))

    sample_frames.append(pd.DataFrame([temp_output]))

# Combine all participants in one step (columns are aligned by name)
sample_output = pd.concat(sample_frames, ignore_index=True) if sample_frames else pd.DataFrame()

# Save the final output to a CSV file (without headers and index)
sample_output.to_csv("adult_numeral_samples.csv", header=False, index=False)
