In [1]:
import os
import scipy.io
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from google.colab import files
uploaded = files.upload()
df = pd.read_csv('Hackathon_full_dataset.csv')

Saving Hackathon_full_dataset.csv to Hackathon_full_dataset.csv


In [3]:
# This method segments all the data into ordinal epochs.
def segment_epochs_all(data):
    data['signal'] = data['trigger'].apply(lambda x: 'Yes' if x == 1 or x == -1 else 'No')

    data['epoch'] = 0
    # Calculating time difference between samples
    fs = 250  # Sampling frequency in Hz
    time_diff = 1 / fs  # Time difference between samples

    # Time before and after trigger event (in seconds)
    time_before = 0.1
    time_after = 0.7

    # Number of samples before and after the trigger event
    n_before = int(time_before / time_diff)  # Should be 25 samples
    n_after = int(time_after / time_diff)    # Should be 175 samples

    # Total number of samples in an epoch
    n_epoch = n_before + n_after  # 200 samples

    # Get the unique participant IDs
    participant_ids = data['participant_id'].unique()

    for participant_id in participant_ids:
        # Filter data for the current participant
        participant_data = data[data['participant_id'] == participant_id].reset_index(drop=True)

        # Reset index to align with original data index
        participant_data.reset_index(inplace=True)

        # Find indices where 'signal' is 'Yes' (trigger events)
        trigger_indices = participant_data.index[participant_data['signal'] == 'Yes'].tolist()

        for trigger_index in trigger_indices:
            # Calculate start and end indices for the epoch
            start_idx = trigger_index - n_before
            end_idx = trigger_index + n_after

            # Handle boundary conditions
            if start_idx < 0:
                start_idx = 0
            if end_idx > len(participant_data):
                end_idx = len(participant_data)

            # Add 1 to epoch for when a section is covered by a signal

            # Get the integer position of the 'epoch' column
            epoch_col_idx = participant_data.columns.get_loc('epoch')

            # Increment the 'epoch' column for the specified range
            participant_data.iloc[start_idx:end_idx, epoch_col_idx] += 1

        # Update 'data' with the modified 'participant_data'
        data.update(participant_data.set_index('index'))

    return data

In [7]:
# This method creates a datsaset which only keeps the relevant segments around signals and orders them from first to last
def segment_epochs_signal_only(data):
    data['signal'] = data['trigger'].apply(lambda x: 'Yes' if x == 1 or x == -1 else 'No')

    # Calculating time difference between samples
    fs = 250  # Sampling frequency in Hz
    time_diff = 1 / fs  # Time difference between samples

    # Time before and after trigger event (in seconds)
    time_before = 0.1
    time_after = 0.7

    # Number of samples before and after the trigger event
    n_before = int(time_before / time_diff)  # Should be 25 samples
    n_after = int(time_after / time_diff)    # Should be 175 samples

    epochs = []

    # Get the unique participant IDs
    participant_ids = data['participant_id'].unique()

    for participant_id in participant_ids:
        # Filter data for the current participant
        participant_data = data[data['participant_id'] == participant_id].reset_index(drop=True)

        # Reset epoch counter for each participant
        e = 1

        # Find indices where 'signal' is 'Yes' (trigger events)
        trigger_indices = participant_data.index[participant_data['signal'] == 'Yes'].tolist()

        for trigger_index in trigger_indices:
            # Calculate start and end indices for the epoch
            start_idx = trigger_index - n_before
            end_idx = trigger_index + n_after

            # Handle boundary conditions
            if start_idx < 0:
                start_idx = 0
            if end_idx > len(participant_data):
                end_idx = len(participant_data)

            # Extract the epoch data
            epoch_data = participant_data.iloc[start_idx:end_idx].copy()

            # Assign epoch number
            epoch_data['epoch'] = e

            # Append epoch data to the list
            epochs.append(epoch_data)

            # Increment epoch counter
            e += 1

    # Combine all epochs into one DataFrame
    epochs_df = pd.concat(epochs, ignore_index=True)
    return epochs_df

In [8]:
df_epochs_signal_only = segment_epochs_signal_only(df)

In [13]:
print(df_epochs_signal_only.shape)
df_epochs_signal_only["epoch"].unique()

(1194000, 14)


array([   1,    2,    3, ..., 2398, 2399, 2400])

In [4]:
df_epoch = segment_epochs_all(df)

In [14]:
print(df_epoch.shape)
df_epoch["epoch"].unique()
df_epoch.tail()

(304269, 14)


Unnamed: 0.1,Unnamed: 0,participant_id,timestep,channel_1,channel_2,channel_3,channel_4,channel_5,channel_6,channel_7,channel_8,trigger,signal,epoch
304264,60850,5,60851,25.200833,-8.160597,-1.632134,7.230267,-12.889168,23.064468,-13.719194,-0.573368,0,No,0
304265,60851,5,60852,25.114429,-7.634543,-0.077657,8.80284,-11.99877,20.266819,-14.060965,-0.34835,0,No,0
304266,60852,5,60853,25.702621,-6.424084,1.683722,9.77765,-10.367086,18.965218,-12.496437,-0.159029,0,No,0
304267,60853,5,60854,26.356545,-5.345835,3.296015,10.149696,-8.80805,18.398303,-10.261105,-0.101577,0,No,0
304268,60854,5,60855,26.373341,-4.944383,4.375007,10.641672,-7.879667,17.906614,-8.566039,0.067845,0,No,0
