In [18]:
import pandas as pd

In [19]:
'''
Truncate the original iEEG time series by taking all the seizure time points along with 5000 time points from before as well as after seizure.
file_id     episode     start       dt  SzOnLoc         tend	tend_sec	tsz	    tsz_sec	dt_s
111	        15:42:30    15:40:46	104	L mesial temp	43101	215.505	    34974	174.87	40.635
112	        17:04:35    17:02:51	104	L mesial temp	38828	194.14	    26999	134.995	59.145
113	        19:42:49    19:41:06	103	R mesial temp	35820	179.1	    25562	127.81	51.29
'''

def truncate_data(data, tstart, tend, pad=5000):
    """Cut a recording down to the seizure window plus a fixed margin on each side.

    Parameters
    ----------
    data : pandas.DataFrame
        Full recording, one row per sample. The first column is skipped
        (presumably a time/index column -- TODO confirm); every remaining
        column is kept as a channel.
    tstart, tend : int
        Positional row numbers delimiting the seizure; ``tend`` is used as an
        exclusive slice bound, exactly like a Python slice.
    pad : int, default 5000
        Number of extra rows kept before ``tstart`` and after ``tend``.

    Returns
    -------
    pandas.DataFrame
        Rows ``tstart - pad`` up to (not including) ``tend + pad`` of every
        channel column, re-indexed from 0.

    Raises
    ------
    ValueError
        If ``pad`` is negative, ``tend`` precedes ``tstart``, or the padded
        window does not fit inside the recording.
    """
    if pad < 0:
        raise ValueError("pad must be non-negative, got %r" % (pad,))
    if tend < tstart:
        raise ValueError("tend (%r) must not precede tstart (%r)" % (tend, tstart))

    start = tstart - pad
    stop = tend + pad
    # A negative start would wrap around to the end of the recording and an
    # overlong stop would be clipped silently; either way the caller would get
    # fewer margin rows than requested without noticing, so fail loudly.
    if start < 0 or stop > len(data):
        raise ValueError(
            "padded window [%d, %d) does not fit in a recording of %d rows"
            % (start, stop, len(data))
        )

    # Column 0 is dropped; reset_index gives the same 0-based index a freshly
    # built frame would have.
    return data.iloc[start:stop, 1:].reset_index(drop=True)

In [20]:
'''
Fragmentize the truncated time series into 14 parts.
Scheme: before seizure + seizure + after seizure = 2 + 10 + 2 fragments.
Length of the fragments, separately in the three regions, must be identical.
'''

def fragmentize_data(data, filename, out_dir='./data/', pad=5000):
    """Split a truncated recording into 14 fragments and write each to a CSV file.

    Layout: the first ``pad`` rows are cut into 2 equal fragments, the last
    ``pad`` rows into 2 equal fragments, and the rows in between into 10
    equal fragments. Rows that do not divide evenly are dropped so that all
    fragments within a region have identical length.

    Parameters
    ----------
    data : pandas.DataFrame (or anything ``pandas.DataFrame`` can wrap)
        Truncated recording, one row per sample.
    filename : str
        Name of the source file; a trailing ``.csv`` is removed and the
        fragment suffix (``_first_part_1``, ``_seizure_part_3``, ...) plus
        ``.csv`` is appended to build each output name.
    out_dir : str, default './data/'
        Directory the fragments are written to; created if missing.
    pad : int, default 5000
        Number of rows at each end of ``data`` that lie outside the seizure.

    Raises
    ------
    ValueError
        If ``pad`` is negative or ``data`` has fewer than ``2 * pad`` rows
        (the leading and trailing regions would overlap).
    """
    import os  # local import: only this function needs path handling

    frame = data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
    n_rows = len(frame)
    if pad < 0:
        raise ValueError("pad must be non-negative, got %r" % (pad,))
    if n_rows < 2 * pad:
        raise ValueError(
            "need at least %d rows to cut %d from each end, got %d"
            % (2 * pad, pad, n_rows)
        )

    n_seizure_parts = 10
    half = pad // 2
    seizure_stop = n_rows - pad
    # Integer division: leftover seizure rows are discarded on purpose so that
    # the ten seizure fragments all have the same length.
    part_len = (seizure_stop - pad) // n_seizure_parts

    # (file suffix, rows) in the order the files are written.
    fragments = [
        ('_first_part_1', frame.iloc[:half]),
        ('_first_part_2', frame.iloc[half:2 * half]),
    ]
    for i in range(n_seizure_parts):
        lo = pad + i * part_len
        fragments.append(('_seizure_part_' + str(i + 1), frame.iloc[lo:lo + part_len]))
    fragments.append(('_last_part_1', frame.iloc[seizure_stop:seizure_stop + half]))
    fragments.append(('_last_part_2', frame.iloc[seizure_stop + half:seizure_stop + 2 * half]))

    # Strip only a trailing '.csv'. A plain str.replace leaves names without
    # that extension untouched, which would send all 14 fragments to one path.
    stem = filename[:-len('.csv')] if filename.endswith('.csv') else filename
    base = os.path.join(out_dir, stem)
    os.makedirs(os.path.dirname(base) or '.', exist_ok=True)

    for suffix, fragment in fragments:
        fragment.to_csv(base + suffix + '.csv', index=False)

In [21]:
'''
Remove the reference channels and (not yet implemented; pending clarification) any channels whose names start with G, F, or I. Column names must contain only the channel name, without the surrounding b'...' wrapper characters.
Reference channels after cleaning: "b'SEEG TLR03'","b'SEEG TLR04'"
'''

def clean_data(data, column_names):
    """Drop the given columns and strip the ``b'...'`` wrapper from column names.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column labels look like ``"b'SEEG TLR03'"``.
    column_names : list of str
        Labels of the columns to remove (the reference channels), spelled
        exactly as they appear in ``data.columns``.

    Returns
    -------
    pandas.DataFrame
        A new frame without the listed columns; ``data`` itself is not
        modified. Labels wrapped as ``b'...'`` or ``b"..."`` lose the wrapper,
        all other labels are kept unchanged.

    Raises
    ------
    KeyError
        If any entry of ``column_names`` is not a column of ``data``.
    """
    def _strip_bytes_wrapper(label):
        # Only touch labels that really carry the wrapper; blind slicing
        # would mangle any plain column name.
        if (
            isinstance(label, str)
            and len(label) >= 3
            and label[:2] in ("b'", 'b"')
            and label[-1] == label[1]
        ):
            return label[2:-1]
        return label

    # TODO: channels starting with G, F, I are also meant to be removed, but
    # the exact rule is still awaiting clarification, so nothing is filtered.
    return data.drop(columns=column_names).rename(columns=_strip_bytes_wrapper)

In [22]:
# Pipeline for recording 029b0RMT: load the CSV, keep the window given by
# rows 29095 and 42573 plus the margin truncate_data adds on each side, drop
# the two columns passed as reference channels, strip the b'...' wrapper from
# the column names, and write the fragment CSVs under ./data/.
# NOTE(review): 29095 / 42573 are presumably the seizure onset / end sample
# indices for this file -- confirm against the annotation table.
data = pd.read_csv('029b0RMT.csv')
truncated_data = truncate_data(data, 29095, 42573)
cleaned_data = clean_data(truncated_data, ["b\'SEEG TBAL1\'","b\'SEEG TBAL2\'"])
fragmentize_data(cleaned_data, '029b0RMT.csv')

In [23]:
# Same pipeline for recording 117h0RMT, with window rows 35841 and 47050 and
# its own pair of reference-channel columns. This rebinds the notebook-level
# names data / truncated_data / cleaned_data.
# NOTE(review): 35841 / 47050 are presumably the seizure onset / end sample
# indices for this file -- confirm against the annotation table.
data = pd.read_csv('117h0RMT.csv')
truncated_data = truncate_data(data, 35841, 47050)
cleaned_data = clean_data(truncated_data, ["b\'SEEG FLL07\'","b\'SEEG FLL08\'"])
fragmentize_data(cleaned_data, '117h0RMT.csv')