In [22]:
import os

import pandas as pd

In [23]:
'''
Truncate the original iEEG time series by taking all the seizure time points along with 5000 time points from before as well as after seizure.
file_id     episode     start       dt  SzOnLoc         tend	tend_sec	tsz	    tsz_sec	dt_s
111	        15:42:30    15:40:46	104	L mesial temp	43101	215.505	    34974	174.87	40.635
112	        17:04:35    17:02:51	104	L mesial temp	38828	194.14	    26999	134.995	59.145
113	        19:42:49    19:41:06	103	R mesial temp	35820	179.1	    25562	127.81	51.29
'''

def truncate_data(data, tstart, tend, margin=5000):
    """Cut a recording down to the seizure window plus a margin on each side.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw recording. The first column is skipped (presumably a time/index
        column -- confirm against the CSV); every other column is kept as a
        channel.
    tstart, tend : int
        Row positions delimiting the seizure. Rows from ``tstart - margin`` up
        to, but not including, ``tend + margin`` are kept.
    margin : int, default 5000
        Number of rows kept before ``tstart`` and after ``tend``.

    Returns
    -------
    pandas.DataFrame
        One column per channel with a fresh 0-based index. If the recording
        ends before ``tend + margin`` the result is simply shorter.

    Raises
    ------
    ValueError
        If ``margin`` is negative, or if ``tstart - margin`` is negative. A
        negative slice start would otherwise wrap around and count from the
        end of the recording, silently returning the wrong samples.
    """
    if margin < 0:
        raise ValueError(f"margin must be non-negative, got {margin}")

    start = tstart - margin
    stop = tend + margin
    if start < 0:
        raise ValueError(
            f"tstart - margin = {start} is negative: the recording does not "
            f"contain {margin} samples before tstart={tstart}"
        )

    window = slice(start, stop)
    # Go through NumPy arrays so the result gets a clean 0..n-1 index instead
    # of inheriting the original row labels.
    return pd.DataFrame(
        {channel: data[channel].to_numpy()[window] for channel in data.columns[1:]}
    )

In [24]:
'''
Fragmentize the truncated time series into 14 parts.
Scheme: before seizure + seizure + after seizure = 2 + 10 + 2 fragments.
Within each of the three regions, all fragments must have the same length.
'''

def fragmentize_data(data, filename, margin=5000):
    """Label every row with a fragment number (1-14) and save the result as CSV.

    The frame is split by row position into three regions:

    * the first ``margin`` rows  -> fragments 1 and 2,
    * the rows in between        -> fragments 3 to 12 (ten near-equal parts),
    * the last ``margin`` rows   -> fragments 13 and 14.

    Parameters
    ----------
    data : pandas.DataFrame
        Truncated recording. A ``'fragment'`` column is added to this frame
        in place.
    filename : str
        Name of the source file. The output is written next to it with
        ``_fragmentized`` inserted before the extension (``x.csv`` ->
        ``x_fragmentized.csv``); ``.csv`` is appended if there is no extension.
    margin : int, default 5000
        Number of rows in the leading and in the trailing region. With an odd
        margin the two fragments of a flank differ in length by one row.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now carrying the ``'fragment'`` column.

    Raises
    ------
    ValueError
        If ``margin`` is negative or ``data`` has fewer than ``2 * margin`` rows.
    """
    n_rows = len(data)
    if margin < 0:
        raise ValueError(f"margin must be non-negative, got {margin}")
    if n_rows < 2 * margin:
        raise ValueError(
            f"need at least {2 * margin} rows for two margins of {margin}, got {n_rows}"
        )

    half = margin // 2
    seizure_len = n_rows - 2 * margin

    # Work by position (half-open slices) rather than by label: label slices
    # are end-inclusive, which would tag margin + 1 rows as pre-seizure and
    # make fragment 1 one row longer than fragment 2.
    labels = pd.Series(0, index=range(n_rows), dtype="int64")

    # leading margin -> fragments 1, 2
    labels.iloc[:half] = 1
    labels.iloc[half:margin] = 2

    # trailing margin -> fragments 13, 14
    labels.iloc[n_rows - margin:n_rows - half] = 13
    labels.iloc[n_rows - half:] = 14

    # seizure rows -> fragments 3..12; integer arithmetic keeps the cut points exact
    for i in range(10):
        lo = margin + (i * seizure_len) // 10
        hi = margin + ((i + 1) * seizure_len) // 10
        labels.iloc[lo:hi] = i + 3

    # Assign raw values so the labels line up by position whatever the index is.
    data['fragment'] = labels.to_numpy()

    # Build the output name from the extension only, so a name without '.csv'
    # never resolves to the input file itself and directory names are untouched.
    stem, ext = os.path.splitext(filename)
    out_path = f"{stem}_fragmentized{ext or '.csv'}"
    data.to_csv(out_path, index=False)

    return data

In [25]:
'''
Remove reference channels and any channels that start with G, F, I. Column names must be only the channel names without any other characters in the string.
Reference channels after cleaning: "b'SEEG TLR03'","b'SEEG TLR04'"
'''

def clean_data(data, column_names):
    """Drop unwanted channels, normalise channel names and reorder the columns.

    Steps, in order:

    1. Drop the columns listed in ``column_names`` (the reference channels).
    2. Turn names such as ``"b'SEEG TLL01'"`` into ``"TLL01"``: the ``b'...'``
       wrapper is removed only when it is actually present, then ``'SEEG '``
       is removed.
    3. Discard channels whose name starts with ``G``, ``F`` or ``I``.
    4. Keep the electrode groups TLL, TBAL, TBPL, TL, TR, TBPR, TBAR, TLR in
       that order, each group sorted alphabetically. Columns that belong to
       none of these groups are not part of the result.

    Parameters
    ----------
    data : pandas.DataFrame
        Recording whose column labels are channel-name strings. Not modified.
    column_names : list of str
        Original (un-normalised) column labels to drop first.

    Returns
    -------
    pandas.DataFrame
        New frame with the cleaned, reordered columns.

    Raises
    ------
    KeyError
        Raised by ``DataFrame.drop`` if a label in ``column_names`` is missing.
    """
    # Remove reference channels
    data = data.drop(columns=column_names)

    # Normalise the names. Stripping is conditional so that a label without
    # the b'...' wrapper does not lose its first two and last characters.
    cleaned_names = []
    for raw_name in data.columns:
        name = raw_name
        if len(name) >= 3 and name.startswith("b'") and name.endswith("'"):
            name = name[2:-1]
        cleaned_names.append(name.replace('SEEG ', ''))
    data.columns = cleaned_names

    # Remove channels that start with G, F, I
    candidates = [name for name in data.columns if not name.startswith(('G', 'F', 'I'))]

    # (prefix, prefixes to exclude) in the required output order. 'TL' must
    # not swallow the TLL/TLR electrodes, which have their own slots.
    group_order = (
        ('TLL', ()),
        ('TBAL', ()),
        ('TBPL', ()),
        ('TL', ('TLL', 'TLR')),
        ('TR', ()),
        ('TBPR', ()),
        ('TBAR', ()),
        ('TLR', ()),
    )
    ordered = []
    for prefix, excluded in group_order:
        ordered.extend(sorted(
            name for name in candidates
            if name.startswith(prefix) and not name.startswith(excluded)
        ))

    data = data[ordered]

    # print the order of the columns
    print("Order of the columns: ", data.columns)

    return data

In [26]:
# Recording 029b0RMT: load the raw CSV, keep the window around rows
# 29095-42573, drop the two listed reference channels, then label the
# fragments and write the fragmentized CSV.
data = pd.read_csv('029b0RMT.csv')
truncated_data = truncate_data(data, 29095, 42573)
cleaned_data = clean_data(truncated_data, ["b\'SEEG TBAL1\'","b\'SEEG TBAL2\'"])
# Do not rebind `data` here: fragmentize_data works on cleaned_data in place,
# so the raw recording loaded above stays available in `data`.
fragmentize_data(cleaned_data, '029b0RMT.csv')

Order of the columns:  Index(['TLL01', 'TLL02', 'TLL03', 'TLL04', 'TBAL3', 'TBAL4', 'TBPL1', 'TBPL2',
       'TBPL3', 'TBPL4', 'TL01', 'TL02', 'TL03', 'TL04', 'TL05', 'TL06',
       'TL07', 'TL08', 'TL09', 'TL10', 'TR01', 'TR02', 'TR03', 'TR04', 'TR05',
       'TR06', 'TR07', 'TR08', 'TR09', 'TR10', 'TBPR1', 'TBPR2', 'TBPR3',
       'TBPR4', 'TBAR1', 'TBAR2', 'TBAR3', 'TBAR4', 'TLR01', 'TLR02', 'TLR03',
       'TLR04'],
      dtype='object')


In [27]:
# Recording 117h0RMT: same pipeline as the previous cell with this
# recording's seizure bounds and reference channels.
data = pd.read_csv('117h0RMT.csv')
# Keep rows from 5000 before position 35841 up to 5000 after position 47050.
truncated_data = truncate_data(data, 35841, 47050)
# Drop the two listed channels, normalise the channel names and reorder them.
cleaned_data = clean_data(truncated_data, ["b\'SEEG FLL07\'","b\'SEEG FLL08\'"])
# Adds the 'fragment' column to cleaned_data and writes 117h0RMT_fragmentized.csv.
fragmentize_data(cleaned_data, '117h0RMT.csv')

Order of the columns:  Index(['TLL01', 'TLL02', 'TLL03', 'TLL04', 'TBAL1', 'TBAL2', 'TBAL3', 'TBAL4',
       'TBPL1', 'TBPL2', 'TBPL3', 'TBPL4', 'TL01', 'TL02', 'TL03', 'TL04',
       'TL05', 'TL06', 'TL07', 'TL08', 'TL09', 'TL10', 'TR01', 'TR02', 'TR03',
       'TR04', 'TR05', 'TR06', 'TR07', 'TR08', 'TR09', 'TR10', 'TBPR1',
       'TBPR2', 'TBPR3', 'TBPR4', 'TBAR1', 'TBAR2', 'TBAR3', 'TBAR4', 'TLR01',
       'TLR02', 'TLR03', 'TLR04'],
      dtype='object')
