In [None]:
# Mount Google Drive so the study folders are reachable from the Colab VM.
from google.colab import drive

# force_remount=True re-attaches the drive even if a previous session left it mounted.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [None]:
# add imports
import os
import zipfile
import csv
import copy
import pandas as pd
import numpy as np

In [None]:
# get list of data folders
# Only real directories whose name contains "MSEL" are kept: os.listdir also
# returns plain files (archives, stray csv files), which the later cells would
# otherwise try to open as study folders. The list is sorted because
# os.listdir returns entries in arbitrary order and a stable order makes
# re-runs reproducible.
content_path = "/content/drive/My Drive/epi_data_folders/"
list_of_data_dir = sorted(
    study
    for study in os.listdir(content_path)
    if "MSEL" in study and os.path.isdir(os.path.join(content_path, study))
)
print(list_of_data_dir)

['MSEL_01575', 'MSEL_01828', 'MSEL_01709', 'MSEL_01870', 'MSEL_01842', 'MSEL_01844', 'MSEL_01097', 'MSEL_00182', 'MSEL_01676', 'MSEL_01849', 'MSEL_01808', 'MSEL_00501', 'MSEL_00172', 'MSEL_01763', 'MSEL_00502', 'MSEL_00095', 'MSEL_01110-ICU']


In [None]:
# process labels for concat data
# labels: 0 - preictal, 1 - seizure, 2 - interictal
#
# For every study folder this reads "<study>_labels.csv" (seizure annotations),
# derives preictal and interictal rows around each seizure and writes the
# combined table to "<study>_labels_new.csv".
#
# The offsets below are written as minutes * 60 * 1000, i.e. they are in
# milliseconds; this assumes the timestamps in the label files are epoch
# milliseconds - TODO confirm against the export format.
PREICTAL_LEAD_MS = 75 * 60 * 1000       # preictal window opens 75 min before seizure start
PREICTAL_DURATION_MS = 60 * 60 * 1000   # ...and lasts 60 min
INTERICTAL_GAP_MS = 120 * 60 * 1000     # interictal starts 120 min after seizure end

for data_dir in list_of_data_dir:
    # show which study is being processed
    print(data_dir)
    study_dir = os.path.join(content_path, data_dir)

    # clean up original labels: keep the relevant columns, rename them,
    # mark every original row as a seizure (label 1), drop duplicates and
    # order the seizures chronologically
    events = pd.read_csv(os.path.join(study_dir, data_dir + "_labels.csv"), delimiter=',')
    new_events = events[['name', 'startTime', 'duration', 'labels.startTime', 'labels.duration', 'labels.note']].copy()
    new_events.columns = ['name', 'study_start_time', 'study_duration', 'label_start_time', 'label_duration', 'label_note']
    new_events.insert(0, 'label', 1)
    new_events = new_events.drop_duplicates(ignore_index=True)
    new_events = new_events.sort_values(by=['label_start_time'], ignore_index=True)

    # snapshot of the seizure-only rows; derived rows are collected separately
    # and concatenated once (DataFrame.append no longer exists in pandas >= 2.0)
    seizures = new_events.to_dict('records')
    derived_rows = []

    for i, seizure in enumerate(seizures):
        seizure_start = int(seizure['label_start_time'])
        seizure_end = seizure_start + int(seizure['label_duration'])

        possible_start_time_pre = seizure_start - PREICTAL_LEAD_MS
        possible_start_time_inter = seizure_end + INTERICTAL_GAP_MS

        # preictal: only label it when the window opens after the previous
        # seizure has ended. The first seizure has no predecessor to check
        # against and therefore gets no preictal row.
        if i > 0:
            previous = seizures[i - 1]
            previous_end = int(previous['label_start_time']) + int(previous['label_duration'])
            if previous_end < possible_start_time_pre:
                derived_rows.append({
                    **seizure,
                    'label': 0,
                    'label_start_time': possible_start_time_pre,
                    'label_duration': PREICTAL_DURATION_MS,
                    'label_note': 'Preictal - ' + seizure['label_note'],
                })

        # interictal: fills the gap between this seizure (plus the safety
        # margin) and the start of the next seizure's preictal window; skipped
        # when that gap is empty
        if i + 1 < len(seizures):
            next_preictal_start = int(seizures[i + 1]['label_start_time']) - PREICTAL_LEAD_MS
            if possible_start_time_inter < next_preictal_start:
                derived_rows.append({
                    **seizure,
                    'label': 2,
                    'label_start_time': possible_start_time_inter,
                    'label_duration': next_preictal_start - possible_start_time_inter,
                    'label_note': 'Interictal - ' + seizure['label_note'],
                })
        else:
            # last seizure of the study: there is no following seizure to bound
            # an interictal window
            print('no space for interictal data')

    if derived_rows:
        new_events = pd.concat([new_events, pd.DataFrame(derived_rows)], ignore_index=True)

    # add new concat labels to csv
    new_events.to_csv(os.path.join(study_dir, data_dir + "_labels_new.csv"), index=False)



MSEL_01575
no space for interictal data
MSEL_01828
no space for interictal data
MSEL_01709
no space for interictal data
MSEL_01870
no space for interictal data
MSEL_01842
no space for interictal data
MSEL_01844
no space for interictal data
MSEL_01097
no space for interictal data
MSEL_00182
no space for interictal data
MSEL_01676
no space for interictal data
MSEL_01849
no space for interictal data
MSEL_01808
no space for interictal data
MSEL_00501
no space for interictal data
MSEL_00172
no space for interictal data
MSEL_01763
no space for interictal data
MSEL_00502
no space for interictal data
MSEL_00095
no space for interictal data
MSEL_01110-ICU
no space for interictal data


In [None]:
# cut the labels at end times when data is missing
# Per-study earliest timestamp to start segmenting from (0 = no lower bound,
# segmentation then starts at the first label).
start_times = {
    'MSEL_01575': 1571251745000,
    'MSEL_01828': 1548093402000,
    'MSEL_01709': 1544222237000,
    'MSEL_01870': 1573167317000,
    'MSEL_01842': 1558037650000,
    'MSEL_01844': 1558559048000,
    'MSEL_01097': 1552336421000,
    'MSEL_00182': 1564081666750,
    'MSEL_01676': 1551478692000,
    'MSEL_01849': 1560438121000,
    'MSEL_01808': 1557348756000,
    'MSEL_00501': 1556830144000,
    'MSEL_00172': 1556307778000,
    'MSEL_01763': 1551305302000,
    'MSEL_00502': 1551533038000,
    'MSEL_00095': 0,
    'MSEL_01110-ICU': 0
}

# Per-study timestamp after which no segment may start; the last segment is
# shortened so that it does not run past this point.
end_times = {
    'MSEL_01575': 1571695329867,
    'MSEL_01828': 1548170466266,
    'MSEL_01709': 1544633089164,
    'MSEL_01870': 1573598375992,
    'MSEL_01842': 1558340907844,
    'MSEL_01844': 1558882951781,
    'MSEL_01097': 1552757758242,
    'MSEL_00182': 1564388537797,
    'MSEL_01676': 1551741522492,
    'MSEL_01849': 1560955967781,
    'MSEL_01808': 1557678582953,
    'MSEL_00501': 1557111309516,
    'MSEL_00172': 1556559594875,
    'MSEL_01763': 1551385416992,
    'MSEL_00502': 1551706309906,
    'MSEL_00095': 1543949562000,
    'MSEL_01110-ICU': 1560153901000
}

SEGMENT_LENGTH = 60 * 1000  # step / nominal duration of one segment
SEGMENT_COLUMNS = ['label', 'name', 'study_start_time', 'study_duration', 'label_start_time', 'label_duration', 'label_note']

# process labels for segmented data
# Reads "<study>_labels_new.csv", walks the labelled time range in fixed steps
# and writes one row per step that falls inside a label to
# "<study>_labels_split.csv".
for data_dir in list_of_data_dir:
    print(data_dir)
    labels_path = content_path + data_dir + "/" + data_dir + "_labels_new.csv"
    split_path = content_path + data_dir + "/" + data_dir + "_labels_split.csv"
    events = pd.read_csv(labels_path, delimiter=',')
    print(labels_path)
    events = events.sort_values(by=['label_start_time'], ignore_index=True)

    # nothing to segment: still write a header-only file so every study has an output
    if events.empty:
        segmented_events = pd.DataFrame(columns=SEGMENT_COLUMNS)
        segmented_events.to_csv(split_path, index=False)
        continue

    # get study data (taken from the first row and repeated on every segment)
    study_name = events.iloc[0]['name']
    study_start = int(events.iloc[0]['study_start_time'])
    study_duration = events.iloc[0]['study_duration']

    # get label data
    first_label_start = int(events.iloc[0]['label_start_time'])
    print('label start', first_label_start)
    print('study start', start_times[data_dir])

    # start at the configured start time, or at the first label if that is later
    segment_start_time = max(start_times[data_dir], first_label_start)

    # end of the label that starts last (labels are sorted by start time)
    last_label_end = int(events.iloc[-1]['label_start_time']) + int(events.iloc[-1]['label_duration'])
    recording_end = end_times[data_dir]

    # (start, end, label, note) per label, in chronological start order
    intervals = [
        (int(start), int(start) + int(duration), label, note)
        for start, duration, label, note in zip(
            events['label_start_time'], events['label_duration'],
            events['label'], events['label_note'])
    ]

    # Segments starting after recording_end are never emitted, so the walk can
    # stop at whichever bound comes first.
    stop_time = min(last_label_end, recording_end)

    segment_rows = []
    while segment_start_time <= stop_time:
        # the first label (by start time) containing the segment start wins
        for label_start, label_end, label, label_note in intervals:
            if label_start <= segment_start_time <= label_end:
                segment_rows.append({
                    'label': label,
                    'name': study_name,
                    'study_start_time': study_start,
                    'study_duration': study_duration,
                    'label_start_time': segment_start_time,
                    # full step, or whatever is left before the recording end
                    'label_duration': min(SEGMENT_LENGTH, recording_end - segment_start_time),
                    'label_note': label_note,
                })
                break

        segment_start_time += SEGMENT_LENGTH

    # build the frame once instead of growing it row by row
    # (DataFrame.append no longer exists in pandas >= 2.0)
    segmented_events = pd.DataFrame(segment_rows, columns=SEGMENT_COLUMNS)
    segmented_events.to_csv(split_path, index=False)



MSEL_01575
/content/drive/My Drive/epi_data_folders/MSEL_01575/MSEL_01575_labels_new.csv
label start 1571510363000
study start 1571251745000
MSEL_01828
/content/drive/My Drive/epi_data_folders/MSEL_01828/MSEL_01828_labels_new.csv
label start 1548113352000
study start 1548093402000
MSEL_01709
/content/drive/My Drive/epi_data_folders/MSEL_01709/MSEL_01709_labels_new.csv
label start 1544279270000
study start 1544222237000
MSEL_01870
/content/drive/My Drive/epi_data_folders/MSEL_01870/MSEL_01870_labels_new.csv
label start 1573313368000
study start 1573167317000
MSEL_01842
/content/drive/My Drive/epi_data_folders/MSEL_01842/MSEL_01842_labels_new.csv
label start 1558054199000
study start 1558037650000
MSEL_01844
/content/drive/My Drive/epi_data_folders/MSEL_01844/MSEL_01844_labels_new.csv
label start 1558752344000
study start 1558559048000
MSEL_01097
/content/drive/My Drive/epi_data_folders/MSEL_01097/MSEL_01097_labels_new.csv
label start 1552438175000
study start 1552336421000
MSEL_00182
/c