In [1]:
import mne
import pandas as pd
from scipy.fft import fft, fftfreq, rfft, rfftfreq, rfftn 
import numpy as np
import scipy
import time
import matplotlib.pyplot as plt 
from tqdm import tqdm
import multiprocessing as mp
import os
from collections import Counter
from edf_file_processing import create_raw_object, visualize_data, find_all_paths

In [2]:
# Every path reported by find_all_paths for the 'edf' directory.
ALL_EDF_FILES = [*find_all_paths('edf').values()]

# Samples per second; used below to turn a sample count into 2-second windows.
FREQ = 250

# Numeric id -> label string. The strings match the values seen in the
# 'Seizure Type' column of the annotation sheets; 'NONE' is written for
# windows without a seizure.
LABELS_VAL = dict(enumerate([
    'NONE', 'FNSZ', 'GNSZ', 'SPSZ', 'CPSZ', 'ABSZ',
    'TNSZ', 'CNSZ', 'TCSZ', 'ATSZ', 'MYSZ',
]))

# Reverse lookup: label string -> numeric id.
LABELS_KEY = dict(zip(LABELS_VAL.values(), LABELS_VAL.keys()))

In [3]:
def xlsx_to_df(file_path):
    '''
    Read the 'train' and 'dev' sheets of an Excel workbook.

    Both sheets are read with the same options: the first row is treated as
    the header, only columns C-F, I, L and M-O are kept, and those nine
    columns are renamed to the names listed below.

    Returns a tuple (train_dataframe, dev_dataframe).
    '''
    column_names = ['Patient', 'Session', 'File', 'EEG Type', 'Normal/Abnormal',
                    'Filename', 'Start', 'Stop', 'Seizure Type']
    kept_columns = 'C:F, I, L, M:O'

    def read_sheet(sheet):
        # A fresh copy of the names per call, exactly one list per read.
        return pd.read_excel(
            file_path,
            sheet_name=sheet,
            header=0,
            names=list(column_names),
            usecols=kept_columns,
        )

    train_seizures_timing = read_sheet('train')
    dev_seizures_timing = read_sheet('dev')
    return train_seizures_timing, dev_seizures_timing

In [4]:
def get_data_times(raw_edf, start_s, duration=2):
    '''
    Extract `duration` seconds of signal starting at `start_s` seconds.

    Parameters:
        raw_edf  -- object providing `time_as_index(times)` and 2-D indexing
                    `raw_edf[:, start:stop]` that yields `(data, times)`
        start_s  -- start of the excerpt, in seconds
        duration -- length of the excerpt, in seconds (default 2)

    Returns data and times as a tuple if the operation was successful.
    Returns None if the operation was not successful, after printing the
    specific problem that occurred.

    Only ordinary errors are turned into a None result; KeyboardInterrupt
    and SystemExit are left to propagate so a long run can still be stopped.
    '''
    try:
        start, stop = raw_edf.time_as_index([start_s, start_s + duration])
        data, times = raw_edf[:, start:stop]
    except Exception as exc:
        # Report what actually went wrong instead of assuming a range error.
        print(f"Could not extract {duration} s of data starting at {start_s} s: {exc!r}")
        return None
    return data, times

In [5]:
# Load both annotation sheets and represent every missing cell (NaN) as None.
train_seizures_timing, dev_seizures_timing = (
    sheet.replace({np.nan: None})
    for sheet in xlsx_to_df('_DOCS/seizures_v36r.xlsx')
)

In [6]:
# Preview the first five rows of the training annotations.
train_seizures_timing.head(n=5)

Unnamed: 0,Patient,Session,File,EEG Type,Normal/Abnormal,Filename,Start,Stop,Seizure Type
0,,,,,,Filename,Start,Stop,
1,1402.0,s003,t001,Outpatient,Normal,./train/02_tcp_le/014/00001402/s003_2008_07_21...,,,
2,1479.0,s001,t001,ICU,Abnormal,./train/02_tcp_le/014/00001479/s001_2004_06_14...,950.004,1047.02,FNSZ
3,1413.0,s001,t000,EMU,Abnormal,./train/02_tcp_le/014/00001413/s001_2004_07_13...,315.34,319.024,ABSZ
4,,,,,,./train/02_tcp_le/014/00001413/s001_2004_07_13...,327.668,330.032,ABSZ


In [7]:
def delete_unknown_files(frames=None, root='edf'):
    '''
    Delete the files belonging to every recording whose 'EEG Type' is
    'Unknown'.

    Parameters:
        frames -- iterable of DataFrames with 'Filename' and 'EEG Type'
                  columns; defaults to the notebook-level
                  (train_seizures_timing, dev_seizures_timing)
        root   -- prefix put in front of each path (default 'edf')

    For a 'Filename' value such as './train/a/x.tse' (a leading '.' and a
    three-character extension are assumed by the slicing below) these paths
    are removed, each one independently of the others:
        <root>/train/a/filtered_x.edf
        <root>/train/a/x.tse
        <root>/train/a/x.lbl
        <root>/train/a/x.lbl_bi
        <root>/train/a/x.tse_bi

    A path that does not exist is skipped silently; any other OS error is
    printed and the remaining paths are still processed.
    '''
    if frames is None:
        frames = (train_seizures_timing, dev_seizures_timing)

    to_delete = []
    for frame in frames:
        to_delete += list(frame['Filename'][frame['EEG Type'] == 'Unknown'])

    for file in to_delete:
        splitted = file[1:-3].split('/')
        candidates = (
            f'{root}{"/".join(splitted[:-1])}/filtered_{splitted[-1]}edf',
            f'{root}{file[1:]}',
            f'{root}{file[1:-3]}lbl',
            f'{root}{file[1:-3]}lbl_bi',
            f'{root}{file[1:]}_bi',
        )
        # One try per path: a missing file must not stop the others from
        # being removed.
        for path in candidates:
            try:
                os.remove(path)
            except FileNotFoundError:
                continue
            except OSError as exc:
                print(f'Could not remove {path}: {exc}')

In [8]:
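# Destructive one-off clean-up, left disabled on purpose: uncomment the call
# below to delete the files of every recording whose EEG type is 'Unknown'.
# delete_unknown_files()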

In [9]:
# Seizure start/stop times rounded to whole seconds. The first non-null entry
# of each column is skipped: in the preview above it is the repeated header
# text ('Start' / 'Stop'), not a number.
start_time = train_seizures_timing['Start'].dropna()[1:].apply(lambda sec: round(float(sec)))
stop_time = train_seizures_timing['Stop'].dropna()[1:].apply(lambda sec: round(float(sec)))

In [10]:
# Seizure durations in whole seconds, in ascending order, and how often each
# duration occurs.
min_time_seizure = sorted(stop_time - start_time)
c = Counter(min_time_seizure)

In [11]:
# Plain-dict copy of the counter; keys stay in insertion order, i.e. ascending
# duration, because the counter was filled from the sorted list.
dct_counter = dict(c.items())

In [12]:
# The first key is the smallest duration (keys are in ascending order).
print(f"Min duration of a seizure is {list(dct_counter)[0]} seconds")

Min duration of a seizure is 2 seconds


In [15]:
def labeling_data(file_path):
    '''
    Create a .txt file storing one label per line for every 2-second window
    of a recording.

    Line k (counting from 0) describes the window covering seconds
    [2k, 2k + 2). A window gets the seizure type of an annotated event when
    its index lies in range(round(start / 2), round(stop / 2)); every other
    window gets "NONE". If events overlap, the event listed last in the
    annotation sheet wins.

    Parameters:
        file_path -- path of the EDF file; it must contain 'train/' or 'dev/'
                     so that the matching annotation sheet can be chosen. The
                     annotation file name is derived from it by dropping the
                     first three characters, swapping the three-character
                     extension for 'tse' and removing 'filtered_'.

    Returns the path of the written .txt file (same path as the EDF file
    with the extension replaced by 'txt').

    Raises:
        RuntimeError -- the signal could not be read (get_data_times
                        returned None)
        ValueError   -- file_path is neither a train nor a dev path
        IndexError   -- the recording is not listed in the annotation sheet
    '''
    window_s = 2  # length of one labelled window, in seconds

    raw_data = create_raw_object(file_path)
    duration = int(raw_data.n_times / raw_data.info['sfreq'])
    extracted = get_data_times(raw_edf=raw_data, start_s=0, duration=duration)
    if extracted is None:
        # get_data_times reports failure with None; unpacking it directly
        # would only produce an opaque TypeError.
        raise RuntimeError(f'Could not read the signal from {file_path}')
    data, _times = extracted

    # Number of complete windows, based on the sample count of the first row
    # of `data` and the notebook-level sampling rate FREQ.
    num_two_sec_periods = int(len(data[0]) / (window_s * FREQ))

    tse_filename = f'.{file_path[3:-3]}tse'.replace('filtered_', '')

    if 'train/' in file_path:
        df = train_seizures_timing
    elif 'dev/' in file_path:
        df = dev_seizures_timing
    else:
        raise ValueError(
            f"Cannot pick an annotation sheet for {file_path}: "
            "expected 'train/' or 'dev/' in the path"
        )

    rows = df[df['Filename'] == tse_filename]
    if len(rows) == 0:
        raise IndexError(f'{tse_filename} is not listed in the annotation sheet')

    # Window index -> seizure type. A direct lookup table does not depend on
    # the events being sorted or free of duplicate indices.
    period_labels = {}
    for label, start, stop in zip(rows['Seizure Type'], rows['Start'], rows['Stop']):
        # Rows without a seizure carry missing values (None or NaN).
        if pd.isna(label) or pd.isna(start) or pd.isna(stop):
            continue
        for period in range(round(start / window_s), round(stop / window_s)):
            period_labels[period] = label

    txt_name = f'{file_path[:-3]}txt'

    # Window indices are 0-based here to match the 0-based indices computed
    # from the event times above; `with` closes the file even on error.
    with open(txt_name, 'w') as file:
        for period in range(num_two_sec_periods):
            file.write(period_labels.get(period, "NONE"))
            file.write('\n')

    return txt_name

In [16]:
def main(data_dir='edf/train/', processes=10):
    '''
    Run labeling_data in parallel on every path that find_all_paths reports
    for `data_dir`.

    Parameters:
        data_dir  -- directory handed to find_all_paths
                     (default 'edf/train/')
        processes -- number of worker processes in the pool (default 10)

    Returns the list of values returned by labeling_data, in the same order
    as the input paths.
    '''
    file_paths = list(find_all_paths(data_dir).values())
    with mp.Pool(processes) as pool:
        # map() blocks until every file is processed, so returning from
        # inside the `with` block is safe.
        return pool.map(labeling_data, file_paths)

In [17]:
# The timer gets its own name: `start_time` already holds the rounded seizure
# start times computed in an earlier cell and must not be replaced by a float.
# perf_counter() is a monotonic clock, so the difference is a reliable
# elapsed time in seconds.
run_started = time.perf_counter()
main()
duration = time.perf_counter() - run_started
print(f"Total time: {duration}")

Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/02_tcp_le/050/00005096/s006_2008_11_04/filtered_00005096_s006_t000.edf...
Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/02_tcp_le/060/00006087/s006_2011_04_12/filtered_00006087_s006_t000.edf...
Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/02_tcp_le/054/00005427/s002_2009_02_22/filtered_00005427_s002_t000.edf...
Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/02_tcp_le/003/00000302/s002_2003_09_25/filtered_00000302_s002_t002.edf...
Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/01_tcp_ar/054/00005427/s011_2012_10_25/filtered_00005427_s011_t006.edf...
Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/02_tcp_le/055/00005575/s001_2009_05_06/filtered_00005575_s001_t000.edf...
EDF file detected
Extracting EDF parameters from /home/sophia/projects/eeg_epilepsy/edf/train/02_tcp

In [2]:
# Show every path that find_all_paths reports for the dev directory.
[*find_all_paths('edf/dev/').values()]

['edf/dev/02_tcp_le/050/00005031/s001_2008_07_22/filtered_00005031_s001_t000.edf',
 'edf/dev/02_tcp_le/007/00000795/s001_2003_12_02/filtered_00000795_s001_t001.edf',
 'edf/dev/02_tcp_le/007/00000795/s001_2003_12_02/filtered_00000795_s001_t002.edf',
 'edf/dev/02_tcp_le/054/00005479/s002_2008_11_14/filtered_00005479_s002_t000.edf',
 'edf/dev/02_tcp_le/054/00005479/s001_2008_11_14/filtered_00005479_s001_t003.edf',
 'edf/dev/02_tcp_le/054/00005479/s001_2008_11_14/filtered_00005479_s001_t002.edf',
 'edf/dev/02_tcp_le/054/00005479/s001_2008_11_14/filtered_00005479_s001_t000.edf',
 'edf/dev/02_tcp_le/054/00005479/s001_2008_11_14/filtered_00005479_s001_t001.edf',
 'edf/dev/02_tcp_le/056/00005625/s002_2010_03_29/filtered_00005625_s002_t001.edf',
 'edf/dev/02_tcp_le/056/00005625/s001_2009_03_31/filtered_00005625_s001_t001.edf',
 'edf/dev/02_tcp_le/010/00001027/s001_2004_01_27/filtered_00001027_s001_t001.edf',
 'edf/dev/02_tcp_le/059/00005943/s001_2009_06_28/filtered_00005943_s001_t000.edf',
 'ed