# Imports

In [1]:
import pyedflib
import numpy as np
from pylab import *
from scipy import signal
import pandas as pd
import seaborn
import glob, os
from collections import OrderedDict
from scipy.stats import *
import csv
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt


In [2]:
# Working directory at the moment this cell runs.
basedir = os.getcwd()
# Put here the directory of the CHBMIT DATABASE
# (machine-specific absolute path; edit to point at the local copy).
dbdir = r"F:\Master\TFM\chb-mit-scalp-eeg-database-1.0.0"

# Functions

### Reading functions

In [3]:
class Register:
    """Metadata describing one recording.

    The constructor stores ``name``, ``fs`` and ``nseizures`` unchanged and
    starts with empty ``seizures`` and ``channels`` lists.
    """

    def __init__(self, name, fs, nseizures):
        self.name, self.fs, self.nseizures = name, fs, nseizures
        # Filled afterwards through addSeizure / addCh.
        self.seizures, self.channels = [], []

    def addSeizure(self, start, end):
        """Append one seizure interval, stored as a ``[start, end]`` list."""
        self.seizures += [[start, end]]

    def addCh(self, channels):
        """Replace the stored channel list with ``channels``."""
        self.channels = channels

def read_data(filename, channels=None):
    """Read the requested signals of an EDF file into a 2-D array.

    Parameters
    ----------
    filename : str
        Path handed to ``pyedflib.EdfReader``.
    channels : sequence of str, optional
        Signal labels to read, in the order the rows should appear. When
        omitted or empty, every label reported by the file is read.

    Returns
    -------
    data : numpy.ndarray
        One row per requested channel; the number of columns is the sample
        count of the first signal in the file.
    fs
        Sampling frequency reported for the first signal in the file.
    time : numpy.ndarray
        ``np.linspace(0, n_samples / fs, n_samples)``.

    Notes
    -----
    Labels are resolved with ``index`` on the label list, so a label that
    occurs more than once in the file always maps to its first occurrence,
    and an unknown label raises (``ValueError``, assuming pyedflib returns a
    plain list).
    """
    # The context manager closes the reader even if reading a signal fails;
    # the original only closed it on the success path via the private _close().
    with pyedflib.EdfReader(filename) as f:
        channel_names = f.getSignalLabels()
        if channels is None or len(channels) == 0:
            channels = channel_names

        fs = f.getSampleFrequencies()

        data = np.zeros((len(channels), f.getNSamples()[0]))
        for i, channel in enumerate(channels):
            data[i, :] = f.readSignal(channel_names.index(channel))

    time = np.linspace(0, data.shape[1] / fs[0], data.shape[1])
    return data, fs[0], time

def trunc(data, timeW, fs):
    """Trim ``data`` so its length is a whole number of time windows.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array indexed as ``[channel, sample]``.
    timeW : number
        Window length in seconds.
    fs : number
        Sampling frequency in samples per second.

    Returns
    -------
    data : numpy.ndarray
        ``data`` cut to its first ``nw * N`` columns.
    time : numpy.ndarray
        ``np.linspace(0, n_samples / fs, n_samples)`` for the trimmed data.
    nw : int
        Number of complete windows.
    N : int
        Samples per window.

    Raises
    ------
    ValueError
        If ``timeW * fs`` rounds to less than one sample.
    """
    # Honour the caller's window length (it used to be overwritten with 2)
    # and force an integer so N is usable for slicing and reshaping even when
    # fs arrives as a float.
    N = int(round(timeW * fs))
    if N <= 0:
        raise ValueError("timeW * fs must amount to at least one sample")

    nw = data.shape[1] // N

    data = data[:, :nw * N]
    time = np.linspace(0, data.shape[1] / fs, data.shape[1])
    return data, time, nw, N

def calc_variances_entropys(signals, nw, N):
    """Compute per-window variance and entropy for every channel.

    Each row of ``signals`` is reshaped to ``(nw, N)``. For every window the
    variance is taken over its samples, and the entropy (base 2) is computed
    from the occurrence counts of the distinct sample values in the window.

    Returns two arrays of shape ``(n_channels, nw)``: variances, entropies.
    """
    n_channels = signals.shape[0]
    variances = np.zeros([n_channels, nw])
    entropys = np.zeros([n_channels, nw])

    for row, samples in enumerate(signals):
        windows = np.reshape(samples, [nw, N])
        variances[row, :] = np.var(windows, 1)
        # Entropy of the value histogram, one window at a time.
        entropys[row, :] = [
            entropy(pd.Series(window).value_counts(), base=2)
            for window in windows
        ]

    return variances, entropys


def select_best_signals(signals_trunc, nw, N, seizureW, nchannels, channel_index):
    """Rank channels by ictal vs. non-ictal contrast and keep the best ones.

    For every channel the score is::

        (mean ictal variance - mean non-ictal variance)
        * (mean ictal entropy - mean non-ictal entropy)

    where the per-window variances and entropies come from
    ``calc_variances_entropys``.

    Parameters
    ----------
    signals_trunc : numpy.ndarray
        2-D array ``[channel, sample]`` whose length is ``nw * N``.
    nw, N : int
        Number of windows and samples per window.
    seizureW : array-like of length ``nw``
        Entries equal to 1 (or True) mark ictal windows.
    nchannels : int
        Maximum number of channels to keep.
    channel_index : dict
        Maps row index -> channel label. Row ``i`` of ``signals_trunc`` is
        taken to be the channel labelled ``channel_index[i]``.

    Returns
    -------
    significant_signals : numpy.ndarray
        Rows of ``signals_trunc`` for the kept channels, best score first.
    significant_channels : list
        Labels of the kept channels in the same order.

    Notes
    -----
    If no window (or every window) is flagged as ictal, one of the means is
    taken over an empty selection and the scores become NaN, which makes the
    ordering meaningless.
    """
    # label -> row, the inverse of the mapping that was passed in.
    label_to_row = {label: row for row, label in channel_index.items()}

    variances, entropys = calc_variances_entropys(signals_trunc, nw, N)

    ictal_mask = np.asarray(seizureW) == 1
    ictal_index = np.where(ictal_mask)[0]
    nictal_index = np.where(~ictal_mask)[0]

    i_variances = np.take(variances, ictal_index, axis=1)
    ni_variances = np.take(variances, nictal_index, axis=1)
    # The entropy terms were previously taken from `variances` by mistake,
    # which reduced the score to the squared variance difference.
    i_entropys = np.take(entropys, ictal_index, axis=1)
    ni_entropys = np.take(entropys, nictal_index, axis=1)

    score = ((np.mean(i_variances, axis=1) - np.mean(ni_variances, axis=1))
             * (np.mean(i_entropys, axis=1) - np.mean(ni_entropys, axis=1)))

    # Pair labels with scores (zip stops at the shorter of the two) and order
    # the channels by significance, highest score first.
    ranked = sorted(zip(label_to_row, score), key=lambda item: item[1], reverse=True)
    significant_channels = [label for label, _ in ranked[:nchannels]]

    significant_signals = np.zeros([len(significant_channels), signals_trunc.shape[1]])
    for i, label in enumerate(significant_channels):
        significant_signals[i, :] = signals_trunc[label_to_row[label], :]

    return significant_signals, significant_channels

def select_best_signals2(signals_trunc, nw, N, seizureW, nchannels, channel_index):
    """Rank channels using ictal windows only and keep the best ones.

    For every channel the score is ``mean ictal variance * mean ictal
    entropy``, with per-window values from ``calc_variances_entropys``.

    Parameters
    ----------
    signals_trunc : numpy.ndarray
        2-D array ``[channel, sample]`` whose length is ``nw * N``.
    nw, N : int
        Number of windows and samples per window.
    seizureW : array-like of length ``nw``
        Entries equal to 1 (or True) mark ictal windows.
    nchannels : int
        Maximum number of channels to keep.
    channel_index : dict
        Maps row index -> channel label. Row ``i`` of ``signals_trunc`` is
        taken to be the channel labelled ``channel_index[i]``.

    Returns
    -------
    significant_signals : numpy.ndarray
        Rows of ``signals_trunc`` for the kept channels, best score first.
    significant_channels : list
        Labels of the kept channels in the same order.

    Notes
    -----
    If no window is flagged as ictal the means are taken over an empty
    selection and the scores become NaN, which makes the ordering
    meaningless.
    """
    # label -> row, the inverse of the mapping that was passed in.
    label_to_row = {label: row for row, label in channel_index.items()}

    variances, entropys = calc_variances_entropys(signals_trunc, nw, N)

    ictal_index = np.where(np.asarray(seizureW) == 1)[0]

    i_variances = np.take(variances, ictal_index, axis=1)
    # Previously taken from `variances` by mistake, so entropy was ignored.
    i_entropys = np.take(entropys, ictal_index, axis=1)

    score = np.mean(i_variances, axis=1) * np.mean(i_entropys, axis=1)

    # Pair labels with scores (zip stops at the shorter of the two) and order
    # the channels by significance, highest score first.
    ranked = sorted(zip(label_to_row, score), key=lambda item: item[1], reverse=True)
    significant_channels = [label for label, _ in ranked[:nchannels]]

    significant_signals = np.zeros([len(significant_channels), signals_trunc.shape[1]])
    for i, label in enumerate(significant_channels):
        significant_signals[i, :] = signals_trunc[label_to_row[label], :]

    return significant_signals, significant_channels

In [4]:
def read_annotations(annotation):
    """Parse a summary text file into per-recording registers.

    The file is scanned line by line for these markers:

    * ``Data Sampling Rate`` - the 4th whitespace-separated token is the
      sampling frequency used for the recordings that follow.
    * ``Channel `` - the 3rd token is a channel label; occurrences are counted.
    * ``Channels changed`` - increments the montage counter (starts at 1).
    * ``File Name`` - the 3rd token is the recording name. Following lines
      are consumed up to ``Number of Seizures`` (6th token is the count), and
      then two lines (start, end) are read per seizure.

    Parameters
    ----------
    annotation : str
        Path of the summary file.

    Returns
    -------
    registers : dict
        Recording name -> ``Register`` with its seizure intervals added.
    channel_index : dict
        ``{position: label}`` for the labels whose occurrence count equals
        the number of montages, in first-seen order.

    Raises
    ------
    ValueError
        If the file ends after a ``File Name`` line without a matching
        ``Number of Seizures`` line (this used to loop forever).
    """
    registers = {}
    channel_counts = {}
    nmontages = 1
    fs = None  # stays None if a recording is listed before any sampling-rate line

    with open(annotation) as f:
        for line in f:
            if "Data Sampling Rate" in line:
                fs = int(line.split()[3])

            elif "Channel " in line:
                label = line.split()[2]
                channel_counts[label] = channel_counts.get(label, 0) + 1

            elif "Channels changed" in line:
                nmontages += 1

            elif "File Name" in line:
                name = line.split()[2]
                while True:
                    new_line = f.readline()
                    if not new_line:
                        # readline() returns '' at EOF; without this guard the
                        # loop would never terminate on a truncated file.
                        raise ValueError(
                            "Reached end of %r without a 'Number of Seizures' "
                            "line for %s" % (annotation, name)
                        )
                    if "Number of Seizures" not in new_line:
                        continue

                    nseizures = int(new_line.split()[5])
                    register = Register(name, fs, nseizures)
                    for _ in range(nseizures):
                        line1 = f.readline().split()
                        line2 = f.readline().split()
                        # Two layouts occur: with a seizure number before
                        # "Start"/"End" the value is the 5th token, otherwise
                        # it is the 4th.
                        pos = 4 if line1[3] == "Time:" else 3
                        register.addSeizure(int(line1[pos]), int(line2[pos]))

                    registers[name] = register
                    break

    # Keep only the labels counted exactly once per montage.
    common_channels = [label for label, count in channel_counts.items()
                       if count == nmontages]
    channel_index = dict(enumerate(common_channels))
    return registers, channel_index

# Dataset creation for all the patients

In [5]:
# Working directory at the moment this cell runs.
basedir = os.getcwd()
# Directory the per-patient CSV files are written to. os.path.join replaces
# the hand-written '\D...' suffix, which is an invalid escape sequence and
# only forms a valid path on Windows.
datasetdir = os.path.join(basedir, 'DatasetsSinExp')

# Put here the directory of the CHBMIT DATABASE
dbdir = r"F:\Master\TFM\chb-mit-scalp-eeg-database-1.0.0"


In [6]:
# Switch into the database root so the relative listing below works.
os.chdir(dbdir)
# Every sub-directory of the database root is treated as a patient folder.
patients = [name for name in os.listdir(".") if os.path.isdir(name)]
# NOTE(review): the next two assignments override the listing above; as
# written only 'chb04' is processed and the slice has no effect.
patients = patients[11:]
patients = ['chb04']

In [7]:
# Load the list of recordings that contain seizures. The context manager
# closes the file even if reading fails, and os.path.join avoids the invalid
# '\R' escape of the hand-built path.
with open(os.path.join(dbdir, 'RECORDS-WITH-SEIZURES'), 'r', encoding='utf-8') as f:
    seizure_files = f.read().split('\n')
# Drop the first six characters of every entry (presumably the "chbNN/"
# folder prefix - TODO confirm) so only the file name remains.
seizure_files = [entry[6:] for entry in seizure_files]

In [8]:
timeW = 2            # window length in seconds
decimationCoeff = 2  # decimation factor applied to every recording

for patient in patients:
    print('---------------------------------------------- Patient: ' + patient + ' ----------------------------------------------------')
    fdir = dbdir + '\\' + patient
    # Recordings are opened by bare file name, so work from the patient folder.
    os.chdir(fdir)
    annotation = glob.glob('*txt')

    registers, channel_index = read_annotations(annotation[0])

    nchannels = len(channel_index)
    selected_channels_lof = []

    dataframe = pd.DataFrame()
    for key, value in registers.items():

        # Signal reading: only if it is a seizure file
        if key not in seizure_files:
            continue

        # NOTE(review): value.channels is never filled in this notebook, so
        # read_data returns every signal of the file, while channel_index only
        # lists the channels common to all montages. Row i of `signals` is
        # therefore not guaranteed to be channel_index[i] - confirm.
        signals, originalfs, time = read_data(key, value.channels)
        # Decimation
        signals = signal.decimate(signals, decimationCoeff)
        # Integer rate so it can be used for sample indexing below.
        fs = int(originalfs // decimationCoeff)

        # Truncate to generate time windows
        signals_trunc, time, nw, N = trunc(signals, timeW, fs)
        samples = signals_trunc.shape[1]

        print("Readed " + key)

        # Seizure vector creation: 1 for every sample inside an annotated seizure
        seizure = np.zeros(samples)
        for start_s, end_s in value.seizures:
            start = int(start_s * fs)
            end = int(end_s * fs)
            # Scalar assignment clips at the end of the array; assigning
            # np.ones(end - start) raised when the seizure ran past the
            # truncated recording.
            seizure[start:end] = 1

        # A window counts as ictal when more than half of its samples are.
        # np.sum is used explicitly: the builtin sum(x, 1) means something
        # else and only worked through the pylab star import.
        seizureW = np.reshape(seizure, [nw, N])
        seizureW = (np.sum(seizureW, 1) > N//2)

        selected_signals, selected_channels = select_best_signals(signals_trunc, nw, N, seizureW, nchannels, channel_index)
        selected_channels_lof.append(selected_channels)

        # Rewrite the csv file that stores the significance order of the
        # selected channels (one row per processed recording so far).
        # newline='' is what the csv module requires to avoid blank rows on
        # Windows; writing via the full path removes the chdir round trip.
        csv_path = os.path.join(datasetdir, patient + '_channel_order.csv')
        with open(csv_path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(list(range(nchannels)))
            writer.writerows(selected_channels_lof)
    

---------------------------------------------- Patient: chb04 ----------------------------------------------------
Readed chb04_05.edf
Readed chb04_08.edf
Readed chb04_28.edf


In [34]:
channel_index

{0: 'FP1-F7',
 1: 'F7-T7',
 2: 'T7-P7',
 3: 'P7-O1',
 4: 'FP1-F3',
 5: 'F3-C3',
 6: 'C3-P3',
 7: 'P3-O1',
 8: 'FZ-CZ',
 9: 'CZ-PZ',
 10: 'FP2-F4',
 11: 'F4-C4',
 12: 'C4-P4',
 13: 'P4-O2',
 14: 'FP2-F8',
 15: 'F8-T8',
 16: 'P8-O2'}

In [None]:
l = [1,2,3,4, 5,6, 7,8, 9]

In [16]:
def separate_windows(lst, seizure_indexes):
    """Yield the items of ``lst`` found at the positions in ``seizure_indexes``.

    The original cell was an unfinished draft (a dangling ``for enumerate``
    and an undefined ``indices``). This keeps its one complete statement and
    wires it to the parameter that was evidently meant.
    NOTE(review): confirm this is the intended behaviour.

    Returns a generator, as the draft's ``return`` expression did.
    """
    return (lst[i] for i in seizure_indexes)

In [15]:
l2 = select(l, [2,3,4])

6

In [37]:
list(range(9))

[0, 1, 2, 3, 4, 5, 6, 7, 8]

In [6]:
A = np.array([[1,  4, 7],
              [2, 5, 8],
              [3, 6, 9]])

In [7]:
b = np.take(A, [2], axis=1)
b

array([[7],
       [8],
       [9]])

In [8]:
c = np.take(A, [0], axis = 1)
c

array([[1],
       [2],
       [3]])

In [50]:
l = [1,2,3,4,5,6,7,8]
[l.remove(i) for i in [2,3,6]]
l

[1, 4, 5, 7, 8]

0

In [10]:
c-b

array([[-6],
       [-6],
       [-6]])

In [11]:
Z = np.array([[1, 2, 3],
              [1, 2, 3],
              [1, 2, 3]])

In [15]:
mean(Z, axis = 1)

array([2., 2., 2.])

In [5]:
# Working directory at the moment this cell runs.
basedir = os.getcwd()
# Directory the per-patient CSV files are written to. os.path.join replaces
# the hand-written '\C...' suffix, which is an invalid escape sequence and
# only forms a valid path on Windows.
datasetdir = os.path.join(basedir, 'ChannelOrderOnlyIctal')

# Put here the directory of the CHBMIT DATABASE
dbdir = r"F:\Master\TFM\chb-mit-scalp-eeg-database-1.0.0"


In [8]:
# Switch into the database root so the relative listing below works.
os.chdir(dbdir)
# Every sub-directory of the database root is treated as a patient folder.
patients = [name for name in os.listdir(".") if os.path.isdir(name)]
# 'chb12' is excluded from processing; list.remove raises ValueError if the
# folder is not present.
patients.remove('chb12')
patients

['chb01',
 'chb02',
 'chb03',
 'chb05',
 'chb06',
 'chb07',
 'chb08',
 'chb09',
 'chb10',
 'chb11',
 'chb13',
 'chb14',
 'chb15',
 'chb16',
 'chb17',
 'chb18',
 'chb19',
 'chb20',
 'chb21',
 'chb22',
 'chb23',
 'chb24']

In [9]:
# Load the list of recordings that contain seizures. The context manager
# closes the file even if reading fails, and os.path.join avoids the invalid
# '\R' escape of the hand-built path.
with open(os.path.join(dbdir, 'RECORDS-WITH-SEIZURES'), 'r', encoding='utf-8') as f:
    seizure_files = f.read().split('\n')
# Drop the first six characters of every entry (presumably the "chbNN/"
# folder prefix - TODO confirm) so only the file name remains.
seizure_files = [entry[6:] for entry in seizure_files]

In [11]:
timeW = 2            # window length in seconds
decimationCoeff = 2  # decimation factor applied to every recording

for patient in patients:
    print('---------------------------------------------- Patient: ' + patient + ' ----------------------------------------------------')
    fdir = dbdir + '\\' + patient
    # Recordings are opened by bare file name, so work from the patient folder.
    os.chdir(fdir)
    annotation = glob.glob('*txt')

    registers, channel_index = read_annotations(annotation[0])

    nchannels = len(channel_index)
    selected_channels_lof = []

    dataframe = pd.DataFrame()
    for key, value in registers.items():

        # Signal reading: only if it is a seizure file
        if key not in seizure_files:
            continue

        # NOTE(review): value.channels is never filled in this notebook, so
        # read_data returns every signal of the file, while channel_index only
        # lists the channels common to all montages. Row i of `signals` is
        # therefore not guaranteed to be channel_index[i] - confirm.
        signals, originalfs, time = read_data(key, value.channels)
        # Decimation
        signals = signal.decimate(signals, decimationCoeff)
        # Integer rate so it can be used for sample indexing below.
        fs = int(originalfs // decimationCoeff)

        # Truncate to generate time windows
        signals_trunc, time, nw, N = trunc(signals, timeW, fs)
        samples = signals_trunc.shape[1]

        print("Readed " + key)

        # Seizure vector creation: 1 for every sample inside an annotated seizure
        seizure = np.zeros(samples)
        for start_s, end_s in value.seizures:
            start = int(start_s * fs)
            end = int(end_s * fs)
            # Scalar assignment clips at the end of the array; assigning
            # np.ones(end - start) raised when the seizure ran past the
            # truncated recording.
            seizure[start:end] = 1

        # A window counts as ictal when more than half of its samples are.
        # np.sum is used explicitly: the builtin sum(x, 1) means something
        # else and only worked through the pylab star import.
        seizureW = np.reshape(seizure, [nw, N])
        seizureW = (np.sum(seizureW, 1) > N//2)

        selected_signals, selected_channels = select_best_signals2(signals_trunc, nw, N, seizureW, nchannels, channel_index)
        selected_channels_lof.append(selected_channels)

        # Rewrite the csv file that stores the significance order of the
        # selected channels (one row per processed recording so far).
        # newline='' is what the csv module requires to avoid blank rows on
        # Windows; writing via the full path removes the chdir round trip.
        csv_path = os.path.join(datasetdir, patient + '_channel_order.csv')
        with open(csv_path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(list(range(nchannels)))
            writer.writerows(selected_channels_lof)
    

---------------------------------------------- Patient: chb01 ----------------------------------------------------
Readed chb01_03.edf
Readed chb01_04.edf
Readed chb01_15.edf
Readed chb01_16.edf
Readed chb01_18.edf
Readed chb01_21.edf
Readed chb01_26.edf
---------------------------------------------- Patient: chb02 ----------------------------------------------------
Readed chb02_16.edf
Readed chb02_19.edf
---------------------------------------------- Patient: chb03 ----------------------------------------------------
Readed chb03_01.edf
Readed chb03_02.edf
Readed chb03_03.edf
Readed chb03_04.edf
Readed chb03_34.edf
Readed chb03_35.edf
Readed chb03_36.edf
---------------------------------------------- Patient: chb05 ----------------------------------------------------
Readed chb05_06.edf
Readed chb05_13.edf
Readed chb05_16.edf
Readed chb05_17.edf
Readed chb05_22.edf
---------------------------------------------- Patient: chb06 ----------------------------------------------------
Reade