In [1]:
import numpy as np
import pandas as pd
import os
import re
import pyedflib

from scipy.signal import butter, lfilter
from pyentrp import entropy as ent
from scipy.signal import welch
from scipy.signal import boxcar
from scipy.stats import variation
from scipy.stats import skew
from scipy.stats import kurtosis
from scipy.stats import describe

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
def generateEEGdataDF(file, fft_seg_length):
#this function loads the data files, and parses the data into an appropriate dataframe
    
    #open the file, catch exception if file is already open
    try:
        f = pyedflib.EdfReader(file)
    except OSError:
        print('File already open')
        f._close()
    
    #load relevent parameters into df
    numChannels = f.signals_in_file
    channelName=[]
    chName=[]
    channelSamples=[]
    Fs=[]
    traces=[]
    
    for channel in range(numChannels):
        channelName.append(f.getLabel(channel))
        
    twoChannels = [channelName.index('T8-P8')]
    
    for ch in twoChannels:
        chName.append(f.getLabel(ch))
        channelSamples.append(f.getNSamples()[ch])
        Fs.append(f.getSampleFrequency(ch))
        traces.append(f.readSignal(ch))
        
    EEGdata=pd.DataFrame([chName,channelSamples,Fs,traces],index=['chName','channelSamples','Fs','traces'])
    
    #get engineered features from loaded data, add to existing df
    segmentedFFT(EEGdata,fft_seg_length)
    
    #remember to close the file!
    f._close()
    #print(channelName)
    return EEGdata.transpose()

In [3]:
def segmentedFFT(EEGdata,seg_length):
#this function calculates Short Fourier Transform
#length specified (seg_length)    
    for i in EEGdata.columns:
        
        samples_per_segment = EEGdata.loc['channelSamples'][i]/(EEGdata.loc['Fs'][i]/seg_length)
        tot_segments = np.floor(EEGdata.loc['channelSamples'][i]/samples_per_segment).astype(np.int)
        
        #calculate the start and end times of each segment
        startSamples = [np.int(x) for x in range(tot_segments)*samples_per_segment]
        endSamples = [np.int(x) for x in startSamples + samples_per_segment]
        
        
        #generate features for each segment
        mean_data=[]
        max_data=[]
        std_data=[]
        rms_data=[]
        kurtosis_data=[]
        cov_data=[]
        skew_data=[]
        
        delta_mean=[]
        theta_mean=[]
        alpha_mean=[]
        beta_mean=[]
        all_mean=[]
           
        delta_std=[]
        theta_std=[]
        alpha_std=[]
        beta_std=[]
        all_std=[]
        
        delta_cov=[]
        theta_cov=[]
        alpha_cov=[]
        beta_cov=[]
        all_cov=[]
            
        delta_kurtosis=[]
        theta_kurtosis=[]
        alpha_kurtosis=[]
        beta_kurtosis=[]
        all_kurtosis=[]
        
        delta_skew=[]
        theta_skew=[]
        alpha_skew=[]
        beta_skew=[]
        all_skew=[]
        
        delta_rms=[]
        theta_rms=[]
        alpha_rms=[]
        beta_rms=[]
        all_rms=[]
        
        
        for st,en in zip(startSamples,endSamples):
            
            data = welch(EEGdata.loc['traces'][i][st:en], fs=EEGdata.loc['Fs'][i],
                                                    window='hann', nperseg=None, noverlap=None,nfft=None, detrend='constant', return_onesided=True, scaling='density')
            
            # Power spectral density
            Zxx_delta, f1 = bandpass_filter(data[1], data[0],0,4)
            Zxx_theta, f2 = bandpass_filter(data[1], data[0],4,8)
            Zxx_alpha, f3 = bandpass_filter(data[1], data[0],8,12)
            Zxx_beta, f4 = bandpass_filter(data[1], data[0],12,30)
            Zxx_all, f5 = bandpass_filter(data[1], data[0],0,128)
            
            
            # Mean values
            delta_mean.append(np.mean(Zxx_delta))
            theta_mean.append(np.mean(Zxx_theta))
            alpha_mean.append(np.mean(Zxx_alpha))
            beta_mean.append(np.mean(Zxx_beta))
            all_mean.append(np.mean(Zxx_all))
            
            
            # STD values
            delta_std.append(np.std(Zxx_delta))
            theta_std.append(np.std(Zxx_theta))
            alpha_std.append(np.std(Zxx_alpha))
            beta_std.append(np.std(Zxx_beta))
            all_std.append(np.std(Zxx_all))

            # COV values
            delta_cov.append(variation(Zxx_delta))
            theta_cov.append(variation(Zxx_theta))
            alpha_cov.append(variation(Zxx_alpha))
            beta_cov.append(variation(Zxx_beta))
            all_cov.append(variation(Zxx_all))
            
            # Kurtosis values
            delta_kurtosis.append(kurtosis(Zxx_delta))
            theta_kurtosis.append(kurtosis(Zxx_theta))
            alpha_kurtosis.append(kurtosis(Zxx_alpha))
            beta_kurtosis.append(kurtosis(Zxx_beta))
            all_kurtosis.append(kurtosis(Zxx_all))
            
            # Skewness values
            delta_skew.append(skew(Zxx_delta))
            theta_skew.append(skew(Zxx_theta))
            alpha_skew.append(skew(Zxx_alpha))
            beta_skew.append(skew(Zxx_beta))
            all_skew.append(skew(Zxx_all))
            
            # RMS values
            delta_rms.append(np.sqrt(np.mean(Zxx_delta**2)))
            theta_rms.append(np.sqrt(np.mean(Zxx_theta**2)))
            alpha_rms.append(np.sqrt(np.mean(Zxx_alpha**2)))
            beta_rms.append(np.sqrt(np.mean(Zxx_beta**2)))
            all_rms.append(np.sqrt(np.mean(Zxx_all**2)))
            
            
            #Time Domain Features
            max_data.append(np.max(np.absolute(EEGdata.loc['traces'][i][st:en])))
            mean_data.append(np.mean(np.absolute(EEGdata.loc['traces'][i][st:en])))            
            std_data.append(np.std(np.absolute(EEGdata.loc['traces'][i][st:en])))
            kurtosis_data.append(kurtosis(np.absolute(EEGdata.loc['traces'][i][st:en])))
            cov_data.append(variation(np.absolute(EEGdata.loc['traces'][i][st:en])))
            skew_data.append(skew(np.absolute(EEGdata.loc['traces'][i][st:en])))
            rms_data.append(np.sqrt(np.mean((np.absolute(EEGdata.loc['traces'][i][st:en]))**2)))
            
        
        EEGdata.at['delta_mean',i]=delta_mean
        EEGdata.at['theta_mean',i]=theta_mean
        EEGdata.at['alpha_mean',i]=alpha_mean
        EEGdata.at['beta_mean',i]=beta_mean
        EEGdata.at['all_mean',i]=all_mean
        
        EEGdata.at['delta_std',i]=delta_std
        EEGdata.at['theta_std',i]=theta_std
        EEGdata.at['alpha_std',i]=alpha_std
        EEGdata.at['beta_std',i]=beta_std
        EEGdata.at['all_std',i]=all_std
        
        EEGdata.at['delta_cov',i]=delta_cov
        EEGdata.at['theta_cov',i]=theta_cov
        EEGdata.at['alpha_cov',i]=alpha_cov
        EEGdata.at['beta_cov',i]=beta_cov
        EEGdata.at['all_cov',i]=all_cov
            
        EEGdata.at['delta_kurtosis',i]=delta_kurtosis
        EEGdata.at['theta_kurtosis',i]=theta_kurtosis
        EEGdata.at['alpha_kurtosis',i]=alpha_kurtosis
        EEGdata.at['beta_kurtosis',i]=beta_kurtosis
        EEGdata.at['all_kurtosis',i]=all_kurtosis
        
        EEGdata.at['delta_skew',i]=delta_skew
        EEGdata.at['theta_skew',i]=theta_skew
        EEGdata.at['alpha_skew',i]=alpha_skew
        EEGdata.at['beta_skew',i]=beta_skew
        EEGdata.at['all_skew',i]=all_skew
        
        EEGdata.at['delta_rms',i]=delta_rms
        EEGdata.at['theta_rms',i]=theta_rms
        EEGdata.at['alpha_rms',i]=alpha_rms
        EEGdata.at['beta_rms',i]=beta_rms
        EEGdata.at['all_rms',i]=all_rms
             
        
        #Time domain
        EEGdata.at['max',i]=max_data
        EEGdata.at['mean',i]=mean_data
        EEGdata.at['std',i]=std_data
        EEGdata.at['rms',i]=rms_data
        EEGdata.at['kurtosis',i]=kurtosis_data
        EEGdata.at['cov',i]=cov_data
        EEGdata.at['skewness',i]=skew_data
        
    print('zsamples_per_segment: {}\nTotal Segment: {}'.format(samples_per_segment,tot_segments))


In [4]:
def butter_bandpass(lowcut, highcut, fs, order=2):
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return a, b

In [5]:
def butter_bandpass_filter(data, lowcut, highcut, fs, order=2):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = lfilter(b, a, data)
    return y

In [6]:
def bandpass_filter(Zxx, f, fmin, fmax):
#helper function to calculate bandpower for given freq range
    ind_min = np.argmax(f > fmin) - 1
    ind_max = np.argmax(f > fmax) - 1
    return Zxx[ind_min: ind_max], f[ind_min: ind_max]

In [7]:
def bandpower(Pxx, f, fmin, fmax):
#helper function to calculate bandpower for given freq range    
    ind_min = np.argmax(f > fmin) - 1
    ind_max = np.argmax(f > fmax) - 1
    return np.trapz(Pxx[ind_min: ind_max], f[ind_min: ind_max])

In [36]:
#the .edf files contain exactly one hour of digitized EEG signals, although those belonging to case chb10 are two hours 
#long, and those belonging to cases chb04, chb06, chb07(case-13: 1Hour), chb09(case-19:2Hour)
# and chb23 (case-06: 2Hours) are four hours long.
# file-12: cases 27, 28, and 29 are removed, bcz they do not have 'T8-P8' channel in them.

#select data directory
dataDir='/home/mirwais/chbmit/chb12'

#create listings of recordings that contain seizures
fileListing=os.listdir(dataDir)
seizureFiles = [x for x in fileListing if 'seizure' in x and '.edf' in x]
normalFiles = [x for x in fileListing if 'seizure' not in x and '.edf' in x]
seizureRecordIDs = [np.int(re.search('(?<=_)(.*?)(?=.edf)',s).group(0).lstrip("0")) for s in seizureFiles]
normalPaths = [os.path.join(dataDir,s) for s in normalFiles]

#aggregate data into df
studyDataList=[]
studyKeyList = []
for idx,files in enumerate(normalPaths):
    #For 1Hour long file: (16/225) => 1 sec,
    #For 2Hours long file: (16/450) => 1 sec,
    #For 4Hours long files: (16/900) => 1 sec
    studyDataList.append(generateEEGdataDF(files,(32/225)))
    studyKeyList.append(np.int(re.search('(?<=_)(.*?)(?=.edf)',normalFiles[idx]).group(0).lstrip("0")))
    
studyDF = pd.concat(studyDataList,axis=0,keys=studyKeyList, names=['recordingID', 'electrode'])
studyDF=studyDF[studyDF.chName != '-']
studyDF.sort_index(axis=0,level=0,inplace=True,sort_remaining=False)

#label targets
studyDF['sz'] = 0
#for j in seizureRecordIDs:
 #   for idx,st in enumerate(studyDF.index.get_level_values(level=0)):
  #      if(st==j):
   #         studyDF.iloc[idx,-1] = 1
    

#list of derived features to be analyzed
derivedFeatures = studyDF.loc[:,['max','mean','std','rms','kurtosis','cov','skewness',
                                 'delta_mean','theta_mean','alpha_mean','beta_mean','all_mean',
                                 'delta_std','theta_std','alpha_std','beta_std','all_std',
                                 'delta_cov','theta_cov','alpha_cov','beta_cov','all_cov',
                                 'delta_kurtosis','theta_kurtosis','alpha_kurtosis','beta_kurtosis','all_kurtosis',
                                 'delta_skew','theta_skew','alpha_skew','beta_skew','all_skew',
                                 'delta_rms','theta_rms','alpha_rms','beta_rms','all_rms']]

     
#unstack lists into new series
#Tme Domain
t7 = derivedFeatures.apply(lambda x: pd.Series(x['max']),axis=1).stack().reset_index(level=1, drop=True)
t7.name = 'max'
t1 = derivedFeatures.apply(lambda x: pd.Series(x['mean']),axis=1).stack().reset_index(level=1, drop=True)
t1.name = 'mean'
t2 = derivedFeatures.apply(lambda x: pd.Series(x['std']),axis=1).stack().reset_index(level=1, drop=True)
t2.name = 'std'
t3 = derivedFeatures.apply(lambda x: pd.Series(x['rms']),axis=1).stack().reset_index(level=1, drop=True)
t3.name='rms'
t4 = derivedFeatures.apply(lambda x: pd.Series(x['kurtosis']),axis=1).stack().reset_index(level=1, drop=True)
t4.name = 'kurtosis'
t5 = derivedFeatures.apply(lambda x: pd.Series(x['cov']),axis=1).stack().reset_index(level=1, drop=True)
t5.name = 'cov'
t6 = derivedFeatures.apply(lambda x: pd.Series(x['skewness']),axis=1).stack().reset_index(level=1, drop=True)
t6.name = 'skewness'

#Freq Domain
f1 = derivedFeatures.apply(lambda x: pd.Series(x['delta_mean']),axis=1).stack().reset_index(level=1, drop=True)
f1.name = 'delta_mean'
f2 = derivedFeatures.apply(lambda x: pd.Series(x['theta_mean']),axis=1).stack().reset_index(level=1, drop=True)
f2.name = 'theta_mean'
f3 = derivedFeatures.apply(lambda x: pd.Series(x['alpha_mean']),axis=1).stack().reset_index(level=1, drop=True)
f3.name = 'alpha_mean'
f4 = derivedFeatures.apply(lambda x: pd.Series(x['beta_mean']),axis=1).stack().reset_index(level=1, drop=True)
f4.name = 'beta_mean'
f5 = derivedFeatures.apply(lambda x: pd.Series(x['all_mean']),axis=1).stack().reset_index(level=1, drop=True)
f5.name = 'all_mean'


f11 = derivedFeatures.apply(lambda x: pd.Series(x['delta_std']),axis=1).stack().reset_index(level=1, drop=True)
f11.name = 'delta_std'
f12 = derivedFeatures.apply(lambda x: pd.Series(x['theta_std']),axis=1).stack().reset_index(level=1, drop=True)
f12.name = 'theta_std'
f13 = derivedFeatures.apply(lambda x: pd.Series(x['alpha_std']),axis=1).stack().reset_index(level=1, drop=True)
f13.name = 'alpha_std'
f14 = derivedFeatures.apply(lambda x: pd.Series(x['beta_std']),axis=1).stack().reset_index(level=1, drop=True)
f14.name = 'beta_std'
f15 = derivedFeatures.apply(lambda x: pd.Series(x['all_std']),axis=1).stack().reset_index(level=1, drop=True)
f15.name = 'all_std'

f16 = derivedFeatures.apply(lambda x: pd.Series(x['delta_cov']),axis=1).stack().reset_index(level=1, drop=True)
f16.name = 'delta_cov'
f17 = derivedFeatures.apply(lambda x: pd.Series(x['theta_cov']),axis=1).stack().reset_index(level=1, drop=True)
f17.name = 'theta_cov'
f18 = derivedFeatures.apply(lambda x: pd.Series(x['alpha_cov']),axis=1).stack().reset_index(level=1, drop=True)
f18.name = 'alpha_cov'
f19 = derivedFeatures.apply(lambda x: pd.Series(x['beta_cov']),axis=1).stack().reset_index(level=1, drop=True)
f19.name = 'beta_cov'
f20 = derivedFeatures.apply(lambda x: pd.Series(x['all_cov']),axis=1).stack().reset_index(level=1, drop=True)
f20.name = 'all_cov'

f21 = derivedFeatures.apply(lambda x: pd.Series(x['delta_kurtosis']),axis=1).stack().reset_index(level=1, drop=True)
f21.name = 'delta_kurtosis'
f22 = derivedFeatures.apply(lambda x: pd.Series(x['theta_kurtosis']),axis=1).stack().reset_index(level=1, drop=True)
f22.name = 'theta_kurtosis'
f23 = derivedFeatures.apply(lambda x: pd.Series(x['alpha_kurtosis']),axis=1).stack().reset_index(level=1, drop=True)
f23.name = 'alpha_kurtosis'
f24 = derivedFeatures.apply(lambda x: pd.Series(x['beta_kurtosis']),axis=1).stack().reset_index(level=1, drop=True)
f24.name = 'beta_kurtosis'
f25 = derivedFeatures.apply(lambda x: pd.Series(x['all_kurtosis']),axis=1).stack().reset_index(level=1, drop=True)
f25.name = 'all_kurtosis'

f26 = derivedFeatures.apply(lambda x: pd.Series(x['delta_skew']),axis=1).stack().reset_index(level=1, drop=True)
f26.name = 'delta_skew'
f27 = derivedFeatures.apply(lambda x: pd.Series(x['theta_skew']),axis=1).stack().reset_index(level=1, drop=True)
f27.name = 'theta_skew'
f28 = derivedFeatures.apply(lambda x: pd.Series(x['alpha_skew']),axis=1).stack().reset_index(level=1, drop=True)
f28.name = 'alpha_skew'
f29 = derivedFeatures.apply(lambda x: pd.Series(x['beta_skew']),axis=1).stack().reset_index(level=1, drop=True)
f29.name = 'beta_skew'
f30 = derivedFeatures.apply(lambda x: pd.Series(x['all_skew']),axis=1).stack().reset_index(level=1, drop=True)
f30.name = 'all_skew'

f31 = derivedFeatures.apply(lambda x: pd.Series(x['delta_rms']),axis=1).stack().reset_index(level=1, drop=True)
f31.name = 'delta_rms'
f32 = derivedFeatures.apply(lambda x: pd.Series(x['theta_rms']),axis=1).stack().reset_index(level=1, drop=True)
f32.name = 'theta_rms'
f33 = derivedFeatures.apply(lambda x: pd.Series(x['alpha_rms']),axis=1).stack().reset_index(level=1, drop=True)
f33.name = 'alpha_rms'
f34 = derivedFeatures.apply(lambda x: pd.Series(x['beta_rms']),axis=1).stack().reset_index(level=1, drop=True)
f34.name = 'beta_rms'
f35 = derivedFeatures.apply(lambda x: pd.Series(x['all_rms']),axis=1).stack().reset_index(level=1, drop=True)
f35.name = 'all_rms'


#concatenate unstacked features
featuresUnstack = pd.concat([t7,t1,t2,t3,t4,t5,t6,f1,f2,f3,f4,f5,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35],axis=1)

#apply targets to reshaped/flattened df
featuresUnstack['sz'] = 0
#for j in seizureRecordIDs:
 #   for idx,st in enumerate(featuresUnstack.index.get_level_values(level=0)):
  #      if (st == j):
   #         featuresUnstack.iloc[idx,-1] = 1

print(dataDir)

zsamples_per_segment: 512.0
Total Segment: 1800
zsamples_per_segment: 513.5644444444445
Total Segment: 1800
zsamples_per_segment: 513.7066666666667
Total Segment: 1799
zsamples_per_segment: 512.8533333333334
Total Segment: 1800
zsamples_per_segment: 512.0
Total Segment: 1800
zsamples_per_segment: 512.0
Total Segment: 1800
zsamples_per_segment: 512.0
Total Segment: 1800
zsamples_per_segment: 512.0
Total Segment: 1800
zsamples_per_segment: 345.7422222222222
Total Segment: 1800
zsamples_per_segment: 512.0
Total Segment: 1800
/home/mirwais/chbmit/chb12


In [37]:
featuresUnstack.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,max,mean,std,rms,kurtosis,cov,skewness,delta_mean,theta_mean,alpha_mean,...,theta_skew,alpha_skew,beta_skew,all_skew,delta_rms,theta_rms,alpha_rms,beta_rms,all_rms,sz
recordingID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
6,0,81.074481,23.191392,17.028299,28.771577,0.245685,0.734251,0.81939,113.420156,55.173061,12.337639,...,1.059885,1.044735,2.311122,4.762813,128.880476,60.548147,16.272674,14.810752,25.946728,0
6,1,87.716728,21.299603,16.148588,26.729198,0.811976,0.758164,0.936022,56.532983,26.693911,22.968671,...,-0.003903,0.657417,1.256947,3.708921,62.032064,26.835623,26.074586,13.804589,13.825585,0
6,2,124.835165,30.115995,22.350466,37.503553,0.907767,0.742146,0.960907,126.756238,30.996139,5.628478,...,0.810204,-0.337162,1.947537,8.267992,163.202333,39.574725,6.011882,15.841143,30.299293,0
6,3,83.809524,25.26862,17.642457,30.818168,-0.009507,0.698196,0.717889,93.678723,27.684179,14.095144,...,0.900715,0.898419,1.377298,7.810635,125.302833,31.476836,17.299176,10.47546,23.379195,0
6,4,88.107448,19.769536,16.761471,25.918747,0.974093,0.847843,1.105266,93.054465,11.702148,8.28221,...,-0.384066,0.763076,1.007502,7.325745,122.962023,12.446454,9.45648,9.499737,22.202309,0


### 4. Storing data in csv file

In [47]:
csvFilePath = '/home/mirwais/notebooks/paper_3/data/single_channel/chb12_time_freq_segments.csv'

In [39]:
featuresUnstack.to_csv(csvFilePath)

In [51]:
ch = pd.read_csv(csvFilePath)
ch.rename(columns={'Unnamed: 1': 'time'}, inplace=True)

In [52]:
ch.head()

Unnamed: 0,recordingID,time,max,mean,std,rms,kurtosis,cov,skewness,delta_mean,...,theta_skew,alpha_skew,beta_skew,all_skew,delta_rms,theta_rms,alpha_rms,beta_rms,all_rms,sz
0,6,0,81.074481,23.191392,17.028299,28.771577,0.245685,0.734251,0.81939,113.420156,...,1.059885,1.044735,2.311122,4.762813,128.880476,60.548147,16.272674,14.810752,25.946728,0
1,6,1,87.716728,21.299603,16.148588,26.729198,0.811976,0.758164,0.936022,56.532983,...,-0.003903,0.657417,1.256947,3.708921,62.032064,26.835623,26.074586,13.804589,13.825585,0
2,6,2,124.835165,30.115995,22.350466,37.503553,0.907767,0.742146,0.960907,126.756238,...,0.810204,-0.337162,1.947537,8.267992,163.202333,39.574725,6.011882,15.841143,30.299293,0
3,6,3,83.809524,25.26862,17.642457,30.818168,-0.009507,0.698196,0.717889,93.678723,...,0.900715,0.898419,1.377298,7.810635,125.302833,31.476836,17.299176,10.47546,23.379195,0
4,6,4,88.107448,19.769536,16.761471,25.918747,0.974093,0.847843,1.105266,93.054465,...,-0.384066,0.763076,1.007502,7.325745,122.962023,12.446454,9.45648,9.499737,22.202309,0


### 5. Lebaling data b/w normal and seizure

In [43]:
import re
import numpy as np

def extract_data_and_labels(edf_filename, summary_text):
    folder, basename = os.path.split(edf_filename)
    
    i_text_start = summary_text.index(basename)
    
    if 'File Name' in summary_text[i_text_start:]:
        i_text_stop = summary_text.index('File Name', i_text_start)
    else:
        i_text_stop = len(summary_text)
    assert i_text_stop > i_text_start

    file_text = summary_text[i_text_start:i_text_stop]
    if 'Seizure Start' in file_text:
        
        start_sec = int(int(re.search(r"Seizure Start Time: ([0-9]*) seconds", file_text).group(1))/2)
        end_sec = int(int(re.search(r"Seizure End Time: ([0-9]*) seconds", file_text).group(1))/2)
        
        print('Seizure {}:{} => {}'.format(start_sec, end_sec, int(basename[6:8])))
        
        #ch.loc[start_sec:end_sec,'sz']  = ((ch.time >= start_sec) & (ch.time <= end_sec) & (ch.recordingID == int(basename[6:8])))
        ch.loc[((ch.time >= start_sec) & (ch.time <= end_sec) & (ch.recordingID == int(basename[6:8]))), 'sz'] = 2
        #print(ch.loc[start_sec-1:end_sec+1,'sz'])

In [44]:
import glob
import os.path
try:
    file_numbers = [1,2,3,4,5,6]
    for i in file_numbers:
    
        subject_id = 12
        base_path = "/home/mirwais/chbmit/"
        edf_file_names = sorted(glob.glob(os.path.join(base_path, "chb{:02d}/*.edf".format(subject_id))))
        summary_file = os.path.join(base_path, "chb{:02d}/chb{:02d}-{}summary.txt".format(subject_id, subject_id, i))

        summary_content = open(summary_file,'r').read()
        for edf_file_name in edf_file_names:
            result = extract_data_and_labels(edf_file_name, summary_content)
            print(result)
        print('\n.....................................................')
            
except Exception:
    pass

Seizure 832:863 => 6
None
Seizure 713:719 => 8
None


In [100]:
ch[(ch.sz==2) & (ch.recordingID == 42)]
#ch.loc[((ch.time >= int(2213/2)) & (ch.time <= int(2236/2)) & (ch.recordingID == int(42))), 'sz'] = 2

Unnamed: 0,recordingID,time,max,mean,std,rms,kurtosis,cov,skewness,delta_mean,...,theta_skew,alpha_skew,beta_skew,all_skew,delta_rms,theta_rms,alpha_rms,beta_rms,all_rms,sz
16548,42,349,106.862027,31.873474,23.644511,39.686033,-0.068149,0.741824,0.767424,289.965902,...,0.041128,0.523571,1.102227,6.966688,339.032036,45.387629,29.819966,11.240714,60.849923,2
16549,42,350,133.821734,37.380952,27.945445,46.672085,0.035385,0.747585,0.761958,226.848688,...,0.130992,-0.586072,2.578115,3.977872,254.497790,159.280608,125.548146,29.460736,58.585061,2
16550,42,351,121.709402,34.465812,25.924437,43.127353,0.348060,0.752178,0.873110,57.499536,...,0.327445,0.356145,1.154101,3.381857,82.361361,76.325402,110.833927,43.874700,32.399325,2
16551,42,352,131.868132,38.233364,27.863275,47.309113,-0.196959,0.728769,0.731690,265.753510,...,0.027890,-0.111070,0.429308,5.012382,296.724405,51.544021,76.220881,56.367757,58.871239,2
16552,42,353,109.987790,31.839896,23.626452,39.648307,0.546255,0.742039,0.928504,64.310783,...,-0.658474,0.229583,1.644204,4.094370,67.822096,23.760721,68.493774,85.710507,36.655131,2
16553,42,354,126.007326,34.398657,26.650053,43.514284,0.624951,0.774741,1.013475,204.592077,...,0.045103,0.201274,1.307693,6.015526,266.024815,61.234983,24.095661,67.041969,54.590954,2
16554,42,355,145.543346,31.853632,24.102061,39.944502,1.910330,0.756650,1.154889,96.950063,...,0.580385,0.984443,1.418612,3.384327,107.999448,54.948219,35.402872,60.308023,31.792495,2
16555,42,356,84.590965,27.176435,17.898383,32.540908,-0.459034,0.658599,0.445492,118.712788,...,0.888296,0.876443,1.081481,4.916263,130.046057,30.652830,36.767924,25.573886,26.333123,2
16556,42,357,102.954823,28.574481,21.531255,35.778428,0.074832,0.753513,0.822971,49.417851,...,-1.072911,1.058032,2.775270,4.878245,62.720386,17.115472,86.944051,64.250830,30.840517,2
16557,42,358,145.152625,31.111111,23.980762,39.280761,2.131864,0.770810,1.234913,118.658505,...,0.851513,0.967958,1.873554,4.894053,148.378201,20.965518,34.824353,54.245845,34.057171,2


In [101]:
# Saving data
lebaledCSVFilePath = '/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb12_time_freq_segments.csv'
ch.to_csv(lebaledCSVFilePath)

In [102]:
ch = pd.read_csv(lebaledCSVFilePath)
ch.columns.size

41

In [105]:
ch.head()

Unnamed: 0.1,Unnamed: 0,recordingID,time,max,mean,std,rms,kurtosis,cov,skewness,...,theta_skew,alpha_skew,beta_skew,all_skew,delta_rms,theta_rms,alpha_rms,beta_rms,all_rms,sz
0,0,6,0,81.074481,23.191392,17.028299,28.771577,0.245685,0.734251,0.81939,...,1.059885,1.044735,2.311122,4.762813,128.880476,60.548147,16.272674,14.810752,25.946728,0
1,1,6,1,87.716728,21.299603,16.148588,26.729198,0.811976,0.758164,0.936022,...,-0.003903,0.657417,1.256947,3.708921,62.032064,26.835623,26.074586,13.804589,13.825585,0
2,2,6,2,124.835165,30.115995,22.350466,37.503553,0.907767,0.742146,0.960907,...,0.810204,-0.337162,1.947537,8.267992,163.202333,39.574725,6.011882,15.841143,30.299293,0
3,3,6,3,83.809524,25.26862,17.642457,30.818168,-0.009507,0.698196,0.717889,...,0.900715,0.898419,1.377298,7.810635,125.302833,31.476836,17.299176,10.47546,23.379195,0
4,4,6,4,88.107448,19.769536,16.761471,25.918747,0.974093,0.847843,1.105266,...,-0.384066,0.763076,1.007502,7.325745,122.962023,12.446454,9.45648,9.499737,22.202309,0


In [None]:
########################################### END ##############################################################

### Merge all patients data

In [2]:
df1 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb01_time_freq_segments.csv')
df1.columns.size

41

In [3]:
df2 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb02_time_freq_segments.csv')
df2.columns.size

41

In [121]:
df3 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb03_time_freq_segments.csv')
df3.columns.size

41

In [122]:
df4 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb04_time_freq_segments.csv')
df4.columns.size

41

In [123]:
df5 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb05_time_freq_segments.csv')
df5.columns.size

41

In [124]:
df6 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb06_time_freq_segments.csv')
df6.columns.size

41

In [126]:
df7a = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb07a_time_freq_segments.csv')
df7a.columns.size

41

In [127]:
df7b = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb07b_time_freq_segments.csv')
df7b.columns.size

41

In [128]:
frames = [df7a, df7b]
df7 = pd.concat(frames)
df7.columns.size

41

In [129]:
df8 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb08_time_freq_segments.csv')
df8.columns.size

41

In [131]:
df9a = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb09a_time_freq_segments.csv')
df9a.columns.size

41

In [132]:
df9b = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb09b_time_freq_segments.csv')
df9b.columns.size

41

In [133]:
frames = [df9a, df9b]
df9 = pd.concat(frames)
df9.columns.size

41

In [135]:
df10 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb10_time_freq_segments.csv')
df10.columns.size

41

In [136]:
df11 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb11_time_freq_segments.csv')
df11.columns.size

41

In [137]:
df12 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb12_time_freq_segments.csv')
df12.columns.size

41

In [138]:
df13 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb13_time_freq_segments.csv')
df13.columns.size

41

In [139]:
df14 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb14_time_freq_segments.csv')
df14.columns.size

41

In [140]:
df15 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb15_time_freq_segments.csv')
df15.columns.size

41

In [141]:
df16 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb16_time_freq_segments.csv')
df16.columns.size

41

In [142]:
df17 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb17_time_freq_segments.csv')
df17.columns.size

41

In [143]:
df18 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb18_time_freq_segments.csv')
df18.columns.size

41

In [144]:
df19 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb19_time_freq_segments.csv')
df19.columns.size

41

In [145]:
df20 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb20_time_freq_segments.csv')
df20.columns.size

41

In [146]:
df21 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb21_time_freq_segments.csv')
df21.columns.size

41

In [147]:
df22 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb22_time_freq_segments.csv')
df22.columns.size

41

In [149]:
df23a = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb23a_time_freq_segments.csv')
df23a.columns.size

41

In [150]:
df23b = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb23b_time_freq_segments.csv')
df23b.columns.size

41

In [153]:
frames = [df23a, df23b]
df23 = pd.concat(frames)
df23.columns.size

41

In [154]:
df24 = pd.read_csv('/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/chb24_time_freq_segments.csv')
df24.columns.size

41

In [168]:
frames = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18, df19, df20, df21, df22, df23, df24]

In [169]:
patients = ['patient1', 'patient2', 'patient3', 'patient4', 'patient5', 'patient6', 'patient7', 'patient8', 'patient9', 'patient10', 'patient11', 'patient12', 'patient13', 'patient14', 'patient15', 'patient16', 'patient17', 'patient18', 'patient19', 'patient20', 'patient21', 'patient22', 'patient23', 'patient24']

In [170]:
df = pd.concat(frames, keys=patients)

In [171]:
df.head()

Unnamed: 0.1,Unnamed: 1,Unnamed: 0,recordingID,time,max,mean,std,rms,kurtosis,cov,skewness,...,theta_skew,alpha_skew,beta_skew,all_skew,delta_rms,theta_rms,alpha_rms,beta_rms,all_rms,sz
patient1,0,0,3,0,59.97558,19.023199,13.769906,23.483876,-0.340116,0.723848,0.675192,...,0.086066,-0.98568,2.757306,7.073192,118.821004,20.718396,10.546594,2.259943,21.419911,0
patient1,1,1,3,1,66.227106,22.831197,14.945032,27.287681,-0.68187,0.654588,0.369419,...,0.746113,0.248997,2.152905,5.951275,134.287792,37.075077,19.050102,14.416575,25.437438,0
patient1,2,2,3,2,59.194139,21.111111,14.154828,25.417281,-0.813344,0.670492,0.34686,...,-0.621416,1.039449,1.93802,6.064859,101.978461,19.302844,12.404944,3.830406,18.534019,0
patient1,3,3,3,3,69.74359,21.67964,15.543676,26.676069,0.262172,0.716971,0.864396,...,-0.855432,0.919214,1.525064,7.6171,201.673466,27.200393,7.523121,2.058218,36.006894,0
patient1,4,4,3,4,82.246642,26.098901,18.784292,32.155906,0.010378,0.719735,0.817474,...,0.789821,0.507798,3.159692,3.915185,134.738958,101.218809,26.608509,14.383673,30.638491,0


In [172]:
# Saving all data
csvPath = '/home/mirwais/notebooks/paper_3/data/single_channel/lebaled/all_time_freq_segments.csv'
df.to_csv(csvPath)

In [175]:
data = pd.read_csv(csvPath)
data.shape

(336595, 43)

In [None]:
############################################## END ########################################################