In [1]:
import numpy as np
import pandas as pd
import os
import re
import pyedflib

from scipy.signal import butter, lfilter
from pyentrp import entropy as ent
from scipy.signal import stft
from scipy.signal import welch
from scipy.signal import boxcar
from scipy.stats import variation
from scipy.stats import skew
from scipy.stats import kurtosis
from scipy.stats import describe

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def generateEEGdataDF(file, fft_seg_length):
#this function loads the data files, and parses the data into an appropriate dataframe
    
    #open the file, catch exception if file is already open
    try:
        f = pyedflib.EdfReader(file)
    except OSError:
        print('File already open')
        f._close()
    
    #load relevent parameters into df
    numChannels = f.signals_in_file
    channelName=[]
    channelSamples=[]
    Fs=[]
    traces=[]
    for channel in range(numChannels):
        channelName.append(f.getLabel(channel))
        channelSamples.append(f.getNSamples()[channel])
        Fs.append(f.getSampleFrequency(channel))
        traces.append(f.readSignal(channel))
    
    EEGdata=pd.DataFrame([channelName,channelSamples,Fs,traces],index=['channelName','channelSamples','Fs','traces'])
    
    #get engineered features from loaded data, add to existing df
    segmentedFFT(EEGdata,fft_seg_length)
    
    #remember to close the file!
    f._close()
    
    return EEGdata.transpose()

In [3]:
def segmentedFFT(EEGdata,seg_length):
#this function calculates Short Fourier Transform
#length specified (seg_length)    
    for i in range(1,2):
        
        samples_per_segment = EEGdata.loc['channelSamples'][i]/(EEGdata.loc['Fs'][i]/seg_length)
        tot_segments = np.floor(EEGdata.loc['channelSamples'][i]/samples_per_segment).astype(np.int)
        
        #calculate the start and end times of each segment
        startSamples = [np.int(x) for x in range(tot_segments)*samples_per_segment]
        endSamples = [np.int(x) for x in startSamples + samples_per_segment]
        
        
        #generate features for each segment
        mean_data=[]
        
        delta_mean=[]

            
        data = welch(EEGdata.loc['traces'], fs=EEGdata.loc['Fs'],
                                                    window='hann', nperseg=None, noverlap=None,nfft=None, detrend='constant', return_onesided=True, scaling='density')
            
            # Power spectral density
            #Zxx_delta, f1 = bandpass_filter(data[1], data[0],0,4)
        plt.figure()
        plt.semilogy(data[0], np.sqrt(data[1]))
        plt.xlabel('frequency [Hz]')
        plt.ylabel('Linear spectrum [V RMS]')
        plt.show()
            #Time Domain Features
        mean_data.append(np.mean(np.absolute(EEGdata.loc['traces'][i][st:en])))            

            
        
        EEGdata.at['delta_mean',i]=delta_mean

        
    print('zsamples_per_segment: {}\nTotal Segment: {}'.format(samples_per_segment,tot_segments))


In [4]:
dataDir='/home/mirwais/chbmit/chb01/practice/practice'

#create listings of recordings that contain seizures
fileListing=os.listdir(dataDir)
seizureFiles = [x for x in fileListing if 'seizure' in x and '.edf' in x]
normalFiles = [x for x in fileListing if 'seizure' not in x and '.edf' in x]
seizureRecordIDs = [np.int(re.search('(?<=_)(.*?)(?=.edf)',s).group(0).lstrip("0")) for s in seizureFiles]
normalPaths = [os.path.join(dataDir,s) for s in normalFiles]

#aggregate data into df
studyDataList=[]
studyKeyList = []
for idx,files in enumerate(normalPaths):
    #For 1Hour long file: (16/225) => 1 sec,
    #For 2Hours long file: (16/450) => 1 sec,
    #For 4Hours long files: (16/900) => 1 sec
    studyDataList.append(generateEEGdataDF(files,(32/225)))
    studyKeyList.append(np.int(re.search('(?<=_)(.*?)(?=.edf)',normalFiles[idx]).group(0).lstrip("0")))
    
studyDF = pd.concat(studyDataList,axis=0,keys=studyKeyList, names=['recordingID', 'electrode'])
studyDF=studyDF[studyDF.channelName != '-']
studyDF.sort_index(axis=0,level=0,inplace=True,sort_remaining=False)

  .format(nperseg, input_length))


TypeError: 1st argument must be a real sequence 2