In [2]:
from mne.filter import filter_data
import pandas as pd

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import pyedflib
import numpy as np
from datetime import timedelta

In [4]:
def readData(filename):
    """Read every channel of an EDF/BDF recording and build one timestamp per sample.

    Parameters
    ----------
    filename : str
        Path of the recording. The 15 characters preceding the 4-character
        extension must encode the recording start as ``YYYYmmdd_HHMMSS``
        (e.g. ``13337_20180203_094429.bdf``).

    Returns
    -------
    signal_labels : list
        Channel labels as reported by the reader.
    timeStamps : list of pandas.Timestamp
        One timestamp per sample of the first channel, starting at the time
        parsed from the file name and spaced by exactly ``1/sr`` seconds
        (rounded to the nearest nanosecond).
    sigbufs : numpy.ndarray
        Array of shape (number of channels, number of samples) holding the signals.
    sr : number
        Sampling frequency of the first channel.

    Raises
    ------
    ValueError
        If the start time cannot be parsed from ``filename``.

    Notes
    -----
    The sample count and the sampling frequency of channel 0 are used for all
    channels; a channel with a different length will fail when it is copied
    into ``sigbufs``.
    """
    f = pyedflib.EdfReader(filename)  # opens the file; signals are read below
    try:
        sr = f.getSampleFrequencies()[0]
        n = f.signals_in_file
        signal_labels = f.getSignalLabels()
        nSamples = f.getNSamples()[0]
        sigbufs = np.zeros((n, nSamples))
        for i in range(n):
            sigbufs[i, :] = f.readSignal(i)
    finally:
        # Always release the file handle, even if reading a channel fails.
        f._close()

    # Start time is encoded in the file name; a malformed name raises ValueError
    # here instead of silently passing the raw string on.
    startingTime = pd.to_datetime(filename[-19:-4], format='%Y%m%d_%H%M%S')

    # Build the offsets in integer nanoseconds from the sample index so that the
    # sample period is not truncated to whole milliseconds and no rounding error
    # accumulates over the recording.
    offsetsNs = np.rint(np.arange(nSamples, dtype=np.float64) * 1e9 / sr).astype(np.int64)
    timeStamps = list(startingTime + pd.to_timedelta(offsetsNs, unit='ns'))
    return signal_labels, timeStamps, sigbufs, sr
    

In [5]:
def extractFeatures(data, timeStamps, sr, windowLength, lFreq=4, hFreq=8):
    """Compute mean band power per channel in consecutive, non-overlapping windows.

    Parameters
    ----------
    data : numpy.ndarray
        Signals with shape (channels, samples).
    timeStamps : sequence
        One entry per sample; indexed with the first sample of each window.
    sr : number
        Sampling frequency in Hz (int or float).
    windowLength : number
        Window length in seconds; ``windowLength * sr`` samples form one window.
    lFreq, hFreq : number, optional
        Pass-band edges handed to ``filter_data`` (default 4 and 8 Hz).

    Returns
    -------
    alignedTimes : list
        Entry of ``timeStamps`` at the start of each window.
    features : numpy.ndarray
        Array of shape (channels, number of windows) with the mean of the squared
        band-pass filtered signal in each window. The final window is shorter
        when the recording is not a whole number of windows; its mean is taken
        over the samples that exist.

    Raises
    ------
    ValueError
        If ``windowLength * sr`` rounds to less than one sample.
    """
    # The step is used in range() and as a slice bound, so it must be a plain
    # integer even when the sampling frequency is reported as a float.
    step = int(round(windowLength * sr))
    if step <= 0:
        raise ValueError("windowLength * sr must be at least one sample")

    # Band-pass filter (filter_data comes from mne.filter)
    filtData = filter_data(data, sr, lFreq, hFreq)

    numSamples = data.shape[1]
    # Ceiling division: exactly as many columns as range() yields window starts,
    # so no spurious all-zero column is left when numSamples is a multiple of step.
    numWindows = -(-numSamples // step)
    features = np.zeros((data.shape[0], numWindows))
    alignedTimes = []
    for i, startOfWin in enumerate(range(0, numSamples, step)):
        endOfWin = startOfWin + step
        # Average power per channel: mean of the squared samples in the window
        features[:, i] = np.mean(filtData[:, startOfWin:endOfWin] ** 2, axis=1)
        # Timestamp at the beginning of the window
        alignedTimes.append(timeStamps[startOfWin])
    return alignedTimes, features

In [6]:
# Load the recording: channel labels, per-sample timestamps, the signal matrix
# and the sampling frequency, in the order readData returns them.
labels, timeStamps, data, sr = readData(filename="13337_20180203_094429.bdf")

In [22]:
# Summarise the recording (data/timeStamps/sr returned by readData) as mean
# band power per channel in consecutive windows of 60 seconds.
alignedTimes, features = extractFeatures(data, timeStamps, sr, windowLength=60)

# features is (channels x windows): transpose it so every row is one window and
# every column one channel, name the columns after the channel labels and use
# the window start times as the index.
dataFrame = pd.DataFrame(features.T, index=alignedTimes, columns=labels)


Setting up band-pass filter from 4 - 8 Hz
l_trans_bandwidth chosen to be 2.0 Hz
h_trans_bandwidth chosen to be 2.0 Hz
Filter length of 82 samples (3.280 sec) selected


  filtData = filter_data(data, sr, 4,8) #filter_data = function of nme


In [23]:
def alignFeaturesESM(featureFrame, esmFrame, esmColumns, esmWindowLength=15):
    """Attach ESM answers to the sensor-feature rows that precede each ESM beep.

    For every row (beep) of ``esmFrame`` the feature row closest in time is
    located. If it is within ``esmWindowLength`` minutes of the beep, the
    ``esmWindowLength`` feature rows immediately before it are taken and the
    beep's values for ``esmColumns`` are repeated on each of them.

    Parameters
    ----------
    featureFrame : pandas.DataFrame
        Sensor features, one row per window, indexed by window start time.
    esmFrame : pandas.DataFrame
        ESM data, one row per beep, indexed by beep time.
    esmColumns : list
        Names of the ``esmFrame`` columns to carry over.
    esmWindowLength : int, optional
        Used twice: as the maximum allowed distance in minutes between a beep
        and its nearest feature row, and as the number of feature rows taken
        before that row. The two meanings coincide only when feature rows are
        one minute apart.

    Returns
    -------
    pandas.DataFrame
        Feature columns followed by ``esmColumns``, indexed by the timestamps of
        the selected feature rows. Empty (with those columns) when no beep could
        be aligned.
    """
    featColumns = featureFrame.keys().tolist()  # names of the feature columns
    esmColumns = list(esmColumns)
    combinedColumns = featColumns + esmColumns

    pieces = []  # one small frame per aligned beep; concatenated once at the end
    if featureFrame.shape[0] == 0:
        return pd.DataFrame(columns=combinedColumns)

    for beep in range(esmFrame.shape[0]):
        beepTime = esmFrame.index[beep]
        # Absolute distance from this beep to every feature row, computed once
        timeDiffs = np.abs(featureFrame.index - beepTime)
        pos = int(timeDiffs.argmin())
        if timeDiffs[pos] > timedelta(minutes=esmWindowLength):
            # Nearest sensor row is too far away from the beep: skip the beep
            print("Couldn't find corresponding sensor data")
            continue
        if pos < esmWindowLength:
            # Fewer than esmWindowLength feature rows exist before the match
            continue

        window = featureFrame.iloc[pos - esmWindowLength:pos]
        featData = window[featColumns].values
        # Repeat the beep's answers once per feature row in the window
        esmData = np.tile(esmFrame.iloc[beep][esmColumns].values, (esmWindowLength, 1))
        combined = np.concatenate((featData, esmData), axis=1)
        pieces.append(pd.DataFrame(combined, columns=combinedColumns, index=window.index))

    if not pieces:
        return pd.DataFrame(columns=combinedColumns)
    return pd.concat(pieces)

In [17]:
# Load the ESM questionnaire export.
# pd.read_excel needs an Excel reader package in the environment; the original
# author installed one from a terminal with `pip install xlrd`.
esm = pd.read_excel('r_esm14d.xlsx')

# Parse the date/time strings and use them as the index (the column is kept).
esm['timeStamp'] = pd.to_datetime(esm['timeStamp'])
esm = esm.set_index('timeStamp', drop=False)

# ESM items to carry over onto the sensor features
columns = [
    'stress', 'cheerful', 'relaxed', 'content', 'irritated',
    'down', 'tremor', 'rigid', 'hinder', 'hinder2',
]

# Align with the default esmWindowLength of 15, then check the resulting shape.
alignedFeatures = alignFeaturesESM(featureFrame=dataFrame, esmFrame=esm, esmColumns=columns)
alignedFeatures.shape

Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data
Couldn't find corresponding sensor data


(90, 19)

In [14]:
# Preview the first ten feature windows
dataFrame.head(10)

Unnamed: 0,X,Y,Z,AccX,AccY,AccZ,GyroX,GyroY,GyroZ
2018-02-03 09:44:29,0.002854,0.005329,0.005062,0.001967,0.00398,0.0037,779.047443,435.632256,245.858532
2018-02-03 09:45:29,0.001397,0.001088,0.000781,0.000886,0.000464,0.000708,67.322135,45.459644,18.257735
2018-02-03 09:46:29,0.000989,0.001577,0.00127,0.000566,0.00082,0.000753,30.558596,27.280573,7.546929
2018-02-03 09:47:29,0.001199,0.002447,0.001468,0.000763,0.001328,0.000932,41.287697,28.036058,12.95467
2018-02-03 09:48:29,0.001663,0.003236,0.001562,0.001,0.002034,0.000849,35.7972,38.834465,9.737345
2018-02-03 09:49:29,0.002542,0.018925,0.007569,0.001915,0.012126,0.00458,164.580566,58.28069,23.498772
2018-02-03 09:50:29,0.001018,0.020434,0.003406,0.000581,0.013226,0.00164,295.504543,43.613814,25.251973
2018-02-03 09:51:29,0.001375,0.023801,0.00383,0.000717,0.013126,0.00227,332.221042,47.812591,28.863938
2018-02-03 09:52:29,0.00125,0.025428,0.004388,0.000774,0.016709,0.002197,444.636705,51.902039,29.03091
2018-02-03 09:53:29,0.000842,0.018623,0.003146,0.000545,0.012348,0.001613,382.811563,36.158556,25.356662


In [None]:
# Line plot of one feature column (AccX) over the window start times
dataFrame['AccX'].plot.line()

In [20]:
from scipy.stats import spearmanr

# Rank correlation between the AccX feature and the 'cheerful' ESM item;
# pairs containing NaN are left out of the computation.
r, p = spearmanr(
    a=alignedFeatures['AccX'].values.tolist(),
    b=alignedFeatures['cheerful'].values.tolist(),
    nan_policy='omit',
)
print(r, p)

-0.30825035561877656 0.007133565820712296


In [19]:
# Display the aligned matrix: for each ESM beep, the 15 preceding rows of
# mean power (each computed over a 60-second window) next to that beep's ESM answers.
alignedFeatures

Unnamed: 0,X,Y,Z,AccX,AccY,AccZ,GyroX,GyroY,GyroZ,stress,cheerful,relaxed,content,irritated,down,tremor,rigid,hinder,hinder2
2018-02-03 10:00:29,0.000551826,0.000486185,0.000219272,0.00023106,0.000256368,0.000110662,5.96748,10.5498,3.18466,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:01:29,0.000425987,0.000397525,0.000261393,0.000149667,0.000161492,0.000106868,17.6827,9.2443,5.5118,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:02:29,0.00062335,0.000657149,0.000547251,0.000283531,0.000334467,0.000237226,17.2919,22.9813,4.6953,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:03:29,0.00519928,0.0103864,0.0109915,0.00671596,0.00724925,0.0118269,1797.22,1171.22,468.677,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:04:29,0.00185985,0.00374931,0.00206472,0.00189567,0.0027159,0.00241029,114.061,162.279,69.0521,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:05:29,0.000432971,0.000393872,0.000318178,0.000208214,0.000186161,0.00011723,12.6137,14.4873,4.81649,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:06:29,0.00170869,0.00492582,0.00224001,0.00112108,0.00317865,0.00112946,54.5102,53.8061,13.4321,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:07:29,0.000758669,0.0010251,0.000560628,0.000439888,0.000490932,0.000268065,19.9065,19.8046,6.42062,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:08:29,0.000535218,0.00116265,0.000816481,0.000256006,0.000560726,0.00039821,14.6671,14.0333,7.26638,4,2,3,3,1,1,3,2,5,5
2018-02-03 10:09:29,0.000393607,0.000318739,0.000146815,0.000138987,0.00016908,6.5625e-05,3.73868,5.22641,2.70181,4,2,3,3,1,1,3,2,5,5
