In [1]:
import IPython
print('IPython:', IPython.__version__)

import numpy as np

import scipy
print('scipy:', scipy.__version__)

import matplotlib.pyplot as plt

import sklearn
print('scikit-learn:', sklearn.__version__)

import seaborn
print('seaborn', seaborn.__version__)

import mne
print('mne', mne.__version__)

import pyedflib
print('pyedflib', pyedflib.__version__)
# pyedflib: Python library for reading and writing EDF files

import pandas as pd

from os import listdir
    # listdir(path) returns a list with the names of the entries in the directory given by path
from os.path import isfile, join
    # isfile(path) returns True if path is an existing regular file; join() joins path components

from mne.filter import filter_data

from datetime import timedelta
from scipy.signal import find_peaks_cwt

IPython: 6.4.0
scipy: 1.1.0
scikit-learn: 0.19.1
seaborn 0.8.1
mne 0.16.2
pyedflib 0.1.13


In [2]:
def measurementtime(filenames, directory=None):
    """Return the summed recording duration, in seconds, of a list of EDF files.

    The duration of each file is taken from its first signal only: the number
    of samples of that signal divided by its sample frequency.

    Parameters
    ----------
    filenames : iterable of str
        File names, relative to ``directory``.
    directory : str, optional
        Folder containing the files. When omitted, the notebook-level variable
        ``mypath`` is used, which is what the one-argument call has always
        relied on.

    Returns
    -------
    total : int or float
        Sum of the per-file durations in seconds. Files that cannot be read
        are reported with a 'Broken file' message and contribute nothing, so
        the result is ``0`` when no file could be read.
    """
    if directory is None:
        directory = mypath

    total = 0

    for file in filenames:
        try:
            # the context manager closes the reader even if one of the getters raises
            with pyedflib.EdfReader(join(directory, file)) as f:
                # sample frequency and sample count of the first signal in the file
                sr = f.getSampleFrequencies()[0]
                total += f.getNSamples()[0] / sr
        except Exception:
            # best effort: skip unreadable files, but (unlike a bare except)
            # let KeyboardInterrupt / SystemExit stop a long-running scan
            print('Broken file ' + file)

    return total

In [3]:
# Per participant: list the EDF recordings in <path>/MOX/<castorID>/, group them
# by sensor position (left / right / chest) and sum the recording time per group.

# the first five characters of a file name are matched against these sensor IDs
leftSensors = ['13797','13799','13794']
rightSensors = ['13805','13801','13793']
chestSensors = ['13804','13792','13803']

featureWindowLength=60
windowLength=60
esmWindowLength=15

# length of the measurement period in days; turns total seconds into time per day
measurementDays = 14

# root folder of the study data; loop-invariant and also read by later cells,
# so it is defined once here instead of inside the participant loop
path='Y:/ADBS'

datatot =[]

for pt in range(110001,110007+1):

    # measurementtime() reads this notebook-level variable to locate the files
    mypath = path+'/MOX/' + str(pt) + '/'
    # regular files only (no directories), not starting with '_' and ending in 'edf'
    bdffiles = [f for f in listdir(mypath) if isfile(join(mypath,f)) and f[0]!='_' and f[-3:] =='edf']

    # sorted file names per sensor position
    leftFiles = sorted(f for f in bdffiles if f[0:5] in leftSensors)
    rightFiles = sorted(f for f in bdffiles if f[0:5] in rightSensors)
    chestFiles = sorted(f for f in bdffiles if f[0:5] in chestSensors)

    # total recorded seconds per position, and their average over the three positions
    timeL = measurementtime(leftFiles)
    timeR = measurementtime(rightFiles)
    timeC = measurementtime(chestFiles)
    timeA = (timeL+ timeR+ timeC)/3

    # average recording time per measurement day, formatted as H:MM:SS by timedelta
    timeHperD = str(timedelta(seconds=(timeA/measurementDays)))

    data = [pt,timeL,timeR,timeC,timeA,timeHperD]
    datatot.append(data)

# passing the column names to the constructor also works when no participant was processed
dftot = pd.DataFrame(
    datatot,
    columns=['castorID', 'measurementtime Left (s)','measurementtime Right (s)','measurementtime Chest (s)','measurementtime Average (s)','hours per day'],
)

Broken file 13799_20180831_021222.edf
Broken file 13799_20180831_021236.edf
Broken file 13799_20180906_021427.edf
Broken file 13799_20180908_024716.edf
Broken file 13799_20180909_023845.edf
Broken file 13799_20180909_024032.edf
Broken file 13797_20180829_064338.edf
Broken file 13804_20180829_064338.edf
Broken file 13803_20180911_230039.edf
Broken file 13799_20181003_070253.edf


In [4]:
# display the per-participant table of measurement times (in seconds) built in the previous cell

dftot

Unnamed: 0,castorID,measurementtime Left (s),measurementtime Right (s),measurementtime Chest (s),measurementtime Average (s),hours per day
0,110001,993318.0,993057.0,992596.0,992990.333333,19:42:07.880952
1,110002,673259.0,673260.0,673257.0,673258.666667,13:21:29.904762
2,110003,742992.0,742856.0,743124.0,742990.666667,14:44:30.761905
3,110004,776549.0,776470.0,776571.0,776530.0,15:24:26.428571
4,110005,802839.0,802837.0,802818.0,802831.333333,15:55:45.095238
5,110006,765176.0,765263.0,765278.0,765239.0,15:10:59.928571
6,110007,596948.0,596942.0,596945.0,596945.0,11:50:38.928571


In [None]:
# Load the three ESM exports from the server (SANPAR_BE, SANPAR_MOR, SANPAR_EVE)
# and add the study's castorID to every row.
# Export in use: PRDB_20181025T115653 (the earlier export was PRDB_20180913T105337).

exportDir = path+'/esmDataPilot/PRDB_20181025T115653/'

esm = pd.read_stata(exportDir+'SANPAR_BE.dta',convert_categoricals = False)
morning = pd.read_stata(exportDir+'SANPAR_MOR.dta',convert_categoricals = False)
evening = pd.read_stata(exportDir+'SANPAR_EVE.dta',convert_categoricals = False)
# the chained assignments used previously left `df` bound to the last frame read
df = evening

# subjno 9009989 .. 9010008 map onto castorID 110001 .. 110020
mapNames = {9009989+offset: 110001+offset for offset in range(20)}

# a subjno outside the mapping raises KeyError
for frame in (esm, morning, evening):
    frame['castorID'] = [mapNames[subject] for subject in frame['subjno']]


In [None]:
# Remove the dates just before and after each participant's 14-day period:
# participants might have completed additional questionnaires on those days.

# castorID -> dates to discard. One table is shared by all three questionnaires,
# so the evening data of 110007 now also drops 2018-10-25 (previously only the
# beep and morning data did).
excludedDates = {
    110001: ['2018-08-27', '2018-09-11'],
    110002: ['2018-08-28', '2018-09-12'],
    110003: ['2018-08-28', '2018-09-12'],
    110004: ['2018-09-20', '2018-10-05'],
    110005: ['2018-10-01', '2018-10-16'],
    110006: ['2018-10-01', '2018-10-16'],
    110007: ['2018-10-09', '2018-10-24', '2018-10-25'],
}


def _without_excluded_dates(frame, excluded):
    """Return the rows of `frame` for the participants in `excluded`, minus their excluded dates.

    Parameters
    ----------
    frame : pandas.DataFrame
        Questionnaire data with a 'castorID' and a '_date' column.
    excluded : dict
        Maps castorID to the list of '_date' values to drop for that participant.

    Returns
    -------
    pandas.DataFrame
        Rows grouped per participant in ascending castorID order, with a fresh
        0..n-1 index. Participants that are not a key of `excluded` are left out.
    """
    parts = []
    for castorID, dates in sorted(excluded.items()):
        keep = frame['castorID'] == castorID
        for date in dates:
            keep = keep & (frame['_date'] != date)
        parts.append(frame[keep])
    return pd.concat(parts, ignore_index=True)


esmcor = _without_excluded_dates(esm, excludedDates)
morningcor = _without_excluded_dates(morning, excludedDates)
eveningcor = _without_excluded_dates(evening, excludedDates)


In [None]:
# Count the valid (in-period) questionnaires per participant and express them as a
# percentage of the number that could have been completed.

nDays = 14        # length of the measurement period in days
beepsPerDay = 7   # presumably the number of beeps offered per day (the percentage uses 14*7)

# Index every count by castorID, in the row order of dftot, with 0 for participants
# who completed nothing. value_counts(sort=False) alone does not tie the order of
# the result to dftot and silently leaves out participants without any response.
participants = dftot['castorID'].values

numberofBeeps = esmcor['castorID'].value_counts().reindex(participants, fill_value=0)
numberofMorning = morningcor['castorID'].value_counts().reindex(participants, fill_value=0)
numberofEvening = eveningcor['castorID'].value_counts().reindex(participants, fill_value=0)

percentageofBeeps = numberofBeeps/(nDays*beepsPerDay)*100
percentageofMorning = numberofMorning/nDays*100
percentageofEvening = numberofEvening/nDays*100

In [None]:
# Add the questionnaire counts and percentages to the measurement-time table.
# The count/percentage Series are indexed by castorID, so each value is looked up
# per participant rather than pasted in by position: positional assignment puts
# numbers on the wrong row when the orders differ and fails when a participant
# has no questionnaires at all.

castorIDs = dftot['castorID']

countColumns = [
    ('numberofBeeps', numberofBeeps),
    ('numberofMorning', numberofMorning),
    ('numberofEvening', numberofEvening),
]
percentageColumns = [
    ('percentageofBeeps', percentageofBeeps),
    ('percentageofMorning', percentageofMorning),
    ('percentageofEvening', percentageofEvening),
]

# participants without any completed questionnaire get 0
for column, counts in countColumns:
    dftot[column] = castorIDs.map(counts).fillna(0).astype(int)

for column, percentages in percentageColumns:
    dftot[column] = castorIDs.map(percentages).fillna(0)

dftot

In [None]:
# Read the evaluation form from <path>/evaluation.xlsx and display it.
# Answer coding, per the original note: 0 = no, 1 = yes.

evaluation = pd.read_excel(path+'/evaluation.xlsx')
evaluation

In [None]:
# Average the per-participant columns of dftot and show them as a one-row table.

meanTime = dftot['measurementtime Average (s)'].mean()
# mean total time divided over the 14 days, rendered as text by timedelta
meanTimeperD = str(timedelta(seconds=(meanTime/14)))

meanBeep, meanMorning, meanEvening = (
    dftot[column].mean()
    for column in ('numberofBeeps', 'numberofMorning', 'numberofEvening')
)
meanPercentageBeep, meanPercentageMorning, meanPercentageEvening = (
    dftot[column].mean()
    for column in ('percentageofBeeps', 'percentageofMorning', 'percentageofEvening')
)

# one single-element list per column gives a frame with exactly one row
averagedata = pd.DataFrame({
    'mean time': [meanTime],
    'mean hours per day': [meanTimeperD],
    'mean beeps': [meanBeep],
    'mean morning': [meanMorning],
    'mean evening': [meanEvening],
    'mean percentage beeps': [meanPercentageBeep],
    'mean percentage morning': [meanPercentageMorning],
    'mean percentage evening': [meanPercentageEvening],
})
averagedata

In [None]:
# Completed beeps per participant, with the group mean as a dashed reference line.

nParticipants = len(dftot)
maxBeeps = 14*7   # y-axis upper limit; same 14*7 used as denominator for the beep percentage

fig,ax = plt.subplots()
ax.plot(dftot['numberofBeeps'], label = 'Number of beeps', marker = 'o')
# horizontal line at the mean, one point per plotted participant
ax.plot([dftot['numberofBeeps'].mean()]*nParticipants, label = 'Mean', linestyle = '--')

# one tick per participant, labelled with the castorID instead of the row number
ax.set_xticks(range(nParticipants))
ax.set_xticklabels(dftot['castorID'])
# limits follow the number of participants rather than a fixed 0..6 range
ax.set_xlim(0, max(nParticipants-1, 1))
ax.set_ylim(0, maxBeeps)
ax.set_xlabel('Participant (castorID)')
ax.set_ylabel('Completed beeps')
ax.set_title('Number of completed beeps')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Percentage of completed beeps per participant, with the group mean as a dashed reference line.

nParticipants = len(dftot)

fig,ax = plt.subplots(figsize=(10,5))
ax.plot(dftot['percentageofBeeps'], label = 'Completed beeps', marker = 'o')
# horizontal line at the mean, one point per plotted participant
ax.plot([dftot['percentageofBeeps'].mean()]*nParticipants, label = 'Mean', linestyle = '--')

# one tick per participant, labelled with the castorID instead of the row number
ax.set_xticks(range(nParticipants))
ax.set_xticklabels(dftot['castorID'])
# limits follow the number of participants rather than a fixed 0..6 range
ax.set_xlim(0, max(nParticipants-1, 1))
ax.set_ylim(0, 100)
ax.set_xlabel('Participant (castorID)')
ax.set_ylabel('Completed beeps (%)')
ax.set_title('Percentage of completed beeps per participant')
ax.legend(loc='upper right')
plt.show()