In [2]:
import numpy as np
import pandas as pd
import pickle


In [3]:
# Per-window statistics, in the order their column groups appear in the output.
_WINDOW_STATS = (('mean', np.mean), ('std', np.std), ('median', np.median))


def _window_stat(column, n_windows, reducer):
    """Reduce a signal to one value per window.

    The signal is cut into consecutive, non-overlapping chunks of
    ``len(column) // n_windows`` samples; trailing samples that do not fill a
    whole chunk are dropped. ``reducer`` (e.g. ``np.mean``) is then applied to
    every chunk along ``axis=1``.

    Raises:
        ValueError: if the signal has fewer samples than ``n_windows``.
    """
    values = np.asarray(column)
    per_window = len(values) // n_windows
    if per_window == 0:
        raise ValueError(
            'signal has %d samples, fewer than the %d windows requested'
            % (len(values), n_windows))
    usable = values[:n_windows * per_window]
    return reducer(usable.reshape(-1, per_window), axis=1)


def _count_rpeaks(rpeaks, window):
    """Count R-peaks per consecutive window of ``window`` sample indices.

    A peak whose index is <= the current upper boundary belongs to the current
    window. The count of the last (possibly partial) window is included.
    """
    counts = []
    boundary = window
    in_window = 0
    for peak in rpeaks:
        # Close every window that ends before this peak (empty ones count 0).
        while peak > boundary:
            counts.append(in_window)
            in_window = 0
            boundary += window
        in_window += 1
    # The original loop dropped this final count; keep it.
    counts.append(in_window)
    return counts


def load_data(file_path, rpeak_window=175):
    """Load one subject's pickle and build a per-label feature table.

    The pickle must hold a dict with the keys ``'signal'`` (with ``'chest'``:
    ``ACC``/``ECG``/``Resp`` and ``'wrist'``: ``ACC``/``EDA``/``BVP``/``TEMP``),
    ``'label'``, ``'questionnaire'``, ``'rpeaks'``, ``'activity'`` and
    ``'subject'``. Each ``ACC`` entry must split into exactly three columns.

    Args:
        file_path: path of the ``.pkl`` file to read.
        rpeak_window: width, in R-peak index units, of the windows used to
            count R-peaks (default 175, the value previously hard-coded).

    Returns:
        A ``pandas.DataFrame`` with one row per entry of ``'label'`` and the
        columns, in order: ``label``; ``<signal>_mean``, ``<signal>_std`` and
        ``<signal>_median`` for the 11 signals; one column per questionnaire
        key; ``Rpeaks``; ``Activity``; ``Subject``.

    Raises:
        ValueError: if ``rpeak_window`` is not positive, or a signal has fewer
            samples than there are labels.
    """
    if rpeak_window <= 0:
        raise ValueError('rpeak_window must be positive')

    # NOTE: pickle can execute arbitrary code while loading; only open files
    # from a trusted source.
    with open(file_path, 'rb') as handle:
        S = pickle.load(handle, encoding='latin1')

    chest = S['signal']['chest']
    wrist = S['signal']['wrist']
    labels = S['label']
    n_windows = len(labels)

    # Split each accelerometer array into its three columns. Transposing gives
    # the same values as zip(*array) without building per-sample tuples.
    chest_x, chest_y, chest_z = np.ascontiguousarray(np.asarray(chest['ACC']).T)
    wrist_x, wrist_y, wrist_z = np.ascontiguousarray(np.asarray(wrist['ACC']).T)

    signals = [
        ('chest_ACC_x', chest_x),
        ('chest_ACC_y', chest_y),
        ('chest_ACC_z', chest_z),
        ('chest_ECG', chest['ECG']),
        ('chest_Resp', chest['Resp']),
        ('wrist_ACC_x', wrist_x),
        ('wrist_ACC_y', wrist_y),
        ('wrist_ACC_z', wrist_z),
        ('wrist_EDA', wrist['EDA']),
        ('wrist_BVP', wrist['BVP']),
        ('wrist_TEMP', wrist['TEMP']),
    ]

    # One feature per (statistic, signal) pair so that every signal ends up
    # with exactly as many rows as there are labels.
    column_names = ['label']
    column_data = {'label': labels}
    for stat_name, reducer in _WINDOW_STATS:
        for signal_name, samples in signals:
            name = signal_name + '_' + stat_name
            column_names.append(name)
            column_data[name] = _window_stat(samples, n_windows, reducer)
    dataset = pd.DataFrame(column_data, columns=column_names)

    # Questionnaire answers are per-subject scalars, repeated on every row.
    for key, value in S["questionnaire"].items():
        dataset[key] = value

    # NOTE(review): Rpeaks and Activity are attached to rows purely by
    # position. Whether an rpeak_window-wide window and one raw activity
    # sample cover the same time span as one label row cannot be verified
    # from this file - confirm against the dataset's sampling rates.
    counted_rpeaks = _count_rpeaks(S['rpeaks'], rpeak_window)
    # The peaks may end before the last row; pad the remaining rows with 0.
    counted_rpeaks.extend([0] * (len(dataset) - len(counted_rpeaks)))
    dataset = dataset.join(pd.DataFrame({"Rpeaks": counted_rpeaks}))

    activity = pd.DataFrame(S["activity"]).astype(int)
    activity.columns = ["Activity"]
    dataset = dataset.join(activity)

    dataset['Subject'] = S["subject"]

    return dataset
    

In [4]:

# Build one feature table per subject (S1..S15), keyed by subject id.
dataframes = {}
for i in range(1, 16):
    dataframes['S{0}'.format(i)] = load_data('PPG_FieldStudy/S{0}/S{0}.pkl'.format(i))
dataframes.keys()

dict_keys(['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S12', 'S13', 'S14', 'S15'])

In [5]:
# NOTE(review): S1 is not used in this cell and duplicates dataframes['S1'];
# it is kept only in case a later cell reads it - confirm and drop if unused.
S1 = load_data('./PPG_FieldStudy/S1/S1.pkl')
# Stack the per-subject tables in subject order, keeping each table's own row
# index. A single pd.concat replaces the chained DataFrame.append calls:
# append was deprecated in pandas 1.4 and removed in 2.0, and every call
# copied the whole accumulated frame again.
data = pd.concat([dataframes['S' + str(i)] for i in range(1, 16)])
data.shape

(64697, 43)

In [6]:
# Display the combined frame; the notebook shows a truncated preview (first and
# last rows, with elided columns) rather than all rows.
data

Unnamed: 0,label,chest_ACC_x_mean,chest_ACC_y_mean,chest_ACC_z_mean,chest_ECG_mean,chest_Resp_mean,wrist_ACC_x_mean,wrist_ACC_y_mean,wrist_ACC_z_mean,wrist_EDA_mean,...,wrist_TEMP_median,WEIGHT,Gender,AGE,HEIGHT,SKIN,SPORT,Rpeaks,Activity,Subject
0,49.611369,0.851230,-0.066021,-0.369793,0.039022,1.320817,-0.761230,-0.076416,0.671875,4.716672,...,32.155,78.0,m,34,182.0,3,6,0,0,S1
1,50.323992,0.853035,-0.064653,-0.372883,-0.037044,-1.524349,-0.766602,-0.076172,0.680420,4.692810,...,32.150,78.0,m,34,182.0,3,6,1,0,S1
2,52.708336,0.862127,-0.063661,-0.328341,0.021329,0.497232,-0.871338,-0.362305,0.287842,4.709465,...,32.150,78.0,m,34,182.0,3,6,0,0,S1
3,55.640794,0.884370,-0.063035,-0.265127,0.006393,0.409606,-0.979004,-0.150635,0.195068,4.748541,...,32.140,78.0,m,34,182.0,3,6,0,0,S1
4,57.658406,0.889886,-0.042930,-0.247533,-0.024418,-0.779251,-1.002930,-0.099609,0.148926,4.765036,...,32.155,78.0,m,34,182.0,3,6,0,0,S1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3961,75.475622,0.894839,0.011158,-0.214601,-0.008444,0.712193,-0.710205,0.671631,0.156250,0.956779,...,34.015,79.0,m,28,183.0,2,5,1,2,S15
3962,77.524511,0.895650,0.014221,-0.217153,0.000331,0.364633,-0.454102,-0.193848,0.840088,0.943969,...,34.000,79.0,m,28,183.0,2,5,0,2,S15
3963,78.714945,0.893826,0.022179,-0.202851,0.021754,-0.179085,-0.669678,-0.124756,0.505371,0.943969,...,34.000,79.0,m,28,183.0,2,5,1,2,S15
3964,80.413954,0.896889,0.022090,-0.203860,-0.013849,0.442674,-0.351074,-0.044678,0.890381,0.937724,...,34.000,79.0,m,28,183.0,2,5,0,2,S15


In [7]:
# Save the combined table to CSV in the working directory; index=False leaves
# the row index out of the file.
data.to_csv("dataset_cal.csv", index=False)