In [68]:
import numpy as np
import pandas as pd
import pickle

In [63]:
def _resample(values, step, columns=None):
    """Keep every `step`-th row of `values` as a DataFrame with a fresh 0..n-1 index."""
    frame = pd.DataFrame(values).iloc[::step, :].reset_index(drop=True)
    if columns is not None:
        frame.columns = columns
    return frame


def _count_rpeaks(rpeaks, window, n_rows):
    """Count R-peak positions per consecutive `window`-wide bin, zero-padded to `n_rows` bins.

    Bin ``i`` collects the peaks ``p`` with ``i * window < p <= (i + 1) * window``
    (peaks at or below ``window`` all land in bin 0). `rpeaks` is consumed in
    order, so it is expected to be sorted ascending.
    """
    if window <= 0:
        raise ValueError("window must be a positive number of samples")

    counts = []
    upper_edge = window  # inclusive upper edge of the bin being filled
    in_bin = 0
    for peak in rpeaks:
        # Close (possibly empty) bins until the peak fits into the current one.
        while peak > upper_edge:
            counts.append(in_bin)
            in_bin = 0
            upper_edge += window
        in_bin += 1
    # Flush the bin that was still open when the peaks ran out; without this
    # the peaks of the last populated bin would be lost.
    counts.append(in_bin)
    # Bins after the last peak contain no peaks.
    counts.extend([0] * (n_rows - len(counts)))
    return counts


def load_data(path, chest_step=175, wrist_acc_step=8, bvp_step=16, label_repeat=8):
    """Load one subject pickle and flatten it into a single row-aligned DataFrame.

    The pickle is expected to hold a dict with the keys ``"signal"`` (with
    ``"chest"`` and ``"wrist"`` sub-dicts), ``"questionnaire"``, ``"rpeaks"``,
    ``"activity"``, ``"label"`` and ``"subject"``.

    Parameters
    ----------
    path : str
        Location of the subject's ``.pkl`` file.
    chest_step : int, default 175
        Stride used to thin the chest ACC / ECG / Resp samples; also the bin
        width (in chest samples) used when counting R-peaks.
    wrist_acc_step : int, default 8
        Stride used to thin the wrist ACC samples.
    bvp_step : int, default 16
        Stride used to thin the wrist BVP samples.
    label_repeat : int, default 8
        Number of consecutive rows each label value is repeated for.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``chest_ACC_x/y/z``, ``chest_Resp``, ``chest_ECG``,
        ``wrist_ACC_x/y/z``, ``wrist_BVP``, ``wrist_TEMP``, one constant column
        per questionnaire entry, ``Rpeaks``, ``Activity``, ``Label``, ``Subject``.
        The row count is that of the thinned chest ACC signal.

    Notes
    -----
    The default strides presumably bring every stream to a common sampling
    rate - TODO confirm against the dataset description.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load pickles that come from a trusted source.
    with open(path, "rb") as f:
        data = pickle.load(f, encoding="latin-1")

    chest_raw = data["signal"]["chest"]
    wrist_raw = data["signal"]["wrist"]

    # --- chest sensors --------------------------------------------------
    chest = _resample(chest_raw["ACC"], chest_step, ["ACC_x", "ACC_y", "ACC_z"])
    chest["Resp"] = _resample(chest_raw["Resp"], chest_step, ["Resp"])["Resp"]
    chest["ECG"] = _resample(chest_raw["ECG"], chest_step).iloc[:, 0]
    chest = chest.add_prefix('chest_')

    # --- wrist sensors --------------------------------------------------
    # TEMP is taken at its native rate (no thinning).
    # NOTE(review): wrist EDA is available in the pickle but has never been
    # part of the output; it is left out to keep the column set unchanged.
    wrist = _resample(wrist_raw["ACC"], wrist_acc_step, ["ACC_x", "ACC_y", "ACC_z"])
    wrist["BVP"] = _resample(wrist_raw["BVP"], bvp_step, ["BVP"])["BVP"]
    wrist["TEMP"] = _resample(wrist_raw["TEMP"], 1, ["TEMP"])["TEMP"]
    wrist = wrist.add_prefix('wrist_')

    # Left join: the chest frame decides how many rows the result has.
    signals = chest.join(wrist)

    # Questionnaire answers are per-subject scalars broadcast to every row.
    for k, v in data["questionnaire"].items():
        signals[k] = v

    # --- R-peak count per output row --------------------------------------
    rpeak_counts = _count_rpeaks(data['rpeaks'], chest_step, len(signals))
    signals = signals.join(pd.DataFrame({"Rpeaks": rpeak_counts}))

    # --- activity ---------------------------------------------------------
    activity = pd.DataFrame(data["activity"]).astype(int)
    activity.columns = ["Activity"]
    signals = signals.join(activity)

    # --- label --------------------------------------------------------------
    label = pd.DataFrame(
        np.repeat(pd.DataFrame(data["label"]).values, label_repeat, axis=0),
        columns=["Label"],
    )
    # If the repeated labels are shorter than the activity track, pad the tail
    # with the mean label. Built in one step (DataFrame.append no longer
    # exists in pandas >= 2.0 and was quadratic when called per row).
    missing = len(activity) - len(label)
    if missing > 0:
        tail = pd.DataFrame({"Label": np.full(missing, label["Label"].mean())})
        label = pd.concat([label, tail], ignore_index=True)
    signals = signals.join(label)

    signals['Subject'] = data["subject"]
    return signals

In [69]:
# Root folder of the dataset; change this one line to relocate the data.
DATA_DIR = 'H:/MSC Data Science/Dissertation/data/PPG_FieldStudy'

# Subject number -> pickle path, following the <root>/S<n>/S<n>.pkl layout.
SUBJECT_PATHS = {
    n: '{root}/S{n}/S{n}.pkl'.format(root=DATA_DIR, n=n) for n in range(1, 16)
}

# Individual names kept because the loading cell refers to them.
(path_1, path_2, path_3, path_4, path_5,
 path_6, path_7, path_8, path_9, path_10,
 path_11, path_12, path_13, path_14, path_15) = (
    SUBJECT_PATHS[n] for n in range(1, 16)
)


In [70]:
# Load every subject in order (S1 first, S15 last); each name is bound to the
# frame returned by load_data for the matching path.
(S1, S2, S3, S4, S5,
 S6, S7, S8, S9, S10,
 S11, S12, S13, S14, S15) = (
    load_data(subject_path)
    for subject_path in (
        path_1, path_2, path_3, path_4, path_5,
        path_6, path_7, path_8, path_9, path_10,
        path_11, path_12, path_13, path_14, path_15,
    )
)
print('load completed')

load completed


In [76]:
# Stack all per-subject frames into one table. The original per-subject row
# index is kept, so index values repeat across subjects.
df_groups = [S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]
data = pd.concat(df_groups)
# Show the frame that was just built (`df` is not defined anywhere in this
# notebook and would raise NameError on a fresh kernel).
print(data)

       chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_Resp  chest_ECG  \
0           0.8564      -0.0678      -0.3656    4.441833   0.015610   
1           0.8514      -0.0654      -0.3688    4.876709  -0.015747   
2           0.8514      -0.0662      -0.3708    3.340149  -0.008743   
3           0.8594      -0.0632      -0.3640    0.740051  -0.339523   
4           0.8510      -0.0674      -0.3694   -1.475525  -0.089905   
...            ...          ...          ...         ...        ...   
31751       0.8878       0.0122      -0.2330    2.789307  -0.281845   
31752       0.8962      -0.0184      -0.2280    0.563049   0.075851   
31753       0.9010       0.0006      -0.2102   -1.075745   0.017166   
31754       0.8906      -0.0124      -0.2238   -2.117920  -0.304184   
31755       0.8888       0.0334      -0.2040   -2.513123  -0.062943   

       wrist_ACC_x  wrist_ACC_y  wrist_ACC_z  wrist_BVP  wrist_TEMP  WEIGHT  \
0        -0.765625    -0.078125     0.671875       7.28       32.13 

In [77]:
# Column names of `data` minus the bookkeeping columns, in their original order.
remove = ["Subject", "Activity"]
features = []
for column_name in list(data.columns):
    if column_name in remove:
        continue
    features.append(column_name)

In [78]:
# Encode gender numerically: ' f' -> 0, ' m' -> 1 (the leading space is part
# of the raw value). The result is assigned back to the column; calling
# replace(..., inplace=True) on `data['Gender']` acts on an intermediate
# object and does not update `data` under pandas Copy-on-Write.
data['Gender'] = data['Gender'].replace({' f': 0, ' m': 1})
data

Unnamed: 0,chest_ACC_x,chest_ACC_y,chest_ACC_z,chest_Resp,chest_ECG,wrist_ACC_x,wrist_ACC_y,wrist_ACC_z,wrist_BVP,wrist_TEMP,WEIGHT,Gender,AGE,HEIGHT,SKIN,SPORT,Rpeaks,Activity,Label,Subject
0,0.8564,-0.0678,-0.3656,4.441833,0.015610,-0.765625,-0.078125,0.671875,7.28,32.13,78.0,1,34,182.0,3,6,0,0,49.611369,S1
1,0.8514,-0.0654,-0.3688,4.876709,-0.015747,-0.765625,-0.078125,0.671875,-11.36,32.16,78.0,1,34,182.0,3,6,1,0,49.611369,S1
2,0.8514,-0.0662,-0.3708,3.340149,-0.008743,-0.765625,-0.078125,0.671875,-51.27,32.16,78.0,1,34,182.0,3,6,0,0,49.611369,S1
3,0.8594,-0.0632,-0.3640,0.740051,-0.339523,-0.750000,-0.078125,0.671875,5.65,32.16,78.0,1,34,182.0,3,6,0,0,49.611369,S1
4,0.8510,-0.0674,-0.3694,-1.475525,-0.089905,-0.765625,-0.078125,0.671875,34.34,32.16,78.0,1,34,182.0,3,6,0,0,49.611369,S1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31751,0.8878,0.0122,-0.2330,2.789307,-0.281845,-0.468750,0.859375,0.093750,1.68,33.99,79.0,1,28,183.0,2,5,0,0,79.664230,S15
31752,0.8962,-0.0184,-0.2280,0.563049,0.075851,-0.609375,0.890625,0.265625,-16.38,33.99,79.0,1,28,183.0,2,5,1,0,79.664230,S15
31753,0.9010,0.0006,-0.2102,-1.075745,0.017166,-0.468750,0.531250,-0.562500,13.43,33.99,79.0,1,28,183.0,2,5,0,0,79.664230,S15
31754,0.8906,-0.0124,-0.2238,-2.117920,-0.304184,-0.375000,0.406250,-0.843750,-5.89,33.99,79.0,1,28,183.0,2,5,0,0,79.664230,S15


In [79]:
# Discard the questionnaire, activity and subject columns; only the sensor
# signals, Rpeaks and Label remain afterwards.
dropped_columns = [
    'WEIGHT', 'Gender', 'AGE', 'HEIGHT', 'SKIN', 'SPORT',
    'Activity', 'Subject',
]
data = data.drop(columns=dropped_columns)

In [82]:
# Pairwise correlation matrix of the columns currently held in `data`
# (the Label column is included, so its row/column shows how each signal
# relates to the target).
data.corr()

Unnamed: 0,chest_ACC_x,chest_ACC_y,chest_ACC_z,chest_Resp,chest_ECG,wrist_ACC_x,wrist_ACC_y,wrist_ACC_z,wrist_BVP,wrist_TEMP,Rpeaks,Label
chest_ACC_x,1.0,-0.022678,0.246749,-0.007986,0.026009,-0.023795,0.058624,-0.113815,0.000852,-0.002779,0.015922,0.076182
chest_ACC_y,-0.022678,1.0,-0.065819,-0.00233,0.006766,0.013943,0.029692,0.034214,0.000401,0.092045,-0.00323,0.000186
chest_ACC_z,0.246749,-0.065819,1.0,-0.009516,0.004149,0.006047,0.225833,-0.137573,0.000853,-0.117131,0.055475,0.29252
chest_Resp,-0.007986,-0.00233,-0.009516,1.0,0.005615,0.003747,0.002294,-2.3e-05,0.001226,0.000104,0.004191,-0.00059
chest_ECG,0.026009,0.006766,0.004149,0.005615,1.0,0.001337,-0.000117,-0.002163,0.003871,0.000726,0.051265,0.002242
wrist_ACC_x,-0.023795,0.013943,0.006047,0.003747,0.001337,1.0,0.051566,0.276019,0.012247,0.00981,0.026711,0.12819
wrist_ACC_y,0.058624,0.029692,0.225833,0.002294,-0.000117,0.051566,1.0,-0.094746,-0.00394,-0.113022,0.049114,0.2514
wrist_ACC_z,-0.113815,0.034214,-0.137573,-2.3e-05,-0.002163,0.276019,-0.094746,1.0,0.003398,0.036707,-0.026166,-0.133656
wrist_BVP,0.000852,0.000401,0.000853,0.001226,0.003871,0.012247,-0.00394,0.003398,1.0,2.7e-05,0.007428,-3.3e-05
wrist_TEMP,-0.002779,0.092045,-0.117131,0.000104,0.000726,0.00981,-0.113022,0.036707,2.7e-05,1.0,0.01261,0.058727


In [83]:
# Target: the Label column. Features: every other column of `data`.
target_column = 'Label'
y = data[target_column]
X = data.drop(columns=[target_column])

In [84]:
# astype returns a new object, so the result has to be assigned back;
# otherwise X and y keep their original dtypes.
y = y.astype(np.float32)
X = X.astype(np.float32)
X

Unnamed: 0,chest_ACC_x,chest_ACC_y,chest_ACC_z,chest_Resp,chest_ECG,wrist_ACC_x,wrist_ACC_y,wrist_ACC_z,wrist_BVP,wrist_TEMP,Rpeaks
0,0.8564,-0.0678,-0.3656,4.441833,0.015610,-0.765625,-0.078125,0.671875,7.280000,32.130001,0.0
1,0.8514,-0.0654,-0.3688,4.876709,-0.015747,-0.765625,-0.078125,0.671875,-11.360000,32.160000,1.0
2,0.8514,-0.0662,-0.3708,3.340149,-0.008743,-0.765625,-0.078125,0.671875,-51.270000,32.160000,0.0
3,0.8594,-0.0632,-0.3640,0.740051,-0.339523,-0.750000,-0.078125,0.671875,5.650000,32.160000,0.0
4,0.8510,-0.0674,-0.3694,-1.475525,-0.089905,-0.765625,-0.078125,0.671875,34.340000,32.160000,0.0
...,...,...,...,...,...,...,...,...,...,...,...
31751,0.8878,0.0122,-0.2330,2.789307,-0.281845,-0.468750,0.859375,0.093750,1.680000,33.990002,0.0
31752,0.8962,-0.0184,-0.2280,0.563049,0.075851,-0.609375,0.890625,0.265625,-16.379999,33.990002,1.0
31753,0.9010,0.0006,-0.2102,-1.075745,0.017166,-0.468750,0.531250,-0.562500,13.430000,33.990002,0.0
31754,0.8906,-0.0124,-0.2238,-2.117920,-0.304184,-0.375000,0.406250,-0.843750,-5.890000,33.990002,0.0


In [91]:
from sklearn.model_selection import train_test_split

# 60 % / 40 % random row split with a fixed seed.
# NOTE(review): rows from the same subject (and neighbouring rows that share
# one repeated label value) can end up on both sides of the split - confirm
# this is acceptable for the reported validation score.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)

(310773, 11) (310773,) (207183, 11) (207183,)


In [92]:
## Change shape
# Append a trailing axis of length 1 so each row becomes (n_features, 1),
# which is the per-sample shape handed to the first model layer below.
# Running this cell a second time would add yet another axis.
X_train = np.expand_dims(X_train, axis=-1)
X_val = np.expand_dims(X_val, axis=-1)

print(X_train.shape, X_val.shape)

# Per-sample shape: everything except the leading batch axis.
input_dim = X_train.shape[1:]

(310773, 11, 1) (207183, 11, 1)


In [93]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Convolution1D, Activation, Flatten, Dense, LeakyReLU, MaxPooling1D
from tensorflow.keras.optimizers import RMSprop, Adam

In [94]:
# Small 1-D convolutional regressor, declared as a layer list.
# NOTE(review): the two consecutive pooling layers and the "elu" activation
# on the single output unit look unusual for a regression target - confirm
# they are intentional.
model = Sequential([
    # 64 filters of width 2 slid along the feature axis.
    Convolution1D(filters=64, kernel_size=2, input_shape=input_dim),
    MaxPooling1D(pool_size=2),
    MaxPooling1D(pool_size=2),
    Flatten(),
    # Single output value per sample.
    Dense(1, activation="elu"),
])
model.compile(
    loss="mean_absolute_error",
    optimizer=Adam(learning_rate=0.01),
)
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_4 (Conv1D)           (None, 10, 64)            192       
                                                                 
 max_pooling1d_8 (MaxPooling  (None, 5, 64)            0         
 1D)                                                             
                                                                 
 max_pooling1d_9 (MaxPooling  (None, 2, 64)            0         
 1D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 128)               0         
                                                                 
 dense_4 (Dense)             (None, 1)                 129       
                                                                 
Total params: 321
Trainable params: 321
Non-trainable 

In [95]:
# Train for 100 epochs in mini-batches of 100 rows, scoring the validation
# split after every epoch; the returned object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=100,
    epochs=100,
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [96]:
# Score the trained model on the validation split. The model was compiled
# with a mean-absolute-error loss and no additional metrics, so the single
# number shown is that loss.
model.evaluate(X_val, y_val)



15.809282302856445