In [5]:
"""loop through files in S1_Dataset directory"""
"""create 2D data matrix by appending each new dataset to the bottom of data matrixx"""
"""add a column indicating the paitent ID"""
import os
import pandas as pd
import numpy as np
S1_PATH = os.path.join('..','..','Datasets_Healthy_Older_People','S1_Dataset')
S2_PATH = os.path.join('..','..','Datasets_Healthy_Older_People','S2_Dataset')
print('Importing Data...')
s1_data = pd.DataFrame()
patient_id = 0
for filename in os.listdir(S1_PATH):
    if filename != 'README.txt':
        data_path = os.path.join(S1_PATH, filename)
        data = pd.read_csv(data_path, header=None)
        data[9] = np.zeros(data.shape[0])
        data[9] = patient_id
        patient_id = patient_id + 1
        s1_data = s1_data.append(data, ignore_index=True)
s1_data.columns = ['time','frontal','vertical','lateral','antenna_id','rssi','phase','frequency','activity','patient_id']
s1_data = s1_data.drop(columns=['phase','frequency'])
s1_data_by_patient = s1_data.set_index(['patient_id','time'])
print('Done')

Importing Data...
Done


In [8]:
s1_data_by_patient.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,frontal,vertical,lateral,antenna_id,rssi,activity
patient_id,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.0,0.27203,1.0082,-0.082102,1,-63.5,1
0,0.5,0.27203,1.0082,-0.082102,1,-63.0,1
0,1.5,0.44791,0.91636,-0.013684,1,-63.5,1
0,1.75,0.44791,0.91636,-0.013684,1,-63.0,1
0,2.5,0.34238,0.96229,-0.059296,1,-63.5,1


In [9]:
"""analyse data for sequential learning preprocessing"""

time_intervals = []
too_large_intervals = {}
too_large_interval_limit = 2
prev_time_stamp = 0
index = 0
for time_stamp in s1_data['time']:
    difference = time_stamp - prev_time_stamp
   
    if difference > too_large_interval_limit:
        too_large_intervals[index] = difference
    
    prev_time_stamp = time_stamp 
    if difference > 0:
        time_intervals.append(round(difference,4))
    index = index + 1

"""some large jumps of missing data found"""
print('Maximum time step: ', max(time_intervals)) 
print('Minimum time step: ', min(time_intervals))
print('Number of jumps larger than %ds: ' %too_large_interval_limit, len(too_large_intervals))

"""
for k,v in too_large_intervals.items():
    print('\nIndex: ', k, '\tJump: ', v)
    print(s1_data[k-5:k+5])
"""
    
"""find the smallest timestep"""
from math import gcd

smallest_timestep = time_intervals[0]
for time_step in time_intervals[1:]:
    smallest_timestep = gcd(round(smallest_timestep*1000), round(time_step*1000))/1000

print('Time step to fit everything: ', smallest_timestep)

Maximum time step:  185.15
Minimum time step:  0.02
Number of jumps larger than 2s:  1040
Time step to fit everything:  0.005


In [10]:
"""preprocess data for sequential learning"""
"""filter added to remove large time steps"""
"""filter added to remove """


"""
a data point is a set of vectors with features, which looks like:
<x_n,...,x_n+w>, <y_n,...,y_n+w>, <z_n,...,z_n+w>, <rssi_n,...,rssi_n+w>, <antennaID_n,...,antennaID_n+w>

a label is either the set which contains the features of the n+w+1 item or just the activity:
<x_n+w+1>, <y_n+w+1>, <z_n+w+1>, <rssi_n+w+1>, <antennaID_n+w+1>
or
<activity_n+w+1>
"""
largest_allowed_timestep = 10
window_size = 10
top_level_column_indexes = np.array(range(window_size))
second_level_column_indexes = np.array(['frontal','vertical','lateral','antenna_id','rssi'])
indexes = pd.MultiIndex.from_product([top_level_column_indexes,second_level_column_indexes])

number_of_patients = s1_data_by_patient.index.values[-1][0] + 1
# number_of_patients = 3

all_patients_sequence_data = []
all_patients_sequence_label = []
too_large_count = 0
for patient_index in range(number_of_patients):
    patient_data = s1_data_by_patient.xs(patient_index)
    time_indexes = patient_data.index.values
    
    #start_time = time_indexes[0]
    all_sequence_data = []
    all_sequence_label = []

    for window_start in range(len(time_indexes) - window_size - 1):
        sequence_data = []
        time_stamp = 0
        skip = 0
        for i in range(window_size):
            single_data = patient_data.iloc[window_start+i][:-1].values
            prev_time_stamp = time_stamp
            time_stamp = time_indexes[window_start+i]
            time_difference = time_stamp-prev_time_stamp
            if(time_difference > largest_allowed_timestep):
                skip = 1 
            sequence_data.extend(single_data)

        if(skip == 1):
            too_large_count = too_large_count + 1
            skip = 0
        else:
            all_sequence_data.append(sequence_data)
            all_sequence_label.append( patient_data.iloc[window_start+window_size+1][-1])
    
    all_patients_sequence_data.extend(all_sequence_data)
    all_patients_sequence_label.extend(all_sequence_label)

all_patients_sequence_data = np.asarray(all_patients_sequence_data)
all_patients_sequence_label = np.asarray(all_patients_sequence_label)

print(all_patients_sequence_data.shape)
print(all_patients_sequence_label.shape)
print(too_large_count)

all_patients_sequence_data_pd = pd.DataFrame(all_patients_sequence_data, columns=indexes)
all_patients_sequence_data_pd['label'] = all_patients_sequence_label
all_patients_sequence_data_pd


(1386, 50)
(1386,)
50446


Unnamed: 0_level_0,0,0,0,0,0,1,1,1,1,1,...,8,8,8,8,9,9,9,9,9,label
Unnamed: 0_level_1,frontal,vertical,lateral,antenna_id,rssi,frontal,vertical,lateral,antenna_id,rssi,...,vertical,lateral,antenna_id,rssi,frontal,vertical,lateral,antenna_id,rssi,Unnamed: 21_level_1
0,0.272030,1.00820,-0.082102,1.0,-63.5,0.272030,1.00820,-0.082102,1.0,-63.0,...,1.03120,-0.127710,1.0,-64.5,0.23685,1.03120,-0.127710,1.0,-66.0,1.0
1,0.272030,1.00820,-0.082102,1.0,-63.0,0.447910,0.91636,-0.013684,1.0,-63.5,...,1.03120,-0.127710,1.0,-66.0,0.23685,1.03120,-0.127710,1.0,-63.0,1.0
2,0.447910,0.91636,-0.013684,1.0,-63.5,0.447910,0.91636,-0.013684,1.0,-63.0,...,1.03120,-0.127710,1.0,-63.0,0.23685,1.03120,-0.127710,1.0,-65.0,1.0
3,0.447910,0.91636,-0.013684,1.0,-63.0,0.342380,0.96229,-0.059296,1.0,-63.5,...,1.03120,-0.127710,1.0,-65.0,0.31893,0.99674,-0.070699,1.0,-62.0,1.0
4,0.342380,0.96229,-0.059296,1.0,-63.5,0.342380,0.96229,-0.059296,4.0,-56.5,...,0.99674,-0.070699,1.0,-62.0,0.31893,0.99674,-0.070699,4.0,-56.5,1.0
5,0.342380,0.96229,-0.059296,4.0,-56.5,0.307210,0.99674,-0.070699,1.0,-63.5,...,0.99674,-0.070699,4.0,-56.5,0.47136,0.92784,-0.002281,1.0,-62.5,1.0
6,0.307210,0.99674,-0.070699,1.0,-63.5,0.272030,0.99674,-0.093505,1.0,-64.0,...,0.92784,-0.002281,1.0,-62.5,0.40101,0.93932,-0.025087,1.0,-64.0,1.0
7,0.272030,0.99674,-0.093505,1.0,-64.0,0.236850,1.03120,-0.127710,1.0,-64.5,...,0.93932,-0.025087,1.0,-64.0,0.40101,0.93932,-0.025087,1.0,-62.5,1.0
8,0.236850,1.03120,-0.127710,1.0,-64.5,0.236850,1.03120,-0.127710,1.0,-66.0,...,0.93932,-0.025087,1.0,-62.5,0.40101,0.93932,-0.025087,1.0,-64.5,1.0
9,0.236850,1.03120,-0.127710,1.0,-66.0,0.236850,1.03120,-0.127710,1.0,-63.0,...,0.93932,-0.025087,1.0,-64.5,0.40101,0.93932,-0.025087,4.0,-58.0,1.0


In [12]:
"""preprocess data for sequential learning"""
"""Keras requries 3D array for the LSTM Input"""
window_size = 10
top_level_column_indexes = np.array(range(window_size))
second_level_column_indexes = np.array(['frontal','vertical','lateral','antenna_id','rssi'])
indexes = pd.MultiIndex.from_product([top_level_column_indexes,second_level_column_indexes])

number_of_patients = s1_data_by_patient.index.values[-1][0] + 1
# number_of_patients = 3

too_large_count = 0
all_patients_sequence_data = []
all_patients_sequence_label = []
for patient_index in range(number_of_patients):
    patient_data = s1_data_by_patient.xs(patient_index)
    time_indexes = patient_data.index.values
    
    #start_time = time_indexes[0]
    all_sequence_data = []
    all_sequence_label = []

    for window_start in range(len(time_indexes) - window_size - 1):
        sequence_data = []
        time_stamp = 0
        skip = 0
        for i in range(window_size):
            single_data = patient_data.iloc[window_start+i][:-1].values
            prev_time_stamp = time_stamp
            time_stamp = time_indexes[window_start+i]
            time_difference = time_stamp-prev_time_stamp
            if(time_difference > largest_allowed_timestep):
                skip = 1 
            sequence_data.append(single_data)
        
        if(skip == 1):
            too_large_count = too_large_count + 1
        else:
            all_sequence_data.append(sequence_data)
            all_sequence_label.append( patient_data.iloc[window_start+window_size+1][-1])
    
    all_patients_sequence_data.extend(all_sequence_data)
    all_patients_sequence_label.extend(all_sequence_label)

all_patients_sequence_data = np.asarray(all_patients_sequence_data)
all_patients_sequence_label = np.asarray(all_patients_sequence_label)
print(too_large_count)


50446
[[[ 2.7203e-01  1.0082e+00 -8.2102e-02  1.0000e+00 -6.3500e+01]
  [ 2.7203e-01  1.0082e+00 -8.2102e-02  1.0000e+00 -6.3000e+01]
  [ 4.4791e-01  9.1636e-01 -1.3684e-02  1.0000e+00 -6.3500e+01]
  ...
  [ 2.7203e-01  9.9674e-01 -9.3505e-02  1.0000e+00 -6.4000e+01]
  [ 2.3685e-01  1.0312e+00 -1.2771e-01  1.0000e+00 -6.4500e+01]
  [ 2.3685e-01  1.0312e+00 -1.2771e-01  1.0000e+00 -6.6000e+01]]

 [[ 2.7203e-01  1.0082e+00 -8.2102e-02  1.0000e+00 -6.3000e+01]
  [ 4.4791e-01  9.1636e-01 -1.3684e-02  1.0000e+00 -6.3500e+01]
  [ 4.4791e-01  9.1636e-01 -1.3684e-02  1.0000e+00 -6.3000e+01]
  ...
  [ 2.3685e-01  1.0312e+00 -1.2771e-01  1.0000e+00 -6.4500e+01]
  [ 2.3685e-01  1.0312e+00 -1.2771e-01  1.0000e+00 -6.6000e+01]
  [ 2.3685e-01  1.0312e+00 -1.2771e-01  1.0000e+00 -6.3000e+01]]

 [[ 4.4791e-01  9.1636e-01 -1.3684e-02  1.0000e+00 -6.3500e+01]
  [ 4.4791e-01  9.1636e-01 -1.3684e-02  1.0000e+00 -6.3000e+01]
  [ 3.4238e-01  9.6229e-01 -5.9296e-02  1.0000e+00 -6.3500e+01]
  ...
  [ 2.3685e-

In [67]:
"""Randomize and Split the data into Train, Validation and Test"""
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

data, label = unison_shuffled_copies(all_patients_sequence_data,all_patients_sequence_label)


train_data = data[:1100]
val_data =  data[1100:1200]
test_data =  data[1200:]

train_label = label[:1100]
val_label =  label[1100:1200]
test_label =  label[1200:]

print(train_data.shape)
print(train_label.shape)



(1100, 10, 5)
(1100,)


In [63]:
"""setup and train the decision tree model"""
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.utils import to_categorical
from keras.optimizers import Adam

model = Sequential()
model.add(LSTM(500,return_sequences=True, input_shape=(10, 5)))
model.add(LSTM(500, return_sequences=True))
model.add(LSTM(500))
model.add(Dense(5, activation='softmax'))

model.summary()

adam = Adam(lr=0.001)

model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

train_label_binary = to_categorical(train_label)
val_label_binary = to_categorical(val_label)

model.fit(train_data, train_label_binary, 
          epochs=40, batch_size=10,
          validation_data=(val_data,val_label_binary))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_27 (LSTM)               (None, 10, 500)           1012000   
_________________________________________________________________
lstm_28 (LSTM)               (None, 10, 500)           2002000   
_________________________________________________________________
lstm_29 (LSTM)               (None, 500)               2002000   
_________________________________________________________________
dense_17 (Dense)             (None, 5)                 2505      
Total params: 5,018,505
Trainable params: 5,018,505
Non-trainable params: 0
_________________________________________________________________
Train on 1100 samples, validate on 100 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/4

<keras.callbacks.History at 0x20d35bbf908>

In [68]:
"""Evaluate Model"""
from sklearn.metrics import confusion_matrix

train_pred = np.argmax(model.predict(train_data),axis=1)
conf_matrix = confusion_matrix(train_label, train_pred)
print('Train Set:')
print(conf_matrix)
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = conf_matrix[index,index]/sum(conf_matrix[:,index])
    recall = conf_matrix[index,index]/sum(conf_matrix[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

test_pred = np.argmax(model.predict(test_data),axis=1)
conf_matrix = confusion_matrix(test_label, test_pred)
print('\nTest Set:')
print(conf_matrix)
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = conf_matrix[index,index]/sum(conf_matrix[:,index])
    recall = conf_matrix[index,index]/sum(conf_matrix[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

Train Set:
[[562   0   0   0]
 [  0 161   0   0]
 [  0   0 317   0]
 [  0   7   0  53]]
sit on bed precision: 1.0
sit on bed recall: 1.0
sit on chair precision: 0.9583333333333334
sit on chair recall: 1.0
lying precision: 1.0
lying recall: 1.0
ambulating precision: 1.0
ambulating recall: 0.8833333333333333

Test Set:
[[99  0  0  0]
 [ 0 35  0  0]
 [ 0  0 43  0]
 [ 0  0  0  9]]
sit on bed precision: 1.0
sit on bed recall: 1.0
sit on chair precision: 1.0
sit on chair recall: 1.0
lying precision: 1.0
lying recall: 1.0
ambulating precision: 1.0
ambulating recall: 1.0


In [68]:
"""Evaluate Model"""
from sklearn.metrics import confusion_matrix

train_pred = np.argmax(model.predict(train_data),axis=1)
conf_matrix = confusion_matrix(train_label, train_pred)
print('Train Set:')
print(conf_matrix)
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = conf_matrix[index,index]/sum(conf_matrix[:,index])
    recall = conf_matrix[index,index]/sum(conf_matrix[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

test_pred = np.argmax(model.predict(test_data),axis=1)
conf_matrix = confusion_matrix(test_label, test_pred)
print('\nTest Set:')
print(conf_matrix)
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = conf_matrix[index,index]/sum(conf_matrix[:,index])
    recall = conf_matrix[index,index]/sum(conf_matrix[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

Train Set:
[[562   0   0   0]
 [  0 161   0   0]
 [  0   0 317   0]
 [  0   7   0  53]]
sit on bed precision: 1.0
sit on bed recall: 1.0
sit on chair precision: 0.9583333333333334
sit on chair recall: 1.0
lying precision: 1.0
lying recall: 1.0
ambulating precision: 1.0
ambulating recall: 0.8833333333333333

Test Set:
[[99  0  0  0]
 [ 0 35  0  0]
 [ 0  0 43  0]
 [ 0  0  0  9]]
sit on bed precision: 1.0
sit on bed recall: 1.0
sit on chair precision: 1.0
sit on chair recall: 1.0
lying precision: 1.0
lying recall: 1.0
ambulating precision: 1.0
ambulating recall: 1.0


In [70]:
"""Save Final Model"""
from keras.models import load_model
model.save('lstm_model.h5')

In [72]:
"""Load Previously Trained Model"""

model2 = load_model('lstm_model.h5')

train_pred = np.argmax(model2.predict(train_data),axis=1)
conf_matrix = confusion_matrix(train_label, train_pred)
print('Train Set:')
print(conf_matrix)
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = conf_matrix[index,index]/sum(conf_matrix[:,index])
    recall = conf_matrix[index,index]/sum(conf_matrix[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

test_pred = np.argmax(model.predict(test_data),axis=1)
conf_matrix = confusion_matrix(test_label, test_pred)
print('\nTest Set:')
print(conf_matrix)
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = conf_matrix[index,index]/sum(conf_matrix[:,index])
    recall = conf_matrix[index,index]/sum(conf_matrix[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

Train Set:
[[562   0   0   0]
 [  0 161   0   0]
 [  0   0 317   0]
 [  0   7   0  53]]
sit on bed precision: 1.0
sit on bed recall: 1.0
sit on chair precision: 0.9583333333333334
sit on chair recall: 1.0
lying precision: 1.0
lying recall: 1.0
ambulating precision: 1.0
ambulating recall: 0.8833333333333333

Test Set:
[[99  0  0  0]
 [ 0 35  0  0]
 [ 0  0 43  0]
 [ 0  0  0  9]]
sit on bed precision: 1.0
sit on bed recall: 1.0
sit on chair precision: 1.0
sit on chair recall: 1.0
lying precision: 1.0
lying recall: 1.0
ambulating precision: 1.0
ambulating recall: 1.0


In [3]:
import activity_analysis as aa
#two new features added, euler angles Pitch and roll
#new columns ['time','frontal','vertical','lateral','id','rssi','phase','frequency','roll','pitch','activity']
#            ['0',    '1',      '2',        '3' ,    '4',  '5',   '6',     '7',       '8',    '9',    '10']
tool = aa.Tool()
import pandas as pd
filtered_patients,filtered_activity = tool.filter_unbalances(90)#enter percentege
tool.patient_activity = filtered_activity
tool.patient_list = filtered_patients#filtering based on activity
tool.plot()

Number of Remaining Patients:  49


<Figure size 640x480 with 1 Axes>

Done


In [25]:
#interpolate_timeseries(window, steps, kind='linear', filtering = False, filter_features = [],ts_features = [1,2,3,4,5,6,7,8,9]))
#window given in seconds e.g. 5s
#steps also given in seconds e.g. 0.1
#kind (optional): type of interpolation e.g. ‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’,
#‘previous’, ‘next’, where ‘zero’, ‘slinear’, ‘quadratic’ and ‘cubic’ for more info see scipy.interp1d
#filtering & filter_features (optional): applies an lpf on the chosen features given by their indexes e.g. when filtering = True
#filter_features[1,2,3] will apply an lpf on the 3 axis of accelerometer
#ts_features (optional): will only transform these features to a time series e.g. [1,2,3,4,5,6,7,8,9] all features are transformed
time_series_patients, patients_labels= tool.interpolate_timeseries(10,0.1,ts_features=[1,2,3,5])
# time_series_patients = pd.DataFrame(time_series_patients[0])
time_series_patients[0]


Unnamed: 0_level_0,0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.1,0.2,0.2,...,9.7,9.7,9.8,9.8,9.8,9.8,9.9,9.9,9.9,9.9
Unnamed: 0_level_1,frontal,vertical,lateral,rssi,frontal,vertical,lateral,rssi,frontal,vertical,...,lateral,rssi,frontal,vertical,lateral,rssi,frontal,vertical,lateral,rssi
0,0.272030,1.008200,-0.082102,-63.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.272030,1.008200,-0.082102,-63.400000,1.008200,-0.082102,-63.500000,0.000000,0.000000,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.272030,1.008200,-0.082102,-63.300000,1.008200,-0.082102,-63.400000,-0.082102,1.008200,-0.082102,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.272030,1.008200,-0.082102,-63.200000,1.008200,-0.082102,-63.300000,-0.082102,1.008200,-0.082102,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.272030,1.008200,-0.082102,-63.100000,1.008200,-0.082102,-63.200000,-0.082102,1.008200,-0.082102,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,0.272030,1.008200,-0.082102,-63.000000,1.008200,-0.082102,-63.100000,-0.082102,1.008200,-0.082102,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,0.289618,0.999016,-0.075260,-63.050000,1.008200,-0.082102,-63.000000,-0.082102,1.008200,-0.082102,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,0.307206,0.989832,-0.068418,-63.100000,0.999016,-0.075260,-63.050000,-0.082102,1.008200,-0.082102,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,0.324794,0.980648,-0.061577,-63.150000,0.989832,-0.068418,-63.100000,-0.075260,0.999016,-0.075260,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,0.342382,0.971464,-0.054735,-63.200000,0.980648,-0.061577,-63.150000,-0.068418,0.989832,-0.068418,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [89]:
    """Randomize and Split the data into Train, Validation and Test"""
import numpy as np

#turn 1D
patients_labels = [pd.DataFrame(l) for l in patients_labels]
features = pd.concat(list(time_series_patients))
features = pd.DataFrame(features, columns=time_series_patients[0].columns)

#fuk pandas
features_array = []
group = []
t_prev = 0
for t,f in features:
        if t_prev != t:
            features_array.append(group)
            group = []
        group.append(list(features[t,f]))
        t_prev= t

labels = pd.concat(list(patients_labels))
print(len(features))
print(len(labels))


all_patients_sequence_data_2= np.array(features_array)
all_patients_sequence_data_2 = np.transpose(all_patients_sequence_data_2, (2, 1, 0))
all_patients_sequence_label_2= np.asarray(labels)

print("Features Shape:", all_patients_sequence_data_2.shape)
print("Label Shape:", all_patients_sequence_label_2.shape)

def unison_shuffled_copies(a, b):
        assert len(a) == len(b)
        p = np.random.permutation(len(a))
        return a[p], b[p]

data, label = unison_shuffled_copies(all_patients_sequence_data_2,all_patients_sequence_label_2)

percentage = [0.7,0.1,0.2]
indexes = []
sum = 0
for a in percentage:
        sum = sum+a*len(data)
        indexes.append(round(sum))
        
train_data = data[:indexes[0]]
val_data =  data[indexes[0]:indexes[1]]
test_data =  data[indexes[1]:]

train_label = label[:indexes[0]]
val_label =  label[indexes[0]:indexes[1]]
test_label =  label[indexes[1]:]

print(train_data.shape)
print(train_label.shape)


177046
177046
Features Shape: (177046, 4, 99)
Label Shape: (177046, 1)
(123932, 4, 99)
(123932, 1)


In [None]:
"""setup and train the decision tree model"""
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.utils import to_categorical
from keras.optimizers import Adam

model = Sequential()
model.add(LSTM(500,return_sequences=True, input_shape=(4,99)))
model.add(LSTM(500, return_sequences=True))
model.add(LSTM(500))
model.add(Dense(5, activation='softmax'))

model.summary()

adam = Adam(lr=0.001)

model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

train_label_binary = to_categorical(train_label)
val_label_binary = to_categorical(val_label)

model.fit(train_data, train_label_binary, 
          epochs=40, batch_size=10,
          validation_data=(val_data,val_label_binary))

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 4, 500)            1200000   
_________________________________________________________________
lstm_3 (LSTM)                (None, 4, 500)            2002000   
_________________________________________________________________
lstm_4 (LSTM)                (None, 500)               2002000   
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 2505      
Total params: 5,206,505
Trainable params: 5,206,505
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use tf.cast instead.
Train on 123932 samples, validate on 17705 samples
Epoch 1/40