In [63]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv1D, LSTM, CuDNNLSTM, Flatten, Dropout
from keras import optimizers
from keras.utils import to_categorical

In [64]:
class trainerHelper():
    
    def __init__(self, data, labels, win_size, num_sensor_data, num_classes):
        self.data = data
        self.labels = labels
        self.win_size = win_size
        self.sensor_data = num_sensor_data
        self.num_classes = num_classes
        self.data_length = self.data.shape[0]
        print(self.data_length)
        self.start = 0
        
    def windows(self):
 
        while self.start + self.win_size < self.data_length:
            yield int(self.start), int(self.start + self.win_size)
            self.start += (self.win_size/2)
    
    def segment_data(self):
        self.start = 0
        segments = np.empty((0, self.win_size, self.sensor_data))
        labels= np.empty((0, self.num_classes))
        for (start, end) in self.windows():
            x = np.zeros((1, self.win_size, self.sensor_data))
            x[0,:] = self.data[start:end,:]
            label = self.labels[start:end,:]
            if(x.shape[1] == self.win_size):
                segments = np.vstack([segments,x])
                lb = np.zeros((1, self.num_classes))
                lb[0,:] = label[-1]
                labels = np.vstack([labels,lb])
        return segments, labels

In [65]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

def prepare_data(train_data, test_data):
    encoder = OneHotEncoder()
    train_labels = encoder.fit_transform(train_data['labels'].values.reshape(-1,1)).toarray()
    test_labels = encoder.transform(test_data['labels'].values.reshape(-1,1)).toarray()
    print(train_labels.shape)
    print(test_labels.shape)
    scaler = MinMaxScaler()
    train_data.drop(['labels'], axis=1, inplace=True)
    test_data.drop(['labels'], axis=1, inplace=True)
    train_data = scaler.fit_transform(train_data)
    test_data = scaler.transform(test_data)
    
    return train_data, test_data, train_labels, test_labels

In [66]:
data1= pd.read_csv("ADL1Opportunity_locomotion.csv",header=None)
data2= pd.read_csv("ADL2Opportunity_locomotion.csv",header=None)
data3= pd.read_csv("ADL3Opportunity_locomotion.csv",header=None)
data4= pd.read_csv("ADL4Opportunity_locomotion.csv",header=None)
data5= pd.read_csv("ADL5Opportunity_locomotion.csv",header=None)

In [67]:
prova = data1[0:4]
print(prova.shape)

(4, 114)


In [68]:
train_frames = [data1, data2, data3]
test_frames = [data4, data5]
train_data = pd.concat(train_frames)
test_data = pd.concat(test_frames)
train_data.drop([35,36,37], axis=1, inplace=True) # drop columns 35,36,37 they're all zeros
test_data.drop([35,36,37], axis=1, inplace=True)
train_data.columns = [i for i in range(train_data.shape[1])]
test_data.columns = [i for i in range(test_data.shape[1])]
train_data.rename(columns ={110: 'labels'}, inplace =True)
test_data.rename(columns ={110: 'labels'}, inplace =True)

In [69]:
print(test_data.shape)
train_data.head()

(65910, 111)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,101,102,103,104,105,106,107,108,109,labels
0,148.0,956.0,-358.0,19.0,986.0,196.0,95.0,975.0,152.0,194.0,...,319.0,-845.0,-20.0,57.0,42,57.0,20.0,42,175.0,1
1,89.0,973.0,-287.0,10.0,1004.0,162.0,125.0,968.0,122.0,224.0,...,325.0,-847.0,-17.0,38.0,31,38.0,17.0,31,175.0,1
2,130.0,988.0,-418.0,-11.0,1014.0,202.0,127.0,1002.0,113.0,279.0,...,328.0,-852.0,27.0,31.0,15,31.0,-27.0,15,175.0,1
3,89.0,980.0,-425.0,-47.0,1025.0,191.0,110.0,1006.0,105.0,353.0,...,321.0,-852.0,26.0,22.0,-2,22.0,-26.0,-2,175.0,1
4,64.0,857.0,-391.0,-8.0,1022.0,204.0,97.0,1002.0,93.0,548.0,...,321.0,-850.0,22.0,45.0,-7,45.0,-22.0,-7,175.0,1


In [70]:
scaled_train, scaled_test, train_labels, test_labels = prepare_data(train_data, test_data)

(116585, 5)
(65910, 5)


In [71]:
print(scaled_train.shape)
print(train_labels.shape)

(116585, 110)
(116585, 5)


In [75]:
print(data3.shape[0])

33274


In [83]:
train_adls = [data1, data2, data3]
test_adls = [data4, data5]
win_size = 15
classes = 5
num_sensors = scaled_train.shape[1]

train_segments = np.empty((0, win_size, num_sensors))
train_segments_labels= np.empty((0, classes))
last_idx = 0
for adl in train_adls:

    trainer_helper = trainerHelper(scaled_train[last_idx:last_idx+adl.shape[0]], train_labels[last_idx:last_idx+adl.shape[0]], win_size, num_sensors, classes)
    segments, labels = trainer_helper.segment_data()
    train_segments = np.vstack([train_segments, segments])
    train_segments_labels = np.vstack([train_segments_labels, labels])
    last_idx = adl.shape[0]

print('Train data has been segmented')

test_segments = np.empty((0, win_size, num_sensors))
test_segments_labels= np.empty((0, classes))
last_idx = 0
for adl in test_adls:

    trainer_helper = trainerHelper(scaled_test[last_idx:last_idx+adl.shape[0]], test_labels[last_idx:last_idx+adl.shape[0]], win_size, num_sensors, classes)
    segments, labels = trainer_helper.segment_data()
    test_segments = np.vstack([test_segments, segments])
    test_segments_labels = np.vstack([test_segments_labels, labels])
    last_idx = adl.shape[0]
    
print('Test data has been segmented')    

51088
32223
33274
Train data has been segmented
32955
32955
Test data has been segmented


In [84]:
train_segments.shape

(15540, 15, 110)

In [85]:
train_segments_labels.shape

(15540, 5)

In [87]:
reshaped_train = train_segments.transpose(0,2,1)
reshaped_test = test_segments.transpose(0,2,1)

In [101]:
model = Sequential()
kernel_height = 5
inputshape = (num_sensors, win_size)
validationRatio=0.8;

In [102]:
model.add(Conv1D(64, kernel_size=kernel_height, strides=1,
                 activation='relu',
                 input_shape=inputshape))

model.add(Conv1D(64, kernel_size=kernel_height, strides=1,
                 activation='relu'))

model.add(Conv1D(64, kernel_size=kernel_height, strides=1,
                 activation='relu'))

model.add(Conv1D(64, kernel_size=kernel_height, strides=1,
                 activation='relu'))

model.add(Dropout(0.5))

model.add(CuDNNLSTM(128, return_sequences=True))

model.add(Dropout(0.5))

model.add(CuDNNLSTM(128, return_sequences=False))

model.add(Dropout(0.5))

model.add(Dense(classes, activation='softmax'))

adam = optimizers.RMSprop(lr=0.001, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [103]:
for layer in model.layers:
    print(layer.name)

conv1d_9
conv1d_10
conv1d_11
conv1d_12
dropout_7
cu_dnnlstm_3
dropout_8
cu_dnnlstm_4
dropout_9
dense_3


In [104]:
batchSize = 100

model.fit(reshaped_train,train_segments_labels, validation_split=1-validationRatio,epochs=50,batch_size=batchSize,verbose=1)

print('Calculating score.. ')
score = model.evaluate(reshaped_test,test_segments_labels,verbose=1)
print(score)
model.save('CIP_ADL_CNN4_LSTM2_model.h5')

Train on 12432 samples, validate on 3108 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Calculating score.. 
[0.87653007126253712, 0.79485428051001816]


In [109]:
predictions = model.predict(reshaped_test)

In [110]:
count = 0.0
den = 0.0
for pair in zip(predictions, test_segments_labels):
    if np.argmax(pair[0]) == np.argmax(pair[1]):
        count += 1.0
    den += 1.0

print(count / den)

0.7948542805100182


In [38]:
pred_df = pd.DataFrame(predictions)
pred_df.to_csv('preds_test.csv', header=False, index=False)

In [39]:
true_df = pd.DataFrame(testY)
true_df.to_csv('true_test.csv', header=False, index=False)