In [1]:
import pandas as pd
import numpy as np
import pickle as cp
from sliding_window import sliding_window

from keras.models import Sequential
from keras.layers import Dense, Conv1D, Conv2D, LSTM, CuDNNLSTM, Flatten, Dropout, Input, TimeDistributed, Reshape
from keras import optimizers
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# Dimensions shared by the segmentation helper and the network definition below.
classes = 18        # width of the softmax output layer and of each label row
win_size = 24       # number of consecutive samples (rows) per sliding window
num_sensors = 113   # feature columns per sample; matches the loaded arrays' second axis

In [3]:
def load_dataset(filename):
    """Load the pre-split train/test arrays stored in a pickle file.

    Parameters
    ----------
    filename : str
        Path of a pickle whose payload indexes as
        ``data[0] == (X_train, y_train)`` and ``data[1] == (X_test, y_test)``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` where the feature arrays are
        cast to ``float32`` and the target arrays to ``uint8``.
    """
    # SECURITY: unpickling can execute arbitrary code -- only load files from a
    # trusted source.
    # The context manager guarantees the handle is closed even if loading fails.
    with open(filename, 'rb') as f:
        data = cp.load(f)

    X_train, y_train = data[0]
    X_test, y_test = data[1]

    print(" ..from file {}".format(filename))
    print(" ..reading instances: train {0}, test {1}".format(X_train.shape, X_test.shape))

    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # The targets are cast to uint8 (unsigned, so class ids 0..255 are
    # representable) for GPU compatibility.
    y_train = y_train.astype(np.uint8)
    y_test = y_test.astype(np.uint8)

    return X_train, y_train, X_test, y_test

# Announce the step, then read the pre-split train/test arrays from disk.
print("Loading data...")
X_train, y_train, X_test, y_test = load_dataset(filename='oppChallenge_gestures.data')

Loading data...
 ..from file oppChallenge_gestures.data
 ..reading instances: train (557963, 113), test (118750, 113)


In [4]:
# Display the loaded training matrix; numpy abbreviates the repr of large arrays,
# so only the corner rows/columns are shown.
X_train

array([[ 0.50666666,  0.67183334,  0.50816667, ...,  0.49810001,
         0.50105   ,  0.83600003],
       [ 0.50816667,  0.67383331,  0.50700003, ...,  0.49654999,
         0.50059998,  0.83600003],
       [ 0.50283331,  0.67483336,  0.514     , ...,  0.4975    ,
         0.50050002,  0.83600003],
       ..., 
       [ 0.5       ,  0.5       ,  0.5       , ...,  0.49880001,
         0.50064999,  0.53200001],
       [ 0.5       ,  0.5       ,  0.5       , ...,  0.49994999,
         0.50160003,  0.53600001],
       [ 0.5       ,  0.5       ,  0.5       , ...,  0.5       ,
         0.5       ,  0.5       ]], dtype=float32)

In [4]:
"""assert num_sensors == X_train.shape[1]
def opp_sliding_window(data_x, data_y, ws, ss):
    data_x = sliding_window(data_x,(ws,data_x.shape[1]),(ss,1))
    data_y = np.asarray([[i[-1]] for i in sliding_window(data_y,ws,ss)])
    return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8)

# Sensor data is segmented using a sliding window mechanism
X_test, y_test = opp_sliding_window(X_test, y_test, win_size, win_size/2)
print(" ..after sliding window (testing): inputs {0}, targets {1}".format(X_test.shape, y_test.shape))

# Data is reshaped since the input of the network is a 4 dimension tensor
#X_test = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))"""

'assert num_sensors == X_train.shape[1]\ndef opp_sliding_window(data_x, data_y, ws, ss):\n    data_x = sliding_window(data_x,(ws,data_x.shape[1]),(ss,1))\n    data_y = np.asarray([[i[-1]] for i in sliding_window(data_y,ws,ss)])\n    return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8)\n\n# Sensor data is segmented using a sliding window mechanism\nX_test, y_test = opp_sliding_window(X_test, y_test, win_size, win_size/2)\nprint(" ..after sliding window (testing): inputs {0}, targets {1}".format(X_test.shape, y_test.shape))\n\n# Data is reshaped since the input of the network is a 4 dimension tensor\n#X_test = X_test.reshape((-1, 1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS))'

In [8]:
class trainerHelper():
    """Cut a continuous sensor recording into half-overlapping windows.

    Every window is ``win_size`` consecutive rows of ``data``; successive
    windows start ``win_size / 2`` rows apart. Each window is labelled with the
    class of its last sample, returned as a one-hot row.
    """

    def __init__(self, data, labels, win_size, num_sensor_data, num_classes):
        """Store the recording and the segmentation parameters.

        Parameters
        ----------
        data : array, shape (n_samples, num_sensor_data)
            Sensor readings, one row per time step.
        labels : array, shape (n_samples,)
            Integer class id of every row of ``data``.
        win_size : int
            Number of rows per window; must be positive.
        num_sensor_data : int
            Number of columns of ``data``.
        num_classes : int
            Width of the one-hot label rows.

        Raises
        ------
        ValueError
            If ``win_size`` is not positive (the window generator would never
            advance past the end of the data).
        """
        if win_size <= 0:
            raise ValueError("win_size must be positive, got {}".format(win_size))
        self.data = data
        self.labels = labels
        self.win_size = win_size
        self.sensor_data = num_sensor_data
        self.num_classes = num_classes
        self.data_length = self.data.shape[0]
        print(self.data_length)
        self.start = 0

    def windows(self):
        """Yield ``(start, end)`` row bounds of successive windows.

        Iteration resumes from ``self.start`` and advances it, so reset
        ``self.start`` to 0 to iterate from the beginning again. The step is
        half a window; for odd window sizes the fractional start position is
        truncated when converted to a row index.
        """
        stride = self.win_size / 2
        # `<=` keeps the last full window when it ends exactly on the final row.
        while self.start + self.win_size <= self.data_length:
            yield int(self.start), int(self.start + self.win_size)
            self.start += stride

    def segment_data(self):
        """Return all windows and their one-hot labels.

        Returns
        -------
        segments : ndarray, shape (n_windows, win_size, num_sensor_data)
            The windowed sensor data (float64).
        labels : ndarray, shape (n_windows, num_classes)
            One-hot rows encoding the class of the last sample of each window.

        Raises
        ------
        ValueError
            If a window's label lies outside ``[0, num_classes)``.
        """
        self.start = 0
        bounds = list(self.windows())

        # Allocate the outputs once; stacking inside the loop would re-copy the
        # whole accumulated array on every iteration (quadratic cost).
        segments = np.zeros((len(bounds), self.win_size, self.sensor_data))
        labels = np.zeros((len(bounds), self.num_classes))

        for row, (start, end) in enumerate(bounds):
            segments[row] = self.data[start:end, :]
            class_id = int(self.labels[end - 1])  # label of the window's last sample
            if not 0 <= class_id < self.num_classes:
                raise ValueError(
                    "label {} outside [0, {})".format(class_id, self.num_classes))
            # One-hot encode: a row filled with the class id would make every
            # argmax over the label row return 0.
            labels[row, class_id] = 1.0
        return segments, labels

In [9]:
# Sanity check: the target vector is one-dimensional, one entry per training sample.
y_train.shape

(557963,)

In [None]:
# Segment the training stream into half-overlapping windows with one label row each.
trainer_helper = trainerHelper(X_train, y_train, win_size, num_sensors, classes)
segments, labels = trainer_helper.segment_data()

# Segment the test stream the same way.
test_helper = trainerHelper(X_test, y_test, win_size, num_sensors, classes)
test_segments, test_segments_labels = test_helper.segment_data()

# The training/evaluation cells below read reshaped_train, train_segments_labels,
# reshaped_val, val_segments_labels, reshaped_test and test_segments_labels, but no
# cell in this notebook defines them, so a fresh-kernel run fails with NameError.
# They are built here.
# NOTE(review): the original train/validation split is not recorded anywhere in
# this notebook; a chronological hold-out of the last 10% of the training windows
# is an assumption -- adjust `val_fraction` (or the split strategy) as required.
val_fraction = 0.1
num_val = int(len(segments) * val_fraction)
split_at = len(segments) - num_val

# Conv2D consumes a trailing channel axis: (windows, win_size, num_sensors, 1).
reshaped_train = segments[:split_at].reshape((-1, win_size, num_sensors, 1))
train_segments_labels = labels[:split_at]
reshaped_val = segments[split_at:].reshape((-1, win_size, num_sensors, 1))
val_segments_labels = labels[split_at:]
reshaped_test = test_segments.reshape((-1, win_size, num_sensors, 1))

557963


In [None]:
# Hyper-parameters of the convolutional + recurrent network.
num_filters = 64            # feature maps produced by every convolution
size_of_kernel = (5, 1)     # 5 steps along the window axis, 1 along the sensor axis
kernel_strides = 1
num_lstm_cells = 128        # units in each recurrent layer
dropout_prob = 0.5

# Per-sample input: a (win_size x num_sensors) grid with a single channel.
inputshape = (win_size, num_sensors, 1)
# Not referenced by any other cell visible in this notebook.
batchshape = (None, win_size, num_sensors)

# Empty container; the layers are appended in the following cells.
model = Sequential()

In [None]:
# First convolution; as the first layer it also declares the input shape.
model.add(
    Conv2D(
        filters=num_filters,
        kernel_size=size_of_kernel,
        strides=kernel_strides,
        activation='relu',
        input_shape=inputshape,
        name='1_conv_layer'
    )
)

In [None]:
# Second convolution, same kernel and filter count as the first.
model.add(
    Conv2D(
        filters=num_filters,
        kernel_size=size_of_kernel,
        strides=kernel_strides,
        activation='relu',
        name='2_conv_layer'
    )
)

In [None]:
# Third convolution, same kernel and filter count as the previous ones.
model.add(
    Conv2D(
        filters=num_filters,
        kernel_size=size_of_kernel,
        strides=kernel_strides,
        activation='relu',
        name='3_conv_layer'
    )
)

In [None]:
# Fourth and last convolution of the feature extractor.
model.add(
    Conv2D(
        filters=num_filters,
        kernel_size=size_of_kernel,
        strides=kernel_strides,
        activation='relu',
        name='4_conv_layer'
    )
)

In [None]:
# Flatten the (sensor, filter) axes so every remaining time step becomes one
# feature vector for the recurrent layers.
# The number of time steps left after the convolutions is read from the model
# instead of being hard-coded (it is 8 only for win_size=24 with four 5-tap
# convolutions), so changing win_size or the kernel no longer breaks this layer.
# NOTE(review): assumes the default channels_last layout, i.e. the conv output
# is (batch, time, sensors, filters) -- consistent with `inputshape`.
time_steps = model.output_shape[1]
model.add(Reshape((time_steps, num_filters*num_sensors)))

In [None]:
# Recurrent head: two LSTM layers, each followed by dropout, then the softmax
# classifier. The first LSTM returns the full sequence so the second one can
# consume it; the second returns only its final state.
recurrent_head = (
    CuDNNLSTM(num_lstm_cells, return_sequences=True, name='1_lstm_layer'),
    Dropout(dropout_prob, name='2_dropout_layer'),
    CuDNNLSTM(num_lstm_cells, return_sequences=False, name='2_lstm_layer'),
    Dropout(dropout_prob, name='3_dropout_layer'),
    Dense(classes, activation='softmax', name='softmax_layer'),
)
for layer in recurrent_head:
    model.add(layer)

# RMSprop with a small learning-rate decay; the loss expects one-hot targets.
rms = optimizers.RMSprop(lr=0.001, decay=1e-6)
model.compile(
    optimizer=rms,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# List every layer together with the tensor shapes flowing in and out of it.
for layer in model.layers:
    print('{}: input shape: {} output shape: {}'.format(
        layer.name, layer.input_shape, layer.output_shape))

In [None]:
# Print the layer-by-layer overview of the assembled network.
model.summary()

In [None]:
# Training configuration.
batchSize = 100
train_epoches = 50

# Fit on the training windows, monitoring the validation windows after each epoch.
model.fit(
    reshaped_train,
    train_segments_labels,
    validation_data=(reshaped_val, val_segments_labels),
    epochs=train_epoches,
    batch_size=batchSize,
    verbose=1
)

# Evaluate on the test windows; the model was compiled with the accuracy metric,
# so the score holds the loss followed by the accuracy.
print('Calculating score.. ')
score = model.evaluate(reshaped_test, test_segments_labels, verbose=1)
print(score)

# Persist the trained network (architecture + weights) to disk.
model.save('taskB2_all_Subjects_CNN_LSTM_model.h5')

In [None]:
# Run the trained network on the test windows; the last layer is a softmax, so
# every output row holds one score per class.
predictions = model.predict(reshaped_test)

In [None]:
# Plain accuracy: fraction of test windows whose highest-scoring predicted class
# coincides with the position of the maximum in the true label row.
matches = [np.argmax(pair[0]) == np.argmax(pair[1])
           for pair in zip(predictions, test_segments_labels)]
count = float(sum(matches))
den = float(len(matches))

print(count / den)

In [None]:
# F1-score measure
from sklearn.metrics import f1_score

# Reuse the notebook-wide class count instead of repeating the literal.
num_classes = classes
class_ids = list(range(num_classes))

# Collapse the score / label rows to one class index per test window.
class_predictions = np.argmax(predictions, axis=1)
class_true = np.argmax(test_segments_labels, axis=1)

tot_labels = float(len(class_true))
count = float(np.sum(class_predictions == class_true))

print('Standard accuracy is ' + str(count/tot_labels))

# Number of true windows per class; classes that never occur get an explicit 0
# instead of being absent (which made `counted_labels[i]` raise KeyError).
counts = np.bincount(class_true, minlength=num_classes)
counted_labels = dict(zip(class_ids, counts))

# `labels=class_ids` forces exactly one score per class id, in id order, so
# `f1_scores[i]` really belongs to class i even when a class is missing from
# the test windows. Arguments follow the sklearn order (y_true, y_pred).
f1_scores = f1_score(class_true, class_predictions, labels=class_ids, average=None)

# Support-weighted mean of the per-class scores, computed by hand so it can be
# compared with sklearn's own 'weighted' average below.
tot_f1_score = 0.0
weights_sum = 0.0
for i in class_ids:
    labels_class_i = counted_labels[i]
    weight_i = labels_class_i / tot_labels
    weights_sum += weight_i
    tot_f1_score += f1_scores[i]*weight_i
    print(str(i) + ' ' + str(weight_i) + ' ' + str(f1_scores[i]))


print('The weigths sum is ' + str(weights_sum))
print('The computed f1-score is {}'.format(tot_f1_score))
print('The f1-score with sklearn function is {}'.format(f1_score(class_true, class_predictions, average='weighted')))

In [None]:
# Dump the raw prediction rows to CSV, one row per test window, without header
# or index column.
pred_df = pd.DataFrame(data=predictions)
pred_df.to_csv('preds_test.csv', index=False, header=False)

In [None]:
# Dump the true label rows next to the predictions, one row per test window.
# `testY` is not defined by any cell of this notebook; the evaluation cells all
# pair `predictions` with `test_segments_labels`, so that array is written here.
# NOTE(review): confirm `testY` was not meant to be a differently encoded array.
true_df = pd.DataFrame(test_segments_labels)
true_df.to_csv('true_test.csv', header=False, index=False)