In [244]:
import tensorflow as tf
import numpy as np
from collections import Counter
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [197]:
def read_signals(filename):
    with open(filename, 'r') as fp:
        data = fp.read().splitlines()
        data = map(lambda x: x.rstrip().lstrip().split(), data)
        data = [list(map(float, line)) for line in data]
        data = np.array(data, dtype=np.float32)
    return data

In [198]:
def read_labels(filename):        
    with open(filename, 'r') as fp:
        activities = fp.read().splitlines()
        activities = list(map(int, activities))
    return np.array(activities)

In [199]:
def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

In [200]:
def one_hot_encode(np_array, num_labels):
    return (np.arange(num_labels) == np_array[:,None]).astype(np.float32)

In [201]:
def reformat_data(dataset, labels):
    no_labels = len(np.unique(labels))
    labels = one_hot_encode(labels, no_labels)
    dataset, labels = randomize(dataset, labels)
    return dataset, labels

In [202]:
d_activity_num_to_labels = {
    1: 'walking',
    2: 'walking upstairs',
    3: 'walking downstairs',
    4: 'sitting',
    5: 'standing',
    6: 'laying'
}

In [203]:
def accuracy(y_predicted, y):
    return (100.0 * np.sum(np.argmax(y_predicted, 1) == np.argmax(y, 1)) / y_predicted.shape[0])

# 1. Loading the dataset

In [204]:
mainPath = 'C:\\Users\\grappioloc\\Dropbox\\dev\\meetup_20171031_RNN\\UCI_HAR\\'

In [205]:
INPUT_FOLDER_TRAIN = mainPath + 'train\\Inertial Signals\\'
INPUT_FOLDER_TEST = mainPath + 'test\\Inertial Signals\\'

In [206]:
INPUT_FILES_TRAIN = ['body_acc_x_train.txt', 'body_acc_y_train.txt', 'body_acc_z_train.txt', 
                     'body_gyro_x_train.txt', 'body_gyro_y_train.txt', 'body_gyro_z_train.txt',
                     'total_acc_x_train.txt', 'total_acc_y_train.txt', 'total_acc_z_train.txt']

In [207]:
INPUT_FILES_TEST = ['body_acc_x_test.txt', 'body_acc_y_test.txt', 'body_acc_z_test.txt', 
                     'body_gyro_x_test.txt', 'body_gyro_y_test.txt', 'body_gyro_z_test.txt',
                     'total_acc_x_test.txt', 'total_acc_y_test.txt', 'total_acc_z_test.txt']

In [208]:
LABELFILE_TRAIN = mainPath + 'train\\y_train.txt'
LABELFILE_TEST = mainPath + 'test\\y_test.txt'

In [209]:
train_signals, test_signals = [], []

In [210]:
for input_file in INPUT_FILES_TRAIN:
    signal = read_signals(INPUT_FOLDER_TRAIN + input_file)
    train_signals.append(signal)
train_signals = np.transpose(np.array(train_signals), (1, 2, 0))

In [211]:
for input_file in INPUT_FILES_TEST:
    signal = read_signals(INPUT_FOLDER_TEST + input_file)
    test_signals.append(signal)
test_signals = np.transpose(np.array(test_signals), (1, 2, 0))

In [212]:
train_labels = read_labels(LABELFILE_TRAIN)
test_labels = read_labels(LABELFILE_TEST)

In [213]:
[no_signals_train, no_steps_train, no_components_train] = np.shape(train_signals)
[no_signals_test, no_steps_test, no_components_test] = np.shape(test_signals)
no_labels = len(np.unique(train_labels[:]))

In [214]:
print("The train dataset contains {} signals, each one of length {} and {} components ".format(no_signals_train, no_steps_train, no_components_train))
print("The test dataset contains {} signals, each one of length {} and {} components ".format(no_signals_test, no_steps_test, no_components_test))
print("The train dataset contains {} labels, with the following distribution:\n {}".format(np.shape(train_labels)[0], Counter(train_labels[:])))
print("The test dataset contains {} labels, with the following distribution:\n {}".format(np.shape(test_labels)[0], Counter(test_labels[:])))

The train dataset contains 7352 signals, each one of length 128 and 9 components 
The test dataset contains 2947 signals, each one of length 128 and 9 components 
The train dataset contains 7352 labels, with the following distribution:
 Counter({6: 1407, 5: 1374, 4: 1286, 1: 1226, 2: 1073, 3: 986})
The test dataset contains 2947 labels, with the following distribution:
 Counter({6: 537, 5: 532, 1: 496, 4: 491, 2: 471, 3: 420})


In [215]:
train_dataset, train_labels = reformat_data(train_signals, train_labels)
test_dataset, test_labels = reformat_data(test_signals, test_labels)

# 2. Defining some variables and models

In [216]:
num_steps = 128
num_components = 9
num_labels = 6
num_hidden = 32
learning_rate = 0.001

In [259]:
total_steps = 10000
display_step = 100
batch_size = 1000

## 2.1 Fully Connected NN

In [260]:
NUM_HIDDEN = 32
NUM_COMPONENTS = 9
NUM_STEPS = 128

In [261]:
def fccd_variables(num_labels, num_components = NUM_COMPONENTS, num_steps = NUM_STEPS, num_hidden = NUM_HIDDEN):
    w1 = tf.Variable(tf.truncated_normal([num_components*num_steps, num_hidden], stddev=0.1))
    b1 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))
   
    w2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2 }

In [262]:
def fccd_model(data, variables):
    shape = data.get_shape().as_list()
    flattened_data = tf.reshape(data, [-1, shape[1]*shape[2]])
    
    layer1 = tf.matmul(flattened_data, variables['w1']) + variables['b1']
    layer1_actv = tf.nn.relu(layer1)
    
    logit = tf.matmul(layer1_actv, variables['w2']) + variables['b2']
    return logit

## 2.2 Recurrent NN

In [263]:
NUM_HIDDEN = 32

In [268]:
def rnn_model(data, variables, num_out = 6, num_hidden = NUM_HIDDEN, num_layers=1):
    
    cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(num_hidden) for _ in range(num_layers)])
    
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    networkOut, state = tf.nn.dynamic_rnn(cell, inputs=data, initial_state=initial_state)
    networkOut = networkOut[:,-1,:]
    
    # final MLP layer
    w1 = tf.Variable(tf.truncated_normal([num_hidden, num_out], stddev=0.1))
    b1 = tf.Variable(tf.constant(1.0, shape = [num_out]))
    layer1 = tf.matmul(networkOut, w1) + b1
    layer1_actv = tf.nn.relu(layer1)
    
    return layer1_actv

# 3. Building the Graph with all computational steps

In [269]:
train_data = train_dataset
train_labels = train_labels
test_data = test_dataset
test_labels = test_labels

In [270]:
graph = tf.Graph()
with graph.as_default():
    #1) First we put the input data in a tensorflow friendly form.    
    tf_dataset = tf.placeholder(tf.float32, shape=(None, num_steps, num_components))
    tf_labels = tf.placeholder(tf.float32, shape = (None, num_labels))

    #2) Choose the 'variables' containing the weights and biases
    variables = rnn_variables(num_labels)
    #variables = fccd_variables(num_labels)

    #3.Choose the model you will use to calculate the logits (predicted labels)
    model = rnn_model
    #model = fccd_model
    logits = model(tf_dataset, variables, num_hidden=64, num_layers=2)

    #4. Then we compute the softmax cross entropy between the logits and the (actual) labels
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_labels))

    #5. The optimizer is used to calculate the gradients of the loss function 
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    #optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.0).minimize(loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    prediction = tf.nn.softmax(logits)

# 4. Running the graph

In [271]:
train_acc = []
test_acc = []

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized with learning_rate', learning_rate)
    for step in range(total_steps):
        #Since we are using stochastic gradient descent, we are selecting  small batches from the training dataset,
        #and training the convolutional neural network each time with a batch. 
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_data[offset:(offset + batch_size), :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]

        feed_dict = {tf_dataset : batch_data, tf_labels : batch_labels}
        _, l, train_predictions = session.run([optimizer, loss, prediction], feed_dict=feed_dict)
        train_accuracy = accuracy(train_predictions, batch_labels)
        train_acc.append(train_accuracy)

        if step % display_step == 0:
            indices = np.random.choice(test_labels.shape[0], batch_size)
            test_datab = test_data[indices, :, :]
            test_labelsb = test_labels[indices, :]
            feed_dict = {tf_dataset : test_datab, tf_labels : test_labelsb}
            _, test_predictions = session.run([loss, prediction], feed_dict=feed_dict)
            test_accuracy = accuracy(test_predictions, test_labelsb)
            test_acc.append(test_accuracy)
            message = "step {:04d} : loss is {:06.2f}, accuracy on training set {} %, accuracy on test set {:02.2f} %".format(step, l, train_accuracy, test_accuracy)
            print(message)

Initialized with learning_rate 0.001
step 0000 : loss is 001.44, accuracy on training set 14.8 %, accuracy on test set 16.10 %
step 0100 : loss is 000.53, accuracy on training set 78.5 %, accuracy on test set 71.80 %
step 0200 : loss is 000.23, accuracy on training set 92.7 %, accuracy on test set 83.80 %
step 0300 : loss is 000.15, accuracy on training set 94.2 %, accuracy on test set 86.30 %
step 0400 : loss is 000.14, accuracy on training set 93.6 %, accuracy on test set 85.60 %
step 0500 : loss is 000.13, accuracy on training set 95.3 %, accuracy on test set 89.80 %
step 0600 : loss is 000.12, accuracy on training set 93.9 %, accuracy on test set 89.30 %
step 0700 : loss is 000.11, accuracy on training set 94.5 %, accuracy on test set 89.00 %
step 0800 : loss is 000.10, accuracy on training set 95.6 %, accuracy on test set 89.70 %
step 0900 : loss is 000.11, accuracy on training set 95.8 %, accuracy on test set 90.70 %
step 1000 : loss is 000.09, accuracy on training set 96.2 %, ac

KeyboardInterrupt: 

In [None]:
fig = plt.figure()
plt.plot(train_acc)
plt.plot(test_acc)
plt.xlabel("step")
plt.ylabel("accuracy")
plt.show()