In [1]:
import tensorflow as tf
import numpy as np
from collections import Counter
import os

In [2]:
def read_signals(filename):
    with open(filename, 'r') as fp:
        data = fp.read().splitlines()
        data = map(lambda x: x.rstrip().lstrip().split(), data)
        data = [list(map(float, line)) for line in data]
        data = np.array(data, dtype=np.float32)
    return data

def read_labels(filename):        
    with open(filename, 'r') as fp:
        activities = fp.read().splitlines()
        activities = list(map(int, activities))
    return np.array(activities)

def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

def one_hot_encode(np_array, num_labels):
    return (np.arange(num_labels) == np_array[:,None]).astype(np.float32)

def reformat_data(dataset, labels):
    no_labels = len(np.unique(labels))
    labels = one_hot_encode(labels, no_labels)
    dataset, labels = randomize(dataset, labels)
    return dataset, labels

d_activity_num_to_labels = {
    1: 'walking',
    2: 'walking upstairs',
    3: 'walking downstairs',
    4: 'sitting',
    5: 'standing',
    6: 'laying'
}

def accuracy(y_predicted, y):
    return (100.0 * np.sum(np.argmax(y_predicted, 1) == np.argmax(y, 1)) / y_predicted.shape[0])

# 1. Loading the dataset

In [3]:
INPUT_FOLDER_TRAIN = './data/UCI_HAR/train/InertialSignals/'
INPUT_FOLDER_TEST = './data/UCI_HAR/test/InertialSignals/'

INPUT_FILES_TRAIN = ['body_acc_x_train.txt', 'body_acc_y_train.txt', 'body_acc_z_train.txt', 
                     'body_gyro_x_train.txt', 'body_gyro_y_train.txt', 'body_gyro_z_train.txt',
                     'total_acc_x_train.txt', 'total_acc_y_train.txt', 'total_acc_z_train.txt']

INPUT_FILES_TEST = ['body_acc_x_test.txt', 'body_acc_y_test.txt', 'body_acc_z_test.txt', 
                     'body_gyro_x_test.txt', 'body_gyro_y_test.txt', 'body_gyro_z_test.txt',
                     'total_acc_x_test.txt', 'total_acc_y_test.txt', 'total_acc_z_test.txt']

LABELFILE_TRAIN = './data/UCI_HAR/train/y_train.txt'
LABELFILE_TEST = './data/UCI_HAR/test/y_test.txt'

train_signals, test_signals = [], []

for input_file in INPUT_FILES_TRAIN:
    signal = read_signals(INPUT_FOLDER_TRAIN + input_file)
    train_signals.append(signal)
train_signals = np.transpose(np.array(train_signals), (1, 2, 0))

for input_file in INPUT_FILES_TEST:
    signal = read_signals(INPUT_FOLDER_TEST + input_file)
    test_signals.append(signal)
test_signals = np.transpose(np.array(test_signals), (1, 2, 0))

train_labels = read_labels(LABELFILE_TRAIN)
test_labels = read_labels(LABELFILE_TEST)

[no_signals_train, no_steps_train, no_components_train] = np.shape(train_signals)
[no_signals_test, no_steps_test, no_components_test] = np.shape(test_signals)
no_labels = len(np.unique(train_labels[:]))

print("The train dataset contains {} signals, each one of length {} and {} components ".format(no_signals_train, no_steps_train, no_components_train))
print("The test dataset contains {} signals, each one of length {} and {} components ".format(no_signals_test, no_steps_test, no_components_test))
print("The train dataset contains {} labels, with the following distribution:\n {}".format(np.shape(train_labels)[0], Counter(train_labels[:])))
print("The test dataset contains {} labels, with the following distribution:\n {}".format(np.shape(test_labels)[0], Counter(test_labels[:])))

train_dataset, train_labels = reformat_data(train_signals, train_labels)
test_dataset, test_labels = reformat_data(test_signals, test_labels)

The train dataset contains 7352 signals, each one of length 128 and 9 components 
The test dataset contains 2947 signals, each one of length 128 and 9 components 
The train dataset contains 7352 labels, with the following distribution:
 Counter({6: 1407, 5: 1374, 4: 1286, 1: 1226, 2: 1073, 3: 986})
The test dataset contains 2947 labels, with the following distribution:
 Counter({6: 537, 5: 532, 1: 496, 4: 491, 2: 471, 3: 420})


# 2. Defining some variables and models

In [4]:
num_steps = 128
num_components = 9
num_labels = 6
num_hidden = 32
learning_rate = 0.001

total_steps = 10000
display_step = 100
batch_size = 1000

## 2.1 Fully Connected NN

In [5]:
NUM_HIDDEN = 32
NUM_COMPONENTS = 9
NUM_STEPS = 128

def fccd_variables(num_labels, num_components = NUM_COMPONENTS, num_steps = NUM_STEPS, num_hidden = NUM_HIDDEN):
    w1 = tf.Variable(tf.truncated_normal([num_components*num_steps, num_hidden], stddev=0.1))
    b1 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))
   
    w2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape = [num_labels]))
    return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2 }

def fccd_model(data, variables):
    shape = data.get_shape().as_list()
    flattened_data = tf.reshape(data, [-1, shape[1]*shape[2]])
    
    layer1 = tf.matmul(flattened_data, variables['w1']) + variables['b1']
    layer1_actv = tf.nn.relu(layer1)
    
    logit = tf.matmul(layer1_actv, variables['w2']) + variables['b2']
    return logit

## 2.2 Recurrent NN

In [6]:
NUM_HIDDEN = 32

def rnn_variables(num_labels, num_hidden = NUM_HIDDEN):
    w1 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]))
    b1 = tf.Variable(tf.random_normal([num_labels]))
    return {'w1': w1, 'b1': b1}

def rnn_model(data, variables, num_hidden = NUM_HIDDEN):
    splitted_data = tf.unstack(data, axis=1)
    
    #First line is for free, but the rest of the RNN you have to build yourself.
    
    return logits

# 3. Building the Graph with all computational steps

In [7]:
train_data = train_dataset
train_labels = train_labels
test_data = test_dataset
test_labels = test_labels


graph = tf.Graph()
with graph.as_default():
    #1) First we put the input data in a tensorflow friendly form.    
    tf_dataset = tf.placeholder(tf.float32, shape=(None, num_steps, num_components))
    tf_labels = tf.placeholder(tf.float32, shape = (None, num_labels))

    #2) Choose the 'variables' containing the weights and biases
    #variables = rnn_variables(num_labels)
    variables = fccd_variables(num_labels)

    #3.Choose the model you will use to calculate the logits (predicted labels)
    #model = rnn_model
    model = fccd_model
    logits = model(tf_dataset, variables)

    #4. Then we compute the softmax cross entropy between the logits and the (actual) labels
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_labels))

    #5. The optimizer is used to calculate the gradients of the loss function 
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    #optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.0).minimize(loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    prediction = tf.nn.softmax(logits)



# 4. Running the graph

In [8]:
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized with learning_rate', learning_rate)
    for step in range(total_steps):
        #Since we are using stochastic gradient descent, we are selecting  small batches from the training dataset,
        #and training the convolutional neural network each time with a batch. 
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_data[offset:(offset + batch_size), :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]

        feed_dict = {tf_dataset : batch_data, tf_labels : batch_labels}
        _, l, train_predictions = session.run([optimizer, loss, prediction], feed_dict=feed_dict)
        train_accuracy = accuracy(train_predictions, batch_labels)

        if step % display_step == 0:
            feed_dict = {tf_dataset : test_data, tf_labels : test_labels}
            _, test_predictions = session.run([loss, prediction], feed_dict=feed_dict)
            test_accuracy = accuracy(test_predictions, test_labels)
            message = "step {:04d} : loss is {:06.2f}, accuracy on training set {} %, accuracy on test set {:02.2f} %".format(step, l, train_accuracy, test_accuracy)
            print(message)

Initialized with learning_rate 0.001
step 0000 : loss is 001.45, accuracy on training set 14.3 %, accuracy on test set 15.61 %
step 0100 : loss is 000.64, accuracy on training set 60.9 %, accuracy on test set 50.53 %
step 0200 : loss is 000.45, accuracy on training set 68.3 %, accuracy on test set 58.64 %
step 0300 : loss is 000.37, accuracy on training set 70.6 %, accuracy on test set 61.15 %
step 0400 : loss is 000.36, accuracy on training set 68.8 %, accuracy on test set 61.15 %
step 0500 : loss is 000.33, accuracy on training set 71.2 %, accuracy on test set 58.77 %
step 0600 : loss is 000.32, accuracy on training set 70.3 %, accuracy on test set 58.64 %
step 0700 : loss is 000.47, accuracy on training set 66.5 %, accuracy on test set 56.57 %
step 0800 : loss is 000.72, accuracy on training set 59.1 %, accuracy on test set 50.22 %
step 0900 : loss is 000.90, accuracy on training set 50.6 %, accuracy on test set 42.96 %
step 1000 : loss is 001.04, accuracy on training set 51.6 %, ac