In [10]:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from scipy.ndimage import zoom

import os

import tensorflow as tf

In [11]:
# Extract http://archive.ics.uci.edu/ml/machine-learning-databases/00447/ to same directory as this script



# List files

one = ['TS1.txt', 'TS2.txt', 'TS3.txt', 'TS4.txt', 'VS1.txt', 'CE.txt', 'CP.txt', 'SE.txt']

ten = ['FS1.txt', 'FS2.txt']

hundred = ['PS1.txt', 'PS2.txt', 'PS3.txt', 'PS4.txt', 'PS5.txt', 'PS6.txt', 'EPS1.txt']



In [12]:
# Parse condition profiles

df_profile = pd.read_table('profile.txt', header=None)

df_profile = df_profile.values.reshape(2205, 1, 5)

df_profile = zoom(df_profile, (1,6000,1))



# Parse 1 Hz measurements

df_one =  np.stack([pd.read_table(x, header=None) for x in one], axis=2)

df_one = zoom(df_one, (1, 100, 1))



# Parse 10 Hz measurements

df_ten =  np.stack([pd.read_table(x, header=None) for x in ten], axis=2)

df_ten = zoom(df_ten, (1, 10, 1))



# Parse 100 Hz measurements

df_hundred = np.stack([pd.read_table(x, header=None) for x in hundred], axis=2)

In [13]:
# Concatenate all data

df = np.concatenate([df_profile, df_one, df_ten, df_hundred], axis=2)



In [14]:
# Split data into training, validation, and test sets

val = 0.2

test = 0.1

train = 1 - val - test



X_train = df[:int(train*df.shape[0])+1:,::,[i not in [1] for i in range(df.shape[2])]]

X_val = df[int(train*df.shape[0])+1:int(train*df.shape[0])+int(val*df.shape[0])+1:,::,[i not in [1] for i in range(df.shape[2])]]

X_test = df[int(train*df.shape[0])+int(val*df.shape[0])+1::,::,[i not in [1] for i in range(df.shape[2])]]



oh_target = (np.arange(df[:,0,1].max()+1) == df[:,0,1][...,None]).astype(int)

oh_target = np.delete(oh_target,np.where(~oh_target.any(axis=0))[0], axis=1)



y_train = oh_target[:int(train*oh_target.shape[0])+1:,]

y_val = oh_target[int(train*oh_target.shape[0])+1:int(train*oh_target.shape[0])+int(val*oh_target.shape[0])+1:,]

y_test = oh_target[int(train*oh_target.shape[0])+int(val*oh_target.shape[0])+1::,]



In [15]:
def sample_batch(X, y, batch_size):

    for b in range(0, len(X)-(len(X)%batch_size)-batch_size, batch_size):

        yield X[b:b + batch_size], y[b:b + batch_size]



# Constants

samples, seq_len, features = X_train.shape

n_classes = y_train.shape[1]



# Hyperparameters

lstm_size = 3*features

lstm_layers = 2

dropout = 0.8

batch_size = 50

learning_rate = 0.0001  # default is 0.001

epochs = 1



graph = tf.Graph()



with graph.as_default():

    inputs = tf.placeholder(dtype=tf.float32, shape=[None, seq_len, features], name='inputs')

    with tf.name_scope("Target"):

        target = tf.placeholder(dtype=tf.int32, shape=[None, n_classes], name='target')

    keep_prob = tf.placeholder(tf.float32, name = 'keep')



with graph.as_default():

    lstm_in = tf.transpose(inputs, [1, 0, 2])  # reshape into (seq_len, samples, features)

    lstm_in = tf.reshape(lstm_in, [-1, features])  # Now (seq_len*samples, features)



    # To cells

    lstm_in = tf.layers.dense(lstm_in, lstm_size, activation=None)



    # Open up the tensor into a list of seq_len pieces

    lstm_in = tf.split(lstm_in, seq_len, 0)



    # Add LSTM layers

    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

    cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

    initial_state = cell.zero_state(batch_size, tf.float32)



with graph.as_default():

    outputs, final_state = tf.contrib.rnn.static_rnn(cell, lstm_in, dtype=tf.float32, initial_state=initial_state)



    # We only need the last output tensor to pass into a classifier

    logits = tf.layers.dense(outputs[-1], n_classes, name='logits')



    # Cost function and optimizer

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target))



    # No grad clipping

    # optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)



    # Grad clipping

    train_op = tf.train.AdamOptimizer(learning_rate)



    gradients = train_op.compute_gradients(cost)

    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]

    optimizer = train_op.apply_gradients(capped_gradients)



    # Accuracy

    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(target, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')



if (os.path.exists('checkpoints') == False):

    os.system('mkdir checkpoints')



train_acc = []

train_loss = []



validation_acc = []

validation_loss = []



with graph.as_default():

    saver = tf.train.Saver()



with tf.Session(graph=graph) as sess:

    sess.run(tf.global_variables_initializer())

    iteration = 1



    for e in range(epochs):

        # Initialize

        state = sess.run(initial_state)



        # Loop over batches

        for x, y in sample_batch(X_train, y_train, batch_size):



            # Feed dictionary

            feed = {inputs: x, target: y, keep_prob: dropout, initial_state: state}



            loss, _, state, acc = sess.run([cost, optimizer, final_state, accuracy],

                                           feed_dict=feed)

            train_acc.append(acc)

            train_loss.append(loss)



            # Print at each 5 iters

            if (iteration % 5 == 0):

                print("Epoch: {}/{}".format(e+1, epochs),

                      "Iteration: {:d}".format(iteration),

                      "Train loss: {:6f}".format(loss),

                      "Train acc: {:.6f}".format(acc))



            # Compute validation loss at every 25 iterations

            if (iteration % 25 == 0):



                # Initiate for validation set

                val_state = sess.run(cell.zero_state(batch_size, tf.float32))



                val_acc_ = []

                val_loss_ = []

                for x_v, y_v in sample_batch(X_val, y_val, batch_size):

                    # Feed

                    feed = {inputs: x_v, target: y_v, keep_prob: 1.0, initial_state: val_state}



                    # Loss

                    loss_v, state_v, acc_v = sess.run([cost, final_state, accuracy], feed_dict=feed)



                    val_acc_.append(acc_v)

                    val_loss_.append(loss_v)



                # Print info

                print("Epoch: {}/{}".format(e+1, epochs),

                      "Iteration: {:d}".format(iteration),

                      "Validation loss: {:6f}".format(np.mean(val_loss_)),

                      "Validation acc: {:.6f}".format(np.mean(val_acc_)))



                # Store

                validation_acc.append(np.mean(val_acc_))

                validation_loss.append(np.mean(val_loss_))



            # Iterate

            iteration += 1



    saver.save(sess, "checkpoints/lstm.ckpt")

Epoch: 1/1 Iteration: 5 Train loss: 1.357857 Train acc: 0.360000
Epoch: 1/1 Iteration: 10 Train loss: 1.336112 Train acc: 0.360000
Epoch: 1/1 Iteration: 15 Train loss: 1.321885 Train acc: 0.540000
Epoch: 1/1 Iteration: 20 Train loss: 1.424419 Train acc: 0.220000
Epoch: 1/1 Iteration: 25 Train loss: 1.365082 Train acc: 0.360000
Epoch: 1/1 Iteration: 25 Validation loss: 1.247948 Validation acc: 0.560000


In [16]:
def sample_batch(X, y, batch_size):

    for b in range(0, len(X)-(len(X)%batch_size)-batch_size, batch_size):

        yield X[b:b + batch_size], y[b:b + batch_size]



# Constants

samples, seq_len, features = X_train.shape

n_classes = y_train.shape[1]



# Hyperparameters

lstm_size = 3*features

lstm_layers = 2

dropout = 0.8

batch_size = 100

learning_rate = 0.0001  # default is 0.001

epochs = 3



graph = tf.Graph()



with graph.as_default():

    inputs = tf.placeholder(dtype=tf.float32, shape=[None, seq_len, features], name='inputs')

    with tf.name_scope("Target"):

        target = tf.placeholder(dtype=tf.int32, shape=[None, n_classes], name='target')

    keep_prob = tf.placeholder(tf.float32, name = 'keep')



with graph.as_default():

    lstm_in = tf.transpose(inputs, [1, 0, 2])  # reshape into (seq_len, samples, features)

    lstm_in = tf.reshape(lstm_in, [-1, features])  # Now (seq_len*samples, features)



    # To cells

    lstm_in = tf.layers.dense(lstm_in, lstm_size, activation=None)



    # Open up the tensor into a list of seq_len pieces

    lstm_in = tf.split(lstm_in, seq_len, 0)



    # Add LSTM layers

    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

    cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

    initial_state = cell.zero_state(batch_size, tf.float32)



with graph.as_default():

    outputs, final_state = tf.contrib.rnn.static_rnn(cell, lstm_in, dtype=tf.float32, initial_state=initial_state)



    # We only need the last output tensor to pass into a classifier

    logits = tf.layers.dense(outputs[-1], n_classes, name='logits')



    # Cost function and optimizer

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target))



    # No grad clipping

    # optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)



    # Grad clipping

    train_op = tf.train.AdamOptimizer(learning_rate)



    gradients = train_op.compute_gradients(cost)

    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]

    optimizer = train_op.apply_gradients(capped_gradients)



    # Accuracy

    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(target, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')



if (os.path.exists('checkpoints') == False):

    os.system('mkdir checkpoints')



train_acc = []

train_loss = []



validation_acc = []

validation_loss = []



with graph.as_default():

    saver = tf.train.Saver()



with tf.Session(graph=graph) as sess:

    sess.run(tf.global_variables_initializer())

    iteration = 2



    for e in range(epochs):

        # Initialize

        state = sess.run(initial_state)



        # Loop over batches

        for x, y in sample_batch(X_train, y_train, batch_size):



            # Feed dictionary

            feed = {inputs: x, target: y, keep_prob: dropout, initial_state: state}



            loss, _, state, acc = sess.run([cost, optimizer, final_state, accuracy],

                                           feed_dict=feed)

            train_acc.append(acc)

            train_loss.append(loss)



            # Print at each 5 iters

            if (iteration % 5 == 0):

                print("Epoch: {}/{}".format(e+1, epochs),

                      "Iteration: {:d}".format(iteration),

                      "Train loss: {:6f}".format(loss),

                      "Train acc: {:.6f}".format(acc))



            # Compute validation loss at every 25 iterations

            if (iteration % 25 == 0):



                # Initiate for validation set

                val_state = sess.run(cell.zero_state(batch_size, tf.float32))



                val_acc_ = []

                val_loss_ = []

                for x_v, y_v in sample_batch(X_val, y_val, batch_size):

                    # Feed

                    feed = {inputs: x_v, target: y_v, keep_prob: 1.0, initial_state: val_state}



                    # Loss

                    loss_v, state_v, acc_v = sess.run([cost, final_state, accuracy], feed_dict=feed)



                    val_acc_.append(acc_v)

                    val_loss_.append(loss_v)



                # Print info

                print("Epoch: {}/{}".format(e+1, epochs),

                      "Iteration: {:d}".format(iteration),

                      "Validation loss: {:6f}".format(np.mean(val_loss_)),

                      "Validation acc: {:.6f}".format(np.mean(val_acc_)))



                # Store

                validation_acc.append(np.mean(val_acc_))

                validation_loss.append(np.mean(val_loss_))



            # Iterate

            iteration += 1



    saver.save(sess, "checkpoints/lstm.ckpt")

Epoch: 1/3 Iteration: 5 Train loss: 1.406535 Train acc: 0.240000
Epoch: 1/3 Iteration: 10 Train loss: 1.343435 Train acc: 0.030000
Epoch: 1/3 Iteration: 15 Train loss: 1.405002 Train acc: 0.230000
Epoch: 2/3 Iteration: 20 Train loss: 1.408165 Train acc: 0.220000
Epoch: 2/3 Iteration: 25 Train loss: 1.291842 Train acc: 0.510000
Epoch: 2/3 Iteration: 25 Validation loss: 1.299136 Validation acc: 0.600000
Epoch: 3/3 Iteration: 30 Train loss: 1.146003 Train acc: 0.830000
Epoch: 3/3 Iteration: 35 Train loss: 1.411832 Train acc: 0.290000
Epoch: 3/3 Iteration: 40 Train loss: 1.402940 Train acc: 0.320000
