In [None]:
%%local
# System dependency: installs the python-tk package with apt-get (-y answers the prompts automatically).
# NOTE(review): presumably needed by the matplotlib backend used below -- confirm, and prefer baking this into the image.
!sudo apt-get install -y python-tk

# Predictive Maintenance Using IoT Sensor Data with TensorFlow

In this analysis we use streaming IoT sensors to track the state of a SCARA (Selective Compliance Assembly Robot Arm) Robot and predict anomalies that may indicate impending failure.

When an anomaly is detected, alerts can be sent to trigger a maintenance request before the anomalous readings lead to failures.

In [None]:
%%local
import os
import pandas as pd
import csv
import numpy as np
import random
import glob
import matplotlib
import matplotlib.pyplot as plt
import random
import plotly
%matplotlib inline

#import tensorflow libraries
import tensorflow as tf
import shutil
import tensorflow.contrib.learn as tflearn
import tensorflow.contrib.layers as tflayers
from tensorflow.contrib.learn.python.learn import learn_runner
import tensorflow.contrib.metrics as metrics
import tensorflow.contrib.rnn as rnn

In [None]:
%%local
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable plotly's offline notebook rendering so iplot() can draw figures inline.
# connected=True loads plotly.js from the CDN instead of embedding it in the notebook file.
init_notebook_mode(connected=True)

In [None]:
%%local
#Load the raw sensor readings, ordered by time.
#On the cluster the data lives in MapR-FS:
#   /user/user01/rw_etl.csv/part-00000-45866095-f76d-4f6c-ba2d-a07f0ab2dc04.csv
#TEMPORARY - a local copy inside the container is read instead while the cluster is down.
csv_path = "tmp/data/part-00000-45866095-f76d-4f6c-ba2d-a07f0ab2dc04.csv"

#columns that are dropped before analysis ('index' is the column added by reset_index)
unused_columns = [
    '::[scararobot]Ax_J1.PositionCommand',
    '::[scararobot]Ax_J1.TorqueFeedback',
    '::[scararobot]Ax_J2.PositionCommand',
    '::[scararobot]Ax_J2.TorqueFeedback',
    '::[scararobot]Ax_J3.TorqueFeedback',
    '::[scararobot]Ax_J6.TorqueFeedback',
    '::[scararobot]ScanTimeAverage',
    '::[scararobot]Ax_J6.PositionCommand',
    '::[scararobot]Ax_J3.PositionCommand',
    'index',
]

df = (
    pd.read_csv(csv_path)
    .sort_values(['TimeStamp'], ascending=True)
    .reset_index()
    .drop(unused_columns, axis=1)
)

#parse the timestamps so they can be used as a time axis / index later on
df['TimeStamp'] = pd.to_datetime(df['TimeStamp'])
df.head(5)

In [19]:
%%local
#Plot two of the raw sensor channels against time, from row 50000 onwards.
first_row = 50000
recent = df.iloc[first_row:]

trace1 = go.Scatter(
    x = recent['TimeStamp'],
    y = recent['::[scararobot]Ax_J1.ActualPosition']
)
trace2 = go.Scatter(
    x = recent['TimeStamp'],
    y = recent['::[scararobot]Ax_J3.TorqueCommand']
)

#one subplot row per sensor channel, stacked in a single column
fig = tools.make_subplots(rows=2, cols=1, print_grid = False)
for row_number, sensor_trace in enumerate([trace1, trace2], 1):
    fig.append_trace(sensor_trace, row_number, 1)

fig['layout'].update(height=400, title='Sensor Readings Over Time')

#axis key in the figure layout -> axis title
axis_titles = (
    ('yaxis1', 'Position'),
    ('yaxis2', 'Torque<br>Command'),
    ('xaxis2', 'Time'),
)
for axis_key, axis_title in axis_titles:
    fig['layout'][axis_key].update(title=axis_title)

iplot(fig)

### Training a Recurrent Neural Network on Aggregated Sensor Data

We can detect potential failures by looking at the aggregate of the different readings in our sensor data.  The network will learn to predict the expected sensor readings.  When the sensor readings deviate from those expected, this indicates an anomaly that may signal impending failure.  

We can use these detections to schedule maintenance before failures occur.

In [None]:
%%local
# NOTE:  Move this into a function!  It doesn't need to live here.
# Aggregate sensor data and prepare it for the model

#keep only the rows whose speed reading is non-zero, indexed by time
df1 = df[df["::[scararobot]speed"] != 0].set_index('TimeStamp')

#create a new column that will be our feature variable for our model:
#the row-wise sum of every float sensor column (vectorized; NaN values are skipped)
df1['Total'] = df1.select_dtypes(include=['float64','float32']).sum(axis=1)

#convert into a time series object
ts = pd.Series(df1['Total'])

#prepare data and inputs for our TF model
num_periods = 100   #number of time steps in each training window
f_horizon = 1       #number of periods into the future we are forecasting
TS = np.array(ts)   #convert time series object to an array

#Largest multiple of num_periods that still leaves f_horizon readings for the
#shifted targets.  Reserving them up front keeps X and y the same length even
#when len(TS) is an exact multiple of num_periods.
n_train = ((len(TS) - f_horizon) // num_periods) * num_periods
if n_train <= 0:
    raise ValueError(
        "Need at least %d readings to build one training window, got %d"
        % (num_periods + f_horizon, len(TS))
    )

#create our training input data set "X": windows of shape (batch, num_periods, 1)
x_data = TS[:n_train]
x_batches = x_data.reshape(-1, num_periods, 1)


#create our training output dataset "y": the same windows shifted f_horizon steps ahead
y_data = TS[f_horizon:n_train + f_horizon]
y_batches = y_data.reshape(-1, num_periods, 1)


#create our test X and y data
def test_data(series,forecast,num_periods):
    test_x_setup = series[-(num_periods + forecast):]
    testX = test_x_setup[:num_periods].reshape(-1, num_periods, 1)
    testY = TS[-(num_periods):].reshape(-1, num_periods, 1)
    return testX,testY

#hold-out window: Y_test is the last num_periods readings of TS, X_test the num_periods readings ending f_horizon steps earlier
X_test, Y_test = test_data(TS,f_horizon,num_periods)

In [None]:
%%local
#Plot the aggregated 'Total' feature against its time index.
trace = go.Scatter(x = df1.index, y = df1['Total'])

layout = {'title': 'Aggregate Readings Over Time', 'height': 400}
fig = {'data': [trace], 'layout': layout}
iplot(fig)

In [None]:
%%local


#set up our TF model parameters

tf.reset_default_graph()   #start from an empty default graph so re-running this cell does not pile up duplicate ops

inputs = 1            #number of features per time step
hidden = 100          #number of units in the recurrent cell, can be changed to improve accuracy
output = 1            #number of values predicted per time step

#placeholders for the input windows and their shifted targets: (batch, num_periods, features)
X = tf.placeholder(tf.float32, [None, num_periods, inputs], name = "X")
y = tf.placeholder(tf.float32, [None, num_periods, output], name = "y")


basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden, activation=tf.nn.relu)   #create our RNN object
rnn_output, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)               #choose dynamic over static

learning_rate = 0.001   #small learning rate so we don't overshoot the minimum

#flatten (batch, time) into one axis so a single dense layer maps every time step to `output` values,
#then restore the (batch, num_periods, output) shape
stacked_rnn_output = tf.reshape(rnn_output, [-1, hidden])
stacked_outputs = tf.layers.dense(stacked_rnn_output, output)
outputs = tf.reshape(stacked_outputs, [-1, num_periods, output])

#cost function: the SUM of squared errors over the whole batch
#(it is logged below under the label "MSE", but it is not divided by the number of samples)
loss = tf.reduce_sum(tf.square(outputs - y),name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)          #gradient descent method
training_op = optimizer.minimize(loss)          #op that applies one optimization step on the loss

init = tf.global_variables_initializer()

epochs = 1000     #number of training cycles; each one includes both the feed-forward and backpropagation pass

mse_list = []     #loss value recorded every 100 epochs
epoch_list = []   #epoch numbers matching mse_list

DIR="model"  #path where the model will be saved
#create the checkpoint directory up front; depending on the TF 1.x release the saver may not create it
if not os.path.isdir(DIR):
    os.makedirs(DIR)

with tf.Session() as sess:
    init.run()
    for ep in range(epochs):
        sess.run(training_op, feed_dict={X: x_batches, y: y_batches})
        if ep % 100 == 0:
            mse = loss.eval(feed_dict={X: x_batches, y: y_batches})
            mse_list.append(mse)
            epoch_list.append(ep)
            #mse is a numpy float: convert it explicitly, str + float raises TypeError
            print("Epoch: " + str(ep) + "\tMSE: " + str(mse))

    #forecast for the held-out window
    y_pred = sess.run(outputs, feed_dict={X: X_test})

    saver = tf.train.Saver()   #we are going to save the model
    saver.save(sess, os.path.join(DIR,"RWsensorTFmodel"),global_step = epochs)




In [None]:
%%local

#Compare the held-out readings with the model's forecast for the same window.
plt.title("Forecast vs Actual", fontsize=14)

#(values, matplotlib format string, legend label) for each series, drawn in this order
forecast_series = (
    (Y_test, "bo", "Actual"),
    (y_pred, "r.", "Forecast"),
)
for values, fmt, label in forecast_series:
    plt.plot(pd.Series(np.ravel(values)), fmt, markersize=10, label=label)

plt.xlabel("Time Periods")
plt.legend(loc="upper left")
plt.show()