# IoT Equipment Failure Prediction using Sensor data
## 1 Environment Setup
### 1.1 Import dependent libraries

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import pdb
import json
import re
import requests
import sys
import types
import ibm_boto3

In [17]:
# Import libraries
from io import StringIO

from botocore.client import Config
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20. Fall back for old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

## 2 Create IoT Predictive Analytics Functions

In [3]:
# Function to extract Column names of dataset
def dataset_columns(dataset):
    """Return the column labels of *dataset* as a plain Python list.

    dataset: an object exposing ``columns.values`` (e.g. a pandas DataFrame).
    """
    return [column for column in dataset.columns.values]

# Function to train Logistic regression model
def train_logistic_regression(x_vals, y_vals):
    """Fit a LogisticRegression (default settings) and return the fitted model.

    x_vals: feature values used for training.
    y_vals: target values matching the rows of x_vals.
    """
    # fit() returns the estimator itself, so the fitted model can be returned directly.
    return LogisticRegression().fit(x_vals, y_vals)

# Function to return Predicted values
def score_data(trained_model, x_vals):
    """Return the predictions of *trained_model* for the rows in *x_vals*.

    trained_model: any object exposing a ``predict(x_vals)`` method.
    """
    return trained_model.predict(x_vals)

# Function to calculate Prediction accuracy of model
def model_accuracy(trained_model, variables, targets):
    """Return the value of ``trained_model.score(variables, targets)``.

    trained_model: any object exposing a ``score(variables, targets)`` method.
    variables: feature values to score.
    targets: expected target values for those features.
    """
    return trained_model.score(variables, targets)

# Function to generate Confusion matrix
def _format_percent(numerator, denominator):
    """Return numerator/denominator as a rounded percentage string, or "n/a" when undefined."""
    if denominator == 0:
        return "n/a"
    return "{} %".format(round((float(numerator) / float(denominator)) * 100, 2))


def confusion_matrix(actfail, predictfail):
    """Print a 2x2 confusion matrix and derived metrics for binary labels.

    actfail: sequence of actual labels (0 = no failure, 1 = failure).
    predictfail: sequence of predicted labels, compared with actfail
        position by position; comparison stops at the shorter sequence.

    Returns None; all results are written to stdout. A metric whose
    denominator is zero (for example recall when there are no actual
    failures, or every metric for empty input) is printed as "n/a"
    instead of raising ZeroDivisionError.
    """
    print("Actual, Predicted Observations:  ", len(actfail), len(predictfail))

    # Counter names: a = actual, p = predicted, n = no (0), y = yes (1).
    anpn = 0
    anpy = 0
    aypn = 0
    aypy = 0

    for actual, predicted in zip(actfail, predictfail):
        if actual == 0 and predicted == 0:
            anpn += 1
        elif actual == 0 and predicted == 1:
            anpy += 1
        elif actual == 1 and predicted == 0:
            aypn += 1
        else:
            # Every remaining combination is counted as actual Y / predicted Y.
            aypy += 1

    total = anpn + anpy + aypn + aypy

    # Confusion matrix
    print("--------------------------------------------")
    print("Confusion Matrix")
    print("--------------------------------------------")
    print("              ", "Predicted N", "Predicted Y")
    print("Actual N      ", anpn, "          ", anpy)
    print("Actual Y      ", aypn, "          ", aypy)
    print("--------------------------------------------")
    print("Total observations  :  ", total)
    print("False Positives     :  ", anpy)
    print("False Negatives     :  ", aypn)
    print("Overall Accuracy    :  ", _format_percent(anpn + aypy, total))
    print("Sensitivity/Recall  :  ", _format_percent(aypy, aypn + aypy))
    print("Specificity         :  ", _format_percent(anpn, anpn + anpy))
    print("Precision           :  ", _format_percent(aypy, anpy + aypy))
    print("--------------------------------------------")


## 3 Configure Parameters for Failure Prediction
### 3.1 Read DSX Configuration file and load all parameters

Complete the two steps below before executing the rest of the cells:

1. Configure the parameters in the JSON file and upload it to Object Storage
2. Set the name of the configuration .json file in the next section


#### 3.1.1 Set the name of the .json configuration file

In [4]:
# Name of the JSON configuration file; it is used as the object key when the
# file is fetched from Object Storage.
v_sampleConfigFileName = "iotpredict_config.json"

#### 3.1.2 Insert the Object Storage file credentials to read the .json configuration file

In [5]:
# @hidden_cell
# The section below needs to be modified:
#    Replace the placeholder values in credentials_1 = {} with the credentials
#    for your own IBM Cloud Object Storage instance.
# The following code contains the credentials for a file in your IBM Cloud Object Storage.
# You might want to remove those credentials before you share your notebook.
# NOTE(review): the 'FILE' entry is not read by any code visible in this notebook,
#    and its '.txt' name differs from the '.json' file name configured above - confirm.
credentials_1 = {
    'IBM_API_KEY_ID': 'xxxxxxx',
    'IAM_SERVICE_ID': 'xxxxxxx',
    'ENDPOINT': 'https://s3-api.us-geo.objectstorage.service.networklayer.com',
    'IBM_AUTH_ENDPOINT': 'https://iam.ng.bluemix.net/oidc/token',
    'BUCKET': 'iotpredictivecloudstorage-donotdelete-xxxxx',
    'FILE': 'iotpredict_config.txt'
}


### 3.2 Read Configuration parametric values

In [6]:
# Create the Cloud Object Storage client ('s3' service) used to read and
# write files. Every connection setting is taken from credentials_1.
cos = ibm_boto3.client(
    's3',
    endpoint_url=credentials_1['ENDPOINT'],
    config=Config(signature_version='oauth'),
    ibm_auth_endpoint=credentials_1['IBM_AUTH_ENDPOINT'],
    ibm_service_instance_id=credentials_1['IAM_SERVICE_ID'],
    ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],
)

def get_file(filename):
    '''Fetch the object stored under key *filename* in the configured bucket and return its 'Body'.'''
    response = cos.get_object(Key=filename, Bucket=credentials_1['BUCKET'])
    return response['Body']

def load_string(fileobject):
    '''Read *fileobject* to the end and return whatever its read() method yields.'''
    return fileobject.read()

def put_file(filename, filecontents):
    '''Store *filecontents* under key *filename* in the configured bucket and return the put_object response.'''
    return cos.put_object(
        Body=filecontents,
        Key=filename,
        Bucket=credentials_1['BUCKET'],
    )

In [7]:
# Function to Read json parametric values
def f_getconfigval(injsonstr, invarname):
    """Look up a configuration parameter by name.

    injsonstr: the parsed configuration - a sequence of dicts, each with a
        'paramname' and a 'paramvalue' key.
    invarname: the parameter name to search for.

    Returns the 'paramvalue' of the first entry whose 'paramname' equals
    invarname, or None when no entry matches.
    """
    for pair in injsonstr:
        if pair['paramname'] == invarname:
            return pair['paramvalue']
    # Explicit "not found" result; callers must handle None.
    return None

In [8]:
# Read configuration parameters from JSON file
# @hidden_cell
# Fetch the configuration file named by v_sampleConfigFileName from Object
# Storage and decode its contents as UTF-8 text.
inputfo = load_string(get_file(v_sampleConfigFileName)).decode('utf-8')

# Parse the JSON text; d is the parsed configuration that the following
# cells read their parameters from.
d = json.loads(inputfo)
print(d)


[{'paramname': 'features', 'paramvalue': "['atemp', 'PID', 'outpressure', 'inpressure', 'temp']"}, {'paramname': 'target', 'paramvalue': 'fail'}, {'paramname': 'data_size', 'paramvalue': '0.7'}]


In [9]:
# Read JSON configuration parametric values
# Unpack the json parameter values
import ast

# The "features" value is the text of a Python list literal. It comes from an
# external configuration file, so parse it with ast.literal_eval, which accepts
# literals only; eval() would execute any code placed in that file.
v_feature_list = list(ast.literal_eval(f_getconfigval(d, "features")))
v_target = str(f_getconfigval(d, "target"))
v_train_datasize = float(f_getconfigval(d, "data_size"))


In [10]:
# Show the unpacked configuration values so they can be checked by eye
print(v_feature_list, v_target, v_train_datasize)

['atemp', 'PID', 'outpressure', 'inpressure', 'temp'] fail 0.7


## 4 Read IoT Sensor data from database

In [11]:
# Read data from DB2 warehouse in BMX
# -----------------------------------
import os

from ibmdbpy import IdaDataBase, IdaDataFrame

# @hidden_cell
# The section below needs to be modified:
#    Adjust the connection string below for your own data source.
# SECURITY: the database password must not be stored in the notebook. It is
#    read from the DASHDB_PWD environment variable; a KeyError here means the
#    variable has not been set. Any password that was ever committed in this
#    cell should be treated as compromised and rotated.
# This connection object is used to access your data.
idadb_d281f6cd34eb4bc98f0183a45598dbb9 = IdaDataBase(
    dsn='DASHDB;Database=BLUDB;Hostname=dashdb-entry-yp-lon02-01.services.eu-gb.bluemix.net;'
        'Port=50000;PROTOCOL=TCPIP;UID=dash100002;PWD=' + os.environ['DASHDB_PWD'])

# Load the whole sensor table into a local dataframe
df_iotdata = IdaDataFrame(idadb_d281f6cd34eb4bc98f0183a45598dbb9, 'DASH100002.IOT_SENSOR_DATA').as_dataframe()

# Check Number of observations read for analysis
print ("Number of Observations :", len(df_iotdata))
# Inspect a few observations
df_iotdata.head()


Number of Observations : 944


Unnamed: 0,popul,atemp,selfLR,ClinLR,DoleLR,PID,outpressure,inpressure,temp,fail
0,0,7,7,1,6,6,36,3,1,1
1,190,1,3,3,5,1,20,4,1,0
2,31,7,2,2,6,1,24,6,1,0
3,83,4,3,4,5,1,28,6,1,0
4,640,7,5,6,4,0,68,6,1,0


In [12]:
# List the column names of the sensor dataset
datacolumns = dataset_columns(df_iotdata)
# dataset_columns already returns a list, so it is printed as-is
print("Data set columns : ", datacolumns)

Data set columns :  ['popul', 'atemp', 'selfLR', 'ClinLR', 'DoleLR', 'PID', 'outpressure', 'inpressure', 'temp', 'fail']


## 5 Run Failure Prediction algorithm on IoT data
### 5.1 Split data into Training and Test data

In [13]:
# Split the configured feature columns and the target column into training
# and test sets; v_train_datasize sets the share used for training.
train_x, test_x, train_y, test_y = train_test_split(
    df_iotdata[v_feature_list],
    df_iotdata[v_target],
    train_size=v_train_datasize,
)

# Report row counts (and, for the feature sets, column counts) of each part
print("Train x counts : ", train_x.shape[0], train_x.shape[1])
print("Train y counts : ", len(train_y))
print("Test x counts : ", test_x.shape[0], test_x.shape[1])
print("Test y counts : ", len(test_y))


Train x counts :  660 5
Train y counts :  660
Test x counts :  284 5
Test y counts :  284


### 5.2 Train the Predictive model

In [14]:
# Training Logistic regression model
trained_logistic_regression_model = train_logistic_regression(train_x, train_y)

# Accuracy on the data the model was fitted on
train_accuracy = model_accuracy(trained_logistic_regression_model, train_x, train_y)

# Accuracy on the held-out test data
test_accuracy = model_accuracy(trained_logistic_regression_model, test_x, test_y)

# Report both figures so the held-out result is not computed and then discarded
print ("Training Accuracy : ", round(train_accuracy * 100, 2), "%")
print ("Testing Accuracy  : ", round(test_accuracy * 100, 2), "%")



Training Accuracy :  90.0 %


### 5.3 Score the Test data using the Trained model

In [15]:
# Score the test features with the trained model, and keep the actual test
# targets as a plain array of values, ready for building the confusion matrix
predictfail = score_data(trained_logistic_regression_model, test_x)
actfail = test_y.values


## 6 Confusion matrix for deeper analysis of Prediction accuracy
#####   Confusion matrix outputs below can be used for calculating more customised Accuracy metrics

In [16]:
# Print the number of actual and predicted observations, then the
# confusion matrix and the metrics derived from it, for the test data
confusion_matrix(actfail, predictfail)

Actual, Predicted Observations:   284 284
--------------------------------------------
Confusion Matrix
--------------------------------------------
               Predicted N Predicted Y
Actual N       151            17
Actual Y       7            109
--------------------------------------------
Total observations  :   284
False Positives     :   17
False Negatives     :   7
Overall Accuracy    :   91.55 %
Sensitivity/Recall  :   93.97 %
Specificity         :   89.88 %
Precision           :   86.51 %
--------------------------------------------
