# Predictive maintenance model

Made by: Joey Einerhand, Cédric Cortenraede, Lennox Narinx, Giuseppe Collura

## Get file
All files from the data directory will be loaded (_this can take some time_).\
Based on the loaded data, a dataframe will be created.

In [1]:
import pandas as pd
import os

# Directory that holds the sensor files; change "data_dir" if the data lives elsewhere
data_dir = "data"
# Text files in that directory that are not sensor recordings
excluded_files = {"documentation.txt", "description.txt", "profile.txt"}
files = [name for name in os.listdir(data_dir) if name not in excluded_files]

In [None]:
# Load the sensor data.
# Each row of a sensor file is reduced to its mean, which eliminates the
# differences in sampling frequency between sensors.
# NOTE(review): the previous call passed `names=[file]`, i.e. a single name.
# If a file has more than one column, pandas uses the surplus leading columns
# as the index, so the row mean would only cover the last column. Reading
# without `names` keeps every column in the mean - confirm against the data.
sensor_means = {}
for file_name in files:
    # splitext drops only the extension; str.strip(".txt") removes any
    # leading/trailing '.', 't' or 'x' characters and can mangle the name.
    sensor_name = os.path.splitext(file_name)[0]
    readings = pd.read_csv(os.path.join(data_dir, file_name), sep="\t", header=None)
    sensor_means[sensor_name] = readings.mean(axis=1).to_numpy()

# Build the frame in one step instead of inserting column by column
df = pd.DataFrame(sensor_means)

# Load the profiles; profile.txt is read once and its columns are mapped,
# by position, onto the names below
profiles = ["Cooler condition", "Valve condition", "Internal pump leakage", "Hydraulic accumulator", "Stable flag"]
profile_data = pd.read_csv(os.path.join(data_dir, 'profile.txt'), sep="\t", header=None)
for i, profile in enumerate(profiles):
    df[profile] = profile_data[i].to_numpy()

df

## Analyse data
- The analysis looks at the correlation of the different columns with each other
- This gives a filter to use later
- The filter will be used to determine which columns impact the result

In [None]:
import seaborn as sb
import matplotlib.pyplot as plt
import math

In [None]:
# Draw one histogram per dataframe column on a grid that is three plots wide
plt.figure(figsize=(30, 25))

grid_rows = math.ceil(len(df.columns) / 3)
for position, column_name in enumerate(df, start=1):
    plt.subplot(grid_rows, 3, position)
    plt.title(column_name)
    plt.hist(df[column_name])

plt.show()

In [None]:
# Draw one box plot per dataframe column on a grid that is three plots wide
plt.figure(figsize=(30, 25))

plots_per_row = 3
row_count = math.ceil(len(df.columns) / plots_per_row)
for slot, name in enumerate(df, start=1):
    plt.subplot(row_count, plots_per_row, slot)
    plt.title(name)
    plt.boxplot(df[name])

plt.show()

In [None]:
# Pairwise correlation between all columns, shown as an annotated heatmap
plt.figure(figsize=(30, 10))
corr = df.corr()
sb.heatmap(corr, annot=True, cmap="Greens")

## Making model
## Predicting LSTM
_Long Short-Term Memory_

In [None]:
import matplotlib.pyplot as plt
import datetime
from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn import metrics

plt.plot(df["Cooler condition"], label="Cooler condition plot")

# Select the target and the four temperature-sensor columns and cast them in
# one vectorised step. The previous element-wise `data[col][i] = value` loop
# used chained assignment, which pandas warns about and which stops writing
# into the frame once Copy-on-Write is enabled.
column_types = {
    "Cooler condition": int,
    "TS1": float,
    "TS2": float,
    "TS3": float,
    "TS4": float,
}
# astype returns a new frame, so `df` itself is left untouched
data = df[list(column_types)].astype(column_types)

data.head()

In [None]:
X_scaler = MinMaxScaler(feature_range=(0, 1))
Y_scaler = MinMaxScaler(feature_range=(0, 1))

features = data[["TS1", "TS2", "TS3", "TS4"]]
target = data[["Cooler condition"]]

# split into train and test sets
# Train is the dataset which the model is trained on
# Test is the dataset which the model is verified with
train_size = int(len(data) * 0.9)
test_size = len(data) - train_size

# Fit the scalers on the training rows only. Fitting on the full dataset lets
# the min/max of the test rows leak into training and makes the test metrics
# look better than they are.
X_scaler.fit(features.iloc[:train_size])
Y_scaler.fit(target.iloc[:train_size])

# Scale the complete series with the train-fitted scalers; X_data / Y_data are
# kept because later cells use them for plotting.
X_data = X_scaler.transform(features)
Y_data = Y_scaler.transform(target)

trainX, testX = X_data[:train_size, :], X_data[train_size:, :]
trainY, testY = Y_data[:train_size, :], Y_data[train_size:, :]

print(len(trainX), len(testX))

In [None]:
# # convert an array of values into a dataset matrix
# def create_dataset(dataset, look_back=1):
# 	dataX, dataY = [], []
# 	for i in range(len(dataset)-look_back-1):
# 		a = dataset[i:(i+look_back), 0]
# 		dataX.append(a)
# 		dataY.append(dataset[i + look_back, 0])
# 	return np.array(dataX), np.array(dataY)

# # reshape into X=t and Y=t+1
# look_back = 1
# trainX, trainY = create_dataset(train, look_back)
# testX, testY = create_dataset(test, look_back)

In [None]:
# Number of time steps per sample used when declaring the LSTM input
look_back = 1

# Insert a single time-step axis so the inputs become
# [samples, time steps, features]
trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
testX = testX.reshape(testX.shape[0], 1, testX.shape[1])

In [None]:
# Build the network as a layer list: a 4-unit LSTM on (look_back, 4) inputs
# followed by a single-output dense layer, then train it on the training split
model = Sequential([
    LSTM(4, input_shape=(look_back, 4)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(trainX, trainY, epochs=100, verbose=1)

In [None]:
# # make predictions
# import math
# from sklearn.metrics import mean_squared_error 

# trainPredict = model.predict(trainX)
# testPredict = model.predict(testX)
# # invert predictions
# trainPredict = X_scaler.inverse_transform(trainPredict)

# #reshape
# trainY.reshape((len(trainY), 1))


# trainY = scaler.inverse_transform(trainY)
# testPredict = scaler.inverse_transform(testPredict)


# testY = scaler.inverse_transform(testY)
# calculate root mean squared error
# trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
# print('Train Score: %.2f RMSE' % (trainScore))
# testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0]))
# print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# Predict both splits with the trained model
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# Build NaN-filled arrays for plotting. They are shaped like Y_data (the
# plotted baseline) rather than X_data, so each prediction is drawn once
# instead of being broadcast across every feature column.
# Every input row maps to the target of the same row (no look-back windowing
# is applied), so the train predictions start at index 0 without a shift.
trainPredictPlot = np.empty_like(Y_data)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[:len(trainPredict), :] = trainPredict

# The test predictions follow directly after the training rows
testPredictPlot = np.empty_like(Y_data)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(trainPredict):len(trainPredict) + len(testPredict), :] = testPredict



In [None]:
# ---------------------------------------
#  Plot baseline and predictions
# ---------------------------------------

# Y_data is the baseline (red); train predictions are blue, test predictions yellow
for series, colour in (
    (Y_data, "red"),
    (trainPredictPlot, "blue"),
    (testPredictPlot, "yellow"),
):
    plt.plot(series, color=colour)
plt.show()

In [None]:
## ----------------------------
## METRICS
## ----------------------------
def timeseries_evaluation_metrics_func(y_true, y_pred):
    def mean_absolute_percentage_error(y_true, y_pred): 
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    print('Evaluation metric results:-')
    print(f'MSE is : {metrics.mean_squared_error(y_true, y_pred)}')
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(metrics.mean_squared_error(y_true, y_pred))}')
    print(f'MAPE is : {mean_absolute_percentage_error(y_true, y_pred)}')
    print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n') 
    
# Report the metrics for the training split first, then for the test split
for heading, actual, predicted in (
    ("Training metrics", trainY, trainPredict),
    ("Testing metrics", testY, testPredict),
):
    print(heading)
    timeseries_evaluation_metrics_func(actual, predicted)

# trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
# print('Train Score: %.2f RMSE' % (trainScore))
# testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0]))
# print('Test Score: %.2f RMSE' % (testScore))