# *Merged Jupyter Notebook*

<hr><font color="green"><h1>from file: PredictiveMaintenanceMainNotebook1</h1></font>

# Predictive maintenance model

Made by: Joey Einerhand, Cédric Cortenraede, Lennox Narinx, Giuseppe Collura

## Get file
All files in the data directory are loaded; _this can take some time_.\
A DataFrame is then built from the loaded data.


In [1]:
import pandas as pd
import os

# Directory that holds the sensor readings and profile.txt.
# Update "data_dir" location if necessary.
data_dir = "data"

# Text files that live next to the sensor readings but are not sensor data.
non_sensor_files = {"documentation.txt", "description.txt", "profile.txt"}

# os.listdir returns entries in arbitrary order, so sort them to keep the
# resulting column order reproducible. Only ".txt" entries are kept so stray
# files or folders in the directory are not parsed as sensor readings.
files = sorted(
    file
    for file in os.listdir(data_dir)
    if file.lower().endswith(".txt") and file not in non_sensor_files
)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'data'

In [None]:
df = pd.DataFrame()

# Load the sensor data.
# Each row of a sensor file is reduced to its mean, so every sensor contributes
# one value per row regardless of how many samples the row contains.
for file in files:
    # os.path.splitext removes the extension; str.strip(".txt") would instead
    # strip any leading/trailing '.', 't' or 'x' characters from the name.
    sensor_name = os.path.splitext(file)[0]

    # Deliberately no `names=`: when fewer names than columns are given, pandas
    # uses the leading columns as index levels, and the "row mean" would then be
    # computed over the last column only.
    readings = pd.read_csv(os.path.join(data_dir, file), sep="\t", header=None)
    df[sensor_name] = readings.mean(axis=1).to_numpy()

# Load the profiles: column i of profile.txt is stored under profiles[i].
profiles = ["Cooler condition", "Valve condition", "Internal pump leakage", "Hydraulic accumulator", "Stable flag"]
profile_data = pd.read_csv(os.path.join(data_dir, 'profile.txt'), sep="\t", header=None)  # read once, not per column
for i, profile in enumerate(profiles):
    df[profile] = profile_data[i].to_numpy()

df

## Analyse data
- The analysis looks at how the different columns correlate with each other.
- This gives a filter to use later.
- The filter is used to determine which columns have an impact on the result.

In [None]:
import seaborn as sb
import matplotlib.pyplot as plt
import math

In [None]:
# One histogram per column of df, laid out on a grid that is three plots wide.
plt.figure(figsize=(30, 25))

grid_rows = math.ceil(len(df.columns) / 3)
for slot, column in enumerate(df, start=1):
    plt.subplot(grid_rows, 3, slot)
    plt.title(column)
    plt.hist(df[column])

plt.show()

In [None]:
# One boxplot per column of df, using the same three-wide grid as the histograms.
plt.figure(figsize=(30, 25))

n_plot_rows = math.ceil(len(df.columns) / 3)
for position, name in enumerate(df, start=1):
    values = df[name]
    plt.subplot(n_plot_rows, 3, position)
    plt.title(name)
    plt.boxplot(values)

plt.show()

In [None]:
# Pairwise correlation between all columns of df, drawn as an annotated heatmap.
plt.figure(figsize=(30, 10))
corr = df.corr()
sb.heatmap(data=corr, annot=True, cmap="Greens")

## Making model

In [None]:
import numpy as np
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
import tensorflow as tf

# Scale the selected feature columns and the target column with separate
# MinMaxScaler instances, so each can be inverted independently later.
feature_columns = ["CE", "CP", "EPS1", "VS1"]
target_columns = ["Cooler condition"]

X_scaler, Y_scaler = MinMaxScaler(), MinMaxScaler()

X_data = X_scaler.fit_transform(df[feature_columns])
Y_data = Y_scaler.fit_transform(df[target_columns])

In [None]:
from sklearn.model_selection import train_test_split

# Split features and targets in a single call: both arrays are shuffled with the
# same permutation, so rows stay paired by construction instead of relying on two
# separate calls that happen to share a random_state.
x_train, x_test, y_train, y_test = train_test_split(
    X_data, Y_data, test_size=0.2, random_state=25
)

<hr><font color="green"><h1>from file: PredictiveMaintenanceMainNotebook2</h1></font>

# Predictive maintenance

Made by: Joey Einerhand, Cédric Cortenraede, Lennox Narinx, Giuseppe Collura

## Data loading
_Loading all the text files with data to use for the prediction model._

In [None]:
import pandas as pd

def _load_last_sample(sensor):
    """Read data/<sensor>.txt and return its last column as a one-column DataFrame.

    The returned frame has a default integer index and a single column named
    after the sensor.

    Why the last column: the previous code passed a single column name to
    read_csv. With fewer names than columns, pandas turns all leading columns
    into index levels and keeps only the last column as data. Selecting that
    column explicitly yields the same values without the very wide MultiIndex.

    NOTE(review): notebook 1 describes reducing each row to its mean instead of
    keeping one sample - confirm which of the two is wanted here.
    """
    samples = pd.read_csv(f"data/{sensor}.txt", sep="\t", header=None)
    return samples.iloc[:, -1].to_frame(name=sensor)


# NOTE(review): the group labels below are kept from the original cell. They reuse
# the target names of profile.txt for sensor files - verify them against the
# dataset documentation.

# Cooler condition (%)
df_ce = _load_last_sample("CE")

# Valve condition (%)
df_se = _load_last_sample("SE")

# Internal pump leakage (l/min)
df_fs1 = _load_last_sample("FS1")
df_fs2 = _load_last_sample("FS2")

# Hydraulic accumulator (bar)
df_ps1 = _load_last_sample("PS1")
df_ps2 = _load_last_sample("PS2")
df_ps3 = _load_last_sample("PS3")
df_ps4 = _load_last_sample("PS4")
df_ps5 = _load_last_sample("PS5")
df_ps6 = _load_last_sample("PS6")

# Temperature (°C)
df_ts1 = _load_last_sample("TS1")
df_ts2 = _load_last_sample("TS2")
df_ts3 = _load_last_sample("TS3")
df_ts4 = _load_last_sample("TS4")

# Target variables: one name per column. The previous single string
# "CC, VC, IPL, HA, SF" was treated as ONE column name, which pushed the first
# four columns into the index.
df_profile = pd.read_csv("data/profile.txt", sep="\t", header=None, names=["CC", "VC", "IPL", "HA", "SF"])


# Severity grade for each type.
# NOTE(review): this is the LAST column of profile.txt (named "SF" above), which is
# what the original single-name read returned - confirm it is the intended target.
df_severity = df_profile[["SF"]].rename(columns={"SF": "Severity"})

## Data Cleaning
_After loading all the data this data will be transformed to be used for the prediction model._


1. First the sensors with multiple data points will be added to a single DataFrame.  
2. Then all DataFrames will be added to a single DataFrame for easier access to the data.
3. The order of columns will be changed next for a more logical order.

In [None]:
# Make separate DataFrames from the loaded data.
# .to_numpy() drops each source frame's index, so the columns are paired by row
# position and every combined frame gets a plain 0..n-1 index.

# DataFrame for internal pump leakage
df_fs = pd.DataFrame({
    "FS1": df_fs1["FS1"].to_numpy(),
    "FS2": df_fs2["FS2"].to_numpy(),
})

# DataFrame for hydraulic accumulator
df_ps = pd.DataFrame({
    "PS1": df_ps1["PS1"].to_numpy(),
    "PS2": df_ps2["PS2"].to_numpy(),
    "PS3": df_ps3["PS3"].to_numpy(),
    "PS4": df_ps4["PS4"].to_numpy(),
    "PS5": df_ps5["PS5"].to_numpy(),
    "PS6": df_ps6["PS6"].to_numpy(),
})

# DataFrame for temperature
df_ts = pd.DataFrame({
    "TS1": df_ts1["TS1"].to_numpy(),
    "TS2": df_ts2["TS2"].to_numpy(),
    "TS3": df_ts3["TS3"].to_numpy(),
    "TS4": df_ts4["TS4"].to_numpy(),
})

# Empty placeholder; not used by any cell visible in this notebook.
df_cc = pd.DataFrame()

# Frequency of each distinct row of df_profile. The former positional argument 3
# was interpreted as `subset=3`, which is not a column label of df_profile.
df_profile.value_counts()

In [None]:
# Collect every sensor column plus the target into one DataFrame for easy access,
# then put the columns into a fixed, readable order.
column_order = ["CE", "SE", "FS1", "FS2", "PS1", "PS2", "PS3", "PS4", "PS5", "PS6", "TS1", "TS2", "TS3", "TS4", "Severity"]

df = (
    pd.concat([df_fs, df_ps, df_ts], axis=1, join="inner")
    .assign(
        CE=df_ce["CE"].to_numpy(),
        SE=df_se["SE"].to_numpy(),
        Severity=df_severity["Severity"].to_numpy(),
    )
    .reindex(columns=column_order)
)

df

## Predicting LSTM
_Long Short Term Memory_

In [None]:
import matplotlib.pyplot as plt
import datetime
from keras.models import Sequential
from keras.layers import LSTM,Dropout,Dense
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn import metrics

plt.plot(df_ts["TS4"], label="TS4 plot")

# Modelling frame: CE as the target and the four temperature columns as features,
# using every row (one row per entry of df_ts["TS4"]).
# The columns are assigned whole instead of element by element: the former
# `data[col][i] = value` loop was chained assignment, which is slow and is not
# guaranteed to write into `data`.
temperature_columns = ["TS1", "TS2", "TS3", "TS4"]

data = pd.DataFrame(index=range(len(df_ts["TS4"])))
# NOTE(review): astype(int) is kept from the original cell; if CE holds fractional
# values they are truncated here - confirm that is intended for the target.
data["CE"] = df["CE"].to_numpy().astype(int)
for column in temperature_columns:
    data[column] = df_ts[column].to_numpy().astype(float)

data.head()

In [None]:
feature_columns = ["TS1", "TS2", "TS3", "TS4"]
target_columns = ["CE"]

# Ordered split into train and test sets: the first 80% of the rows train the
# model, the remaining rows verify it.
train_size = int(len(data) * 0.8)
test_size = len(data) - train_size

X_scaler = MinMaxScaler(feature_range=(0, 1))
Y_scaler = MinMaxScaler(feature_range=(0, 1))

# Fit the scalers on the training rows only, then apply them to all rows.
# Fitting on the full data would let the min/max of the test rows leak into the
# training inputs. Test values may therefore fall slightly outside [0, 1].
X_scaler.fit(data[feature_columns].iloc[:train_size])
Y_scaler.fit(data[target_columns].iloc[:train_size])

X_data = X_scaler.transform(data[feature_columns])
Y_data = Y_scaler.transform(data[target_columns])

trainX, testX = X_data[:train_size, :], X_data[train_size:, :]
trainY, testY = Y_data[:train_size, :], Y_data[train_size:, :]

print(len(trainX), len(testX))

In [None]:
# # convert an array of values into a dataset matrix
# def create_dataset(dataset, look_back=1):
# 	dataX, dataY = [], []
# 	for i in range(len(dataset)-look_back-1):
# 		a = dataset[i:(i+look_back), 0]
# 		dataX.append(a)
# 		dataY.append(dataset[i + look_back, 0])
# 	return np.array(dataX), np.array(dataY)

# # reshape into X=t and Y=t+1
# look_back = 1
# trainX, trainY = create_dataset(train, look_back)
# testX, testY = create_dataset(test, look_back)

In [None]:
# Number of time steps per sample; also used for the model's input shape.
look_back = 1

# Reshape input to be [samples, time steps, features].
# The feature count is taken from the LAST axis, so running this cell again on
# arrays that are already 3-D leaves them unchanged instead of raising.
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[-1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[-1]))

In [None]:
# Build the network as a layer list: one LSTM layer with 4 units that takes
# (look_back, 4)-shaped samples, followed by a single-unit dense output.
model = Sequential([
    LSTM(4, input_shape=(look_back, 4)),
    Dense(1),
])

# Train with Adam on mean squared error for 100 epochs.
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(trainX, trainY, epochs=100, verbose=1)

In [None]:
# # make predictions
# import math
# from sklearn.metrics import mean_squared_error 

# trainPredict = model.predict(trainX)
# testPredict = model.predict(testX)
# # invert predictions
# trainPredict = X_scaler.inverse_transform(trainPredict)

# #reshape
# trainY.reshape((len(trainY), 1))


# trainY = scaler.inverse_transform(trainY)
# testPredict = scaler.inverse_transform(testPredict)


# testY = scaler.inverse_transform(testY)
# calculate root mean squared error
# trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
# print('Train Score: %.2f RMSE' % (trainScore))
# testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0]))
# print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# Predict both portions of the dataset with the trained model.
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# NaN-filled buffers shaped like the target (one column), so each prediction can
# be drawn at the row of Y_data it belongs to.
# Sample i of trainX is paired with row i of Y_data (no look-back windowing is
# applied when the sets are built), so the train predictions start at row 0 and
# the test predictions follow directly after them.
trainPredictPlot = np.full_like(Y_data, np.nan, dtype=float)
trainPredictPlot[:len(trainPredict), :] = trainPredict

testPredictPlot = np.full_like(Y_data, np.nan, dtype=float)
testPredictPlot[len(trainPredict):len(Y_data), :] = testPredict



In [None]:
# ---------------------------------------
#  Convert data to something we can plot
# ---------------------------------------

def InvertScaledData(data):
    """Return ``1 - data`` element-wise as a new array.

    This mirrors every value around 0.5 (0 becomes 1, 1 becomes 0); it does not
    undo a scaler's transform. NaN entries stay NaN.

    Parameters
    ----------
    data : array-like
        Values to mirror. Any shape is accepted; the input is not modified.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``.
    """
    # A single vectorized subtraction replaces the former row-by-row Python loop.
    return 1 - np.asarray(data)

# Mirrored copies of both prediction buffers (kept available for plotting).
newTrainPredictPlot, newTestPredictPlot = (
    InvertScaledData(buffer) for buffer in (trainPredictPlot, testPredictPlot)
)

# Plot the scaled target as baseline (red), then the train (blue) and test
# (yellow) predictions on top of it.
for series, colour in ((Y_data, "red"), (trainPredictPlot, "blue"), (testPredictPlot, "yellow")):
    plt.plot(series, color=colour)
plt.show()

In [None]:
## ----------------------------
## METRICS
## ----------------------------
def timeseries_evaluation_metrics_func(y_true, y_pred):
    """Print MSE, MAE, RMSE, MAPE and R2 for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed values; shape (n,) or (n, 1).
    y_pred : array-like
        Predicted values; shape (n,) or (n, 1).

    Returns
    -------
    None
        The metrics are only printed.
    """
    # Flatten both inputs so an (n, 1) target and an (n,) prediction are compared
    # element by element; unflattened, `y_true - y_pred` broadcasts to (n, n).
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    def mean_absolute_percentage_error(y_true, y_pred):
        # The percentage error is undefined where the target is 0 (frequent after
        # min-max scaling), so those entries are left out of the mean.
        nonzero = y_true != 0
        if not nonzero.any():
            return float("nan")
        relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
        return float(np.mean(np.abs(relative_error)) * 100)

    mse = metrics.mean_squared_error(y_true, y_pred)  # reused for the RMSE line
    print('Evaluation metric results:-')
    print(f'MSE is : {mse}')
    print(f'MAE is : {metrics.mean_absolute_error(y_true, y_pred)}')
    print(f'RMSE is : {np.sqrt(mse)}')
    print(f'MAPE is : {mean_absolute_percentage_error(y_true, y_pred)}')
    print(f'R2 is : {metrics.r2_score(y_true, y_pred)}',end='\n\n') 

# trainY / testY are (n, 1) column arrays while the predictions are passed as 1-D
# columns; flatten the targets too so both arguments have the same shape and no
# metric compares them through broadcasting.
print("Training metrics")
timeseries_evaluation_metrics_func(trainY.ravel(), trainPredict[:,0])
print("Testing metrics")
timeseries_evaluation_metrics_func(testY.ravel(), testPredict[:,0])

# trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
# print('Train Score: %.2f RMSE' % (trainScore))
# testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0]))
# print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# Predict for the single raw value 2805, shaped as (1, 1, 1).
# NOTE(review): the model built in this notebook is declared with
# input_shape=(look_back, 4), whereas this input has one feature and is not
# scaled - confirm that this cell still matches the current model.
x = np.array([2805]).reshape((1, 1, 1))

ypred = model.predict(x, verbose=0)
ypred

In [None]:
# Number of future steps to forecast.
n_head = 1000

# NOTE(review): `dataset` is not defined in any cell visible in this notebook -
# confirm where it is expected to come from.
print(dataset)

# Flatten the dataset and keep its last `look_back` values as the forecast seed.
dataset_predict = dataset.reshape(-1)
prediction_list = dataset_predict[-look_back:]

seed_tail = prediction_list[-look_back:]
print(prediction_list)
print(seed_tail)
print(type(seed_tail))

In [None]:
# NOTE(review): `dataset` and `scaler` are not defined in any cell visible in this
# notebook - confirm where they are expected to come from.
prediction_list = prediction_list[look_back-1:]

# One-column frame with the scaled seed values. It is built in a single step: the
# former `predictions_df["predictions"][i] = ...` loop was chained assignment,
# which is not guaranteed to write into the frame.
predictions_df = pd.DataFrame(
    {"predictions": np.asarray(prediction_list)},
    index=range(len(prediction_list)),
)

# Back to original units; from here on `predictions_df` is a 2-D array.
predictions_df = scaler.inverse_transform(predictions_df)
print(predictions_df)

# Frame used for plotting the values next to the baseline. Its index starts at
# the LAST baseline position (len(dataset) - 1), so the first plotted value sits
# on the end of the baseline.
first_index = len(dataset) - 1
results_df = pd.DataFrame(
    {"predictions": predictions_df[:, 0]},
    index=range(first_index, first_index + len(prediction_list)),
)
print(results_df)

# Plot the baseline in original units. (The former `dataset.reshape(1, -1)` call
# discarded its result and has been removed.)
plt.plot(scaler.inverse_transform(dataset), color="red")

# Plot the future predictions
plt.plot(results_df, color="purple")
plt.show()

In [None]:

# Forecast n_head steps past the end of the dataset, one step at a time.
# NOTE(review): `dataset` and `scaler` are not defined in any cell visible in this
# notebook, and the model built above is declared with 4 input features while a
# single value is fed here - confirm this cell matches the current model.
for i in range(n_head):
    step = len(dataset) + i

    # transform(), not fit_transform(): refitting on a single value would discard
    # the fitted scaling on every iteration. The scaler gets a 2-D array; the
    # model input is reshaped to 3-D afterwards.
    x = scaler.transform(np.array([[step]]))
    x = x.reshape((1, 1, 1))

    ypred = model.predict(x, verbose=0)

    # .loc writes into results_df itself (adding the row when needed); the former
    # chained `results_df["predictions"][...] = ...` is not guaranteed to.
    results_df.loc[step, "predictions"] = scaler.inverse_transform(ypred)[0, 0]

   

In [None]:
# Baseline in original units (red) with the forecast values (purple) on top.
baseline = scaler.inverse_transform(dataset)
plt.plot(baseline, color="red")
plt.plot(results_df, color="purple")

print(results_df)
plt.show()

In [None]:
# # Predict
# for _ in range(n_head):
#     x = prediction_list[-look_back:]
#     print("The number to use when predicting is: " + str(x))
#     x = x.reshape((1, look_back, 1))
#     print(model.predict(x)[0][0])
#     out = model.predict(x)[0][0]
#     print("The predicted number is: " + str(out))
#     #out = out * min(dataset_predict) + (max(dataset_predict)+min(dataset_predict))
#     prediction_list = np.append(prediction_list, (out))
    
# x = pd.DataFrame(index=range(len(dataset_predict) - 1, len(dataset_predict) - 1 + n_head), columns=["TS4"])
# for i in range(len(dataset_predict), len(dataset_predict) + n_head):
#     x["TS4"].append(i)
# Empty frame that receives the n_head future predictions, indexed directly after
# the baseline. The stop bound is len(dataset) + n_head so the index holds exactly
# n_head rows; the former `len(dataset) - 1 + n_head` left it one row short of
# what the forecast loop writes.
results_df = pd.DataFrame(index=range(len(dataset), len(dataset) + n_head), columns=["predictions"])
results_df.head()