In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from os import walk
import os
import math
from copy import deepcopy

import tensorflow as tf
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, recall_score, precision_score, mean_squared_error
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from tensorflow.keras import layers

pd.set_option('display.max_columns', None)

# fix random seed for reproducibility
tf.random.set_seed(7)




In [2]:
## Read and Load Data
file_names = []
for (dirpath, dirnames, filenames) in walk("../Data/"):
    file_names.extend(filenames)

def read_data(file_name):
    data = pd.read_csv(os.path.join("../Data/", file_name+".txt"), sep = "\s+", header = None)
    col_names = ["unit_number", "time"]
    col_names += [f"operation{i}" for i in range(1, 4)]
    col_names += [f"sensor{i}" for i in range(1, 22)]
    data.columns=col_names

    return data


In [4]:

# Test set
test_FD001 = read_data("test_FD001")

# rul
rul = test_FD001["unit_number"]
rul_test = pd.read_csv(os.path.join("../Data/","RUL_FD001.txt"), sep = "\s+", header = None)
rul_test = rul_test[0].to_list()
rul_c_test = []
for i in rul:
    rul_c_test.append(rul_test[i-1])

test_FD001["RUL"] = rul_c_test - test_FD001["time"]

test_FD001

Unnamed: 0,unit_number,time,operation1,operation2,operation3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,sensor8,sensor9,sensor10,sensor11,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,RUL
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,21.61,553.90,2388.04,9050.17,1.3,47.20,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735,111
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,21.61,554.85,2388.01,9054.42,1.3,47.50,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916,110
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,21.61,554.11,2388.05,9056.96,1.3,47.50,521.97,2388.03,8130.10,8.4441,0.03,393,2388,100.0,39.08,23.4166,109
3,1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,21.61,554.07,2388.03,9045.29,1.3,47.28,521.38,2388.05,8132.90,8.3917,0.03,391,2388,100.0,39.00,23.3737,108
4,1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,21.61,554.16,2388.01,9044.55,1.3,47.31,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.4130,107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,100,194,0.0049,0.0000,100.0,518.67,643.24,1599.45,1415.79,14.62,21.61,553.41,2388.02,9142.37,1.3,47.69,520.69,2388.00,8213.28,8.4715,0.03,394,2388,100.0,38.65,23.1974,-174
13092,100,195,-0.0011,-0.0001,100.0,518.67,643.22,1595.69,1422.05,14.62,21.61,553.22,2388.05,9140.68,1.3,47.60,521.05,2388.09,8210.85,8.4512,0.03,395,2388,100.0,38.57,23.2771,-175
13093,100,196,-0.0006,-0.0003,100.0,518.67,643.44,1593.15,1406.82,14.62,21.61,553.04,2388.11,9146.81,1.3,47.57,521.18,2388.04,8217.24,8.4569,0.03,395,2388,100.0,38.62,23.2051,-176
13094,100,197,-0.0038,0.0001,100.0,518.67,643.26,1594.99,1419.36,14.62,21.61,553.37,2388.07,9148.85,1.3,47.61,521.33,2388.08,8220.48,8.4711,0.03,395,2388,100.0,38.66,23.2699,-177


In [24]:
test_data = deepcopy(test_FD001)

# Remove columns
columns_to_be_dropped = [0,1,2,3,4,5,9,10,14,20,22,23]

# Normalize dataset
scaler = StandardScaler()
test_df = scaler.fit_transform(test_data.iloc[:,1:-1])

test_df = pd.DataFrame(data = np.c_[test_data.iloc[:,0], test_df, test_data.iloc[:,-1]])

# Unique engines
num_train_machines = len(test_df[0].unique())

# Windowing or reshaping into (samples, time steps, features)
input_data = test_df.iloc[:,:-1]
window_length = 50
shift = 10
processed_test_data = []

# Windowing per engine
for i in np.arange(1, num_train_machines+1):
    temp_train_data = test_df.loc[test_df[0] == i].drop(columns = [0]).values

    num_batches = int((len(input_data) - window_length)/shift)+ 1 
    num_features = input_data.shape[1]
    output_data = np.repeat(np.nan, repeats = num_batches * window_length * num_features).reshape(num_batches, window_length,
                                                                                                    num_features)
    for batch in range(num_batches):
        output_data[batch,:,:] = input_data.iloc[(0+shift*batch):(0+shift*batch+window_length),:]
    
    processed_test_data.append(output_data)

processed_test_data = np.concatenate(processed_test_data)

print("Processed test data shape: ", processed_test_data.shape)

Processed test data shape:  (130500, 50, 26)


In [25]:
true_rul = test_df.iloc[:,-1]
true_rul

0        111.0
1        110.0
2        109.0
3        108.0
4        107.0
         ...  
13091   -174.0
13092   -175.0
13093   -176.0
13094   -177.0
13095   -178.0
Name: 26, Length: 13096, dtype: float64

In [20]:
model = tf.keras.models.load_model("../models/FD001_LSTM_2.h5")
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 50, 128)           79360     
                                                                 
 dropout (Dropout)           (None, 50, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 50, 64)            49408     
                                                                 
 dropout_1 (Dropout)         (None, 50, 64)            0         
                                                                 
 lstm_2 (LSTM)               (None, 32)                12416     
                                                                 
 dropout_2 (Dropout)         (None, 32)                0         
                                                                 
 dense (Dense)               (None, 128)               

In [21]:
rul_pred = model.predict(processed_test_data).reshape(-1)
'''preds_for_each_engine = np.split(rul_pred, np.cumsum(num_test_windows_list)[:-1])
mean_pred_for_each_engine = [np.average(ruls_for_each_engine, weights = np.repeat(1/num_windows, num_windows)) 
                             for ruls_for_each_engine, num_windows in zip(preds_for_each_engine, num_test_windows_list)]
RMSE = np.sqrt(mean_squared_error(true_rul, mean_pred_for_each_engine))
print("RMSE: ", RMSE)'''



'preds_for_each_engine = np.split(rul_pred, np.cumsum(num_test_windows_list)[:-1])\nmean_pred_for_each_engine = [np.average(ruls_for_each_engine, weights = np.repeat(1/num_windows, num_windows)) \n                             for ruls_for_each_engine, num_windows in zip(preds_for_each_engine, num_test_windows_list)]\nRMSE = np.sqrt(mean_squared_error(true_rul, mean_pred_for_each_engine))\nprint("RMSE: ", RMSE)'

In [22]:
rul_pred

array([97.392426, 77.70498 , 64.72442 , ..., 31.67925 , 27.840626,
       18.403011], dtype=float32)