In [12]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, BatchNormalization, Masking, Activation
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [2]:
df = pd.read_csv("train_FD001.csv")
temp_df = pd.read_csv("train_FD001_selected_features.csv")
drop_columns = [x for x in df if x not in temp_df.columns and x!='unit_number']
print(drop_columns)

['op_setting1', 'op_setting2', 'op_setting3', 'sen_measurement1', 'sen_measurement5', 'sen_measurement6', 'sen_measurement9', 'sen_measurement10', 'sen_measurement14', 'sen_measurement16', 'sen_measurement18', 'sen_measurement19']


In [3]:
df.drop(drop_columns, axis=1, inplace=True)
df

Unnamed: 0,unit_number,time,sen_measurement2,sen_measurement3,sen_measurement4,sen_measurement7,sen_measurement8,sen_measurement11,sen_measurement12,sen_measurement13,sen_measurement15,sen_measurement17,sen_measurement20,sen_measurement21,RUL
0,1,1,641.82,1589.70,1400.60,554.36,2388.06,47.47,521.66,2388.02,8.4195,392,39.06,23.4190,191
1,1,2,642.15,1591.82,1403.14,553.75,2388.04,47.49,522.28,2388.07,8.4318,392,39.00,23.4236,190
2,1,3,642.35,1587.99,1404.20,554.26,2388.08,47.27,522.42,2388.03,8.4178,390,38.95,23.3442,189
3,1,4,642.35,1582.79,1401.87,554.45,2388.11,47.13,522.86,2388.08,8.3682,392,38.88,23.3739,188
4,1,5,642.37,1582.85,1406.22,554.00,2388.06,47.28,522.19,2388.04,8.4294,393,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,100,196,643.49,1597.98,1428.63,551.43,2388.19,48.07,519.49,2388.26,8.4956,397,38.49,22.9735,4
20627,100,197,643.54,1604.50,1433.58,550.86,2388.23,48.04,519.68,2388.22,8.5139,395,38.30,23.1594,3
20628,100,198,643.42,1602.46,1428.18,550.94,2388.24,48.09,520.01,2388.24,8.5646,398,38.44,22.9333,2
20629,100,199,643.23,1605.26,1426.53,550.68,2388.25,48.39,519.67,2388.23,8.5389,395,38.29,23.0640,1


In [4]:
def gen_train(id_df, seq_len, seq_cols):
    data_array = id_df[seq_cols].values
    num_elements = data_array.shape[0]
    lstm_array=[]

    for start, stop in zip(range(0, num_elements-seq_len+1), range(seq_len, num_elements+1)):
        lstm_array.append(data_array[start:stop, :])
        return np.array(lstm_array)

In [5]:
def gen_target(id_df, seq_length, label):
    data_array = id_df[label].values
    num_elements = data_array.shape[0]
    return data_array[seq_length-1:num_elements+1]

In [6]:
X_Columns = [column for column in df.columns if column!='RUL']

In [7]:
min_max_scaler = MinMaxScaler(feature_range=(-1,1))

In [8]:
df[X_Columns] = min_max_scaler.fit_transform(df[X_Columns])

In [9]:
df

Unnamed: 0,unit_number,time,sen_measurement2,sen_measurement3,sen_measurement4,sen_measurement7,sen_measurement8,sen_measurement11,sen_measurement12,sen_measurement13,sen_measurement15,sen_measurement17,sen_measurement20,sen_measurement21,RUL
0,-1.0,-1.000000,-0.632530,-0.186396,-0.380486,0.452496,-0.515152,-0.261905,0.266525,-5.882353e-01,-0.272028,-0.333333,0.426357,0.449323,191
1,-1.0,-0.994460,-0.433735,-0.093961,-0.294733,0.256039,-0.575758,-0.238095,0.530917,-4.411765e-01,-0.177376,-0.333333,0.333333,0.462027,190
2,-1.0,-0.988920,-0.313253,-0.260955,-0.258947,0.420290,-0.454545,-0.500000,0.590618,-5.588235e-01,-0.285110,-0.666667,0.255814,0.242751,189
3,-1.0,-0.983380,-0.313253,-0.487683,-0.337610,0.481481,-0.363636,-0.666667,0.778252,-4.117647e-01,-0.666795,-0.333333,0.147287,0.324772,188
4,-1.0,-0.977839,-0.301205,-0.485066,-0.190749,0.336554,-0.515152,-0.488095,0.492537,-5.294118e-01,-0.195845,-0.166667,0.178295,0.409003,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20626,1.0,0.080332,0.373494,0.174624,0.565834,-0.491143,-0.121212,0.452381,-0.658849,1.176471e-01,0.313582,0.500000,-0.457364,-0.781000,4
20627,1.0,0.085873,0.403614,0.458906,0.732951,-0.674718,0.000000,0.416667,-0.577825,-9.094947e-13,0.454406,0.166667,-0.751938,-0.267606,3
20628,1.0,0.091413,0.331325,0.369959,0.550641,-0.648953,0.030303,0.476190,-0.437100,5.882353e-02,0.844556,0.666667,-0.534884,-0.892019,2
20629,1.0,0.096953,0.216867,0.492043,0.494936,-0.732689,0.060606,0.833333,-0.582090,2.941176e-02,0.646787,0.166667,-0.767442,-0.531069,1


In [10]:
sequence_length = 50
X_train = np.concatenate(list(list(gen_train(df[df['unit_number']==unit], sequence_length, X_Columns)) for unit in df["unit_number"]))
print("X_train shape:",X_train.shape)
pass
y_train = np.concatenate(list(list(gen_target(df[df["unit_number"]==Unit], sequence_length, "RUL")) for Unit in df["unit_number"].unique()))
print("y_train shape:",y_train.shape)

X_train shape: (20631, 50, 14)
y_train shape: (15731,)


In [16]:
nb_features = X_train.shape[2]
nb_out = 1

model = Sequential()
model.add(LSTM(units = 256,
               return_sequences=True,
               input_shape=(sequence_length, nb_features)))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(LSTM(units=128,
               return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='relu'))
model.add(Activation('relu'))
model.compile(loss='mse',optimizer='rmsprop', metrics=['mse'])

  super().__init__(**kwargs)


In [17]:
model.summary()

In [None]:
history = model.fit(X_train, y_train, epochs= 10, batch_size=32, validation_split=0.1, verbose=1, callbacks=[EarlyStopping])