# Data analysis and set up

## Import libraries and define utility functions

In [18]:
import pandas as pd
import numpy as np
import sys
import random
import pickle
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from math import floor
from tensorflow import keras
from tensorflow.keras import layers, callbacks
import tensorflow as tf

In [19]:
# Report how many GPU devices TensorFlow can see.
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available: 2


In [20]:
def mean(l):
    """Return the arithmetic mean of the non-empty sequence `l`."""
    return sum(l) / len(l)


def describe(l):
    """Print and return summary statistics of the non-empty sequence `l`.

    Returns:
        The tuple (minimum, maximum, mean, number of elements).
    """
    # Compute every statistic once and reuse it for both the printout and
    # the return value.
    lowest, highest, average, count = min(l), max(l), mean(l), len(l)
    print(f'Min={lowest:.4}, Max={highest:.4}, Avg={average:.4}, Tot={count}')
    return lowest, highest, average, count

In [21]:
def serialize_info(f_path, seed, training_columns, params, history, best_cost, best_thr, all_cost, all_thr, perf):
    """Pickle one experiment record to the file `<f_path>_<seed>`.

    The seed only appears in the file name. The stored object is the tuple
    (training_columns, params, history, best_cost, best_thr, all_cost,
    all_thr, perf), in that order.
    """
    target = f_path + "_" + str(seed)
    record = (
        training_columns,
        params,
        history,
        best_cost,
        best_thr,
        all_cost,
        all_thr,
        perf,
    )
    with open(target, "wb") as out:
        pickle.dump(record, out)

#### Set up global variables

In [22]:
# Render floats with four decimals in pandas output.
pd.set_option('display.float_format', lambda x: '%.4f' % x)
# Default size applied to every matplotlib figure created below.
plt.rcParams['figure.figsize'] = [18, 12]

In [23]:
# Path prefix used when building the file names passed to serialize_info().
results_path = "results/"
# Candidate random seeds — presumably one per repeated run via
# set_determinism(); they are not iterated in the visible cells (TODO confirm).
seeds = [100, 200, 300, 400, 500, 600, 700, 800]

def set_determinism(seed):
    """Make a run reproducible for the given `seed`.

    Requests deterministic TensorFlow ops and seeds the random generators
    through `tf.keras.utils.set_random_seed`.
    """
    tf.config.experimental.enable_op_determinism()
    tf.keras.utils.set_random_seed(seed)

#### Data import

In [24]:
# `margin` selects which training CSV is loaded and is reused further down
# as the RUL threshold that turns labels into fault / not-fault.
margin = 10
# The first CSV column becomes the DataFrame index.
df = pd.read_csv("data/train/training_" + str(margin) + ".csv", index_col=[0])

In [25]:
# Quick sanity check of the loaded data: size, column names, first rows.
print(len(df), "rows in the dataset")
print(df.columns)
df.head()

33610 rows in the dataset
Index(['Gz', 'Ax', 'Ay', 'Gz_mean', 'Ax_mean', 'Ay_mean', 'Gz_min', 'Ax_min',
       'Ay_min', 'Gz_max', 'Ax_max', 'Ay_max', 'Gz_diff', 'Ax_diff', 'Ay_diff',
       'POSx', 'POSy', 'orient', 'differencing_Gz', 'differencing_Ax',
       'differencing_Ay', 'differencing_Gz_mean', 'differencing_Ax_mean',
       'differencing_Ay_mean', 'differencing_Gz_min', 'differencing_Ax_min',
       'differencing_Ay_min', 'differencing_Gz_max', 'differencing_Ax_max',
       'differencing_Ay_max', 'differencing_Gz_diff', 'differencing_Ax_diff',
       'differencing_Ay_diff', 'orient_discr', 'POSy_discr', 'label'],
      dtype='object')


Unnamed: 0,Gz,Ax,Ay,Gz_mean,Ax_mean,Ay_mean,Gz_min,Ax_min,Ay_min,Gz_max,...,differencing_Ay_min,differencing_Gz_max,differencing_Ax_max,differencing_Ay_max,differencing_Gz_diff,differencing_Ax_diff,differencing_Ay_diff,orient_discr,POSy_discr,label
0,-0.6282,-0.1351,-0.2329,-0.7442,1.2804,1.1006,-0.4221,0.8635,1.0306,-0.9778,...,0.995,-0.1368,0.0139,0.4715,0.0276,-1.0597,-0.0789,1,20.6,960
1,-0.6079,1.1653,0.4926,-0.7306,1.3201,1.0493,-0.4221,0.8635,1.0306,-0.9572,...,0.995,-0.1107,0.0139,0.4715,0.0096,1.2707,0.5526,1,20.6,959
2,-0.5917,1.4625,0.5144,-0.7158,1.4003,1.259,-0.4221,0.8635,1.2123,-0.9408,...,1.1816,-0.1848,0.1923,0.4885,-0.0683,0.3254,0.0004,1,20.7,958
3,-0.5632,-0.059,0.9757,-0.6954,1.2243,1.4664,-0.4055,0.8635,1.2123,-0.9119,...,1.1816,-0.1483,0.1923,0.4885,-0.0136,-1.4467,0.3458,1,20.7,957
4,0.5498,0.2192,0.6788,-0.5616,1.2194,1.414,-0.4055,0.8635,1.2123,0.2165,...,1.1583,1.1685,-0.0294,0.5793,4.7434,0.2389,-0.1707,1,20.8,956


# Machine learning: training phase

## Dataset preprocessing for machine learning models

In this section, RUL labels are converted to binary labels (`0/1`, namely `fault/not_fault`: a sample is labelled `0` when its RUL is below `margin`, `1` otherwise) in order to perform classification instead of regression.

For the `AutoEncoder` model, the dataset is partitioned so that the training set contains neither faults nor samples that anticipate a fault. In other words, every training sample must have a RUL of at least `good_samples_thr`.

In practice, the AutoEncoder must be trained on a portion of the dataset in which no fault is present or imminent.

In [None]:
def _binarize_labels(frame, fault_margin):
    """Overwrite `frame['label']` in place with 1 where RUL >= fault_margin, else 0."""
    frame['label'] = (frame['label'] >= fault_margin).map({True: 1, False: 0})
    return frame


def build_dataset_for_ml_model(df, training_columns=None, split_size=0.75, as_list=False, ae=False,
                               fault_margin=None):
    """Split the dataset into train / validation runs and binarise the labels.

    The rows are cut into consecutive runs, each ending on a row whose
    `label` (the RUL) equals 0. Rows after the last such row do not belong
    to a complete run and are dropped. Runs are assigned to the training set
    in their original order (no shuffling), so different models are compared
    on the same held-out runs.

    Args:
        df: DataFrame containing a `label` column holding the RUL.
        training_columns: Columns to keep; must include "label". `None`
            keeps every column of `df`.
        split_size: Fraction of runs used for training. With a value of 1
            (or more) there is no validation set.
        as_list: If True, return lists of per-run DataFrames instead of
            concatenated NumPy arrays.
        ae: If True, training runs only keep rows whose RUL is at least
            twice `fault_margin` (data for the autoencoder).
        fault_margin: RUL threshold below which a sample is labelled 0.
            `None` falls back to the notebook-level `margin`.

    Returns:
        `(training_set, test_set)` when `split_size < 1`, otherwise only
        `training_set`. Labels are 1 where RUL >= fault_margin and 0 otherwise.

    Raises:
        ValueError: If the split leaves no run for training.
    """
    if fault_margin is None:
        fault_margin = margin
    good_samples_thr = fault_margin * 2

    df_main = df if training_columns is None else df[training_columns]

    # Work with row *positions*: the slices below use iloc, so index labels
    # would only be correct for a default 0..n-1 index.
    fault_positions = np.flatnonzero((df_main["label"] == 0).to_numpy())

    runs = []
    previous = 0
    for position in fault_positions:
        runs.append(df_main.iloc[previous:position + 1, :])
        previous = position + 1

    # If split_size is 1, there will be no val/test set
    train_size = floor(len(runs) * split_size)
    has_test = split_size < 1

    if ae:
        train_runs = [run[run["label"] >= good_samples_thr].copy() for run in runs[:train_size]]
    else:
        train_runs = [run.copy() for run in runs[:train_size]]
    test_runs = [run.copy() for run in runs[train_size:]]

    if not train_runs:
        raise ValueError(
            f"no training runs available: found {len(runs)} run(s) ending in a fault "
            f"and split_size={split_size}"
        )

    for run in train_runs:
        _binarize_labels(run, fault_margin)
    if has_test:
        for run in test_runs:
            _binarize_labels(run, fault_margin)

    if as_list:
        training_set, test_set = train_runs, test_runs
    else:
        training_set = pd.concat(train_runs).to_numpy()
        test_set = pd.concat(test_runs).to_numpy() if has_test else None

    if has_test:
        return training_set, test_set
    return training_set

## Cost model for threshold optimization and performance evaluation

In [None]:
# Accumulates one row of results per evaluated model; the evaluation cells
# below append to it.
all_perf = []

In [None]:
# Unit costs of the cost model: one false alarm vs. one missed fault sample.
BASE_FP = 0.2
BASE_FN = 1


def false_positive_cost(i, is_fault, fault_found):
    """Return the flat cost of one false alarm.

    The arguments are unused; the signature mirrors `false_negative_cost`.
    """
    return BASE_FP


def false_negative_cost(i, is_fault, fault_found):
    """Return the cost of not raising the alarm on fault sample `i`.

    Args:
        i: Position of the sample inside `is_fault`.
        is_fault: 1-D boolean array, True on fault samples.
        fault_found: Whether the current fault run was already detected; in
            that case a further silent sample costs nothing.

    Returns:
        0 if the fault was already found. Otherwise
        `(margin + 1 - j) * BASE_FN`, where `j` is the distance to the first
        following sample that is not a fault (or lies past the end of the
        data): the closer the end of the fault run, the higher the cost.
        If the fault run extends beyond the `margin`-step look-ahead, 0 is
        returned instead of the implicit `None` the loop used to fall through
        to, which made the caller's `cost += ...` fail.
    """
    if fault_found:
        return 0

    n_samples = is_fault.shape[0]
    for j in range(1, margin + 1):
        following = i + j
        if following >= n_samples or not is_fault[following]:
            return (margin + 1 - j) * BASE_FN
    # NOTE(review): the end of the fault run is more than `margin` steps away;
    # the formula above would give a non-positive value, so it is clamped to 0.
    return 0

In [None]:
def threshold_optimization(signal, rul, start, end, n_steps):
    """Grid-search the alarm threshold with the lowest total cost.

    Args:
        signal: Alarm signal, one value per sample (array-like or Series).
        rul: Labels aligned with `signal`; a value of 0 marks a fault
            sample. Column vectors of shape (n, 1) are flattened.
        start, end: Bounds of the threshold grid (both included).
        n_steps: Number of evenly spaced thresholds to evaluate.

    Returns:
        `(best_cost, best_thr, all_cost, all_thr)`: the lowest cost, the first
        threshold reaching it, and the cost / threshold of every grid point.

    Raises:
        ValueError: If `signal` and `rul` have different lengths.
    """
    # Plain 1-D arrays make every access below positional, independent of any
    # pandas index, and avoid the slow per-element Series lookups.
    values = np.asarray(signal).ravel()
    is_fault = (np.asarray(rul).ravel() == 0)
    if values.shape[0] != is_fault.shape[0]:
        raise ValueError(
            f"signal and rul must have the same length, got {values.shape[0]} and {is_fault.shape[0]}"
        )

    best_cost = sys.maxsize
    best_thr = -1
    all_cost = []
    all_thr = []

    for thr in np.linspace(start, end, n_steps):
        tmp_cost = 0
        fault_found = False
        for i in range(values.shape[0]):
            value = values[i]
            if is_fault[i]:
                if value >= thr:
                    fault_found = True
                if value <= thr:
                    # Costs nothing once the current fault run has been detected.
                    tmp_cost += false_negative_cost(i, is_fault, fault_found)
            else:
                # Leaving a fault run resets the detection flag.
                fault_found = False
                if value >= thr:
                    tmp_cost += false_positive_cost(i, is_fault, fault_found)
        if tmp_cost < best_cost:
            best_thr = thr
            best_cost = tmp_cost
        all_cost.append(tmp_cost)
        all_thr.append(thr)

    print(f'Best threshold: {best_thr:.3f}, best cost = {best_cost:.3f}')

    return best_cost, best_thr, all_cost, all_thr

In [None]:
def plot_threshold(signal, thr, rul):
    """Plot the alarm signal, the threshold line and the fault samples.

    Args:
        signal: Alarm signal, one value per sample (array-like or Series).
        thr: Threshold drawn as a horizontal line.
        rul: Labels aligned with `signal`; samples whose label is 0 are
            highlighted in red.
    """
    # Positional 1-D views, so the lookup of fault samples does not depend on
    # a pandas index or on `rul` being a column vector.
    values = np.asarray(signal).ravel()
    fault_positions = np.flatnonzero(np.asarray(rul).ravel() == 0)

    plt.plot(values, alpha=0.5)
    plt.plot(range(len(values)), [thr] * len(values))
    plt.scatter(fault_positions, values[fault_positions], color="red", s=10)

    plt.ylabel('Alarm signal intensity')
    plt.xlabel('Time')
    plt.legend(['Alarm signal', "Threshold", 'Anomalies'], loc='upper right')
    plt.show()

In [None]:
def performance_evaluation(signal, thr, rul):
    """Evaluate an alarm threshold: cost, detections, misses and false alarms.

    Args:
        signal: Alarm signal, one value per sample (array-like or Series).
        thr: Alarm threshold; a sample raises the alarm when `signal >= thr`.
        rul: Labels aligned with `signal`; a value of 0 marks a fault
            sample. Column vectors of shape (n, 1) are flattened.

    Returns:
        `[cost, mean_anticipation, detected_faults, missed_faults, false_alarms]`.
    """
    values = np.asarray(signal).ravel()
    is_fault = (np.asarray(rul).ravel() == 0)
    alarm = (values >= thr)
    n_samples = is_fault.shape[0]

    fp, fn, tp, tot_p = 0, 0, 0, 0
    cost = 0
    anticipation = []

    fault_found = False
    start = 0
    for i in range(n_samples):
        if is_fault[i]:
            # A fault run starts here; index 0 counts too, otherwise a series
            # beginning with a fault is never counted and `start` stays unset.
            if i == 0 or not is_fault[i - 1]:
                tot_p += 1
                start = i
            if not fault_found and alarm[i]:
                tp += 1
                fault_found = True
                anticipation.append((margin - 1) - (i - start))
            # Last sample of an undetected fault run: a missed fault. Only
            # fault samples qualify, so a healthy final sample is not counted.
            if not fault_found and (i == n_samples - 1 or not is_fault[i + 1]):
                fn += 1
            if values[i] <= thr:
                cost += false_negative_cost(i, is_fault, fault_found)
        else:
            fault_found = False
            if alarm[i]:
                fp += 1
                cost += false_positive_cost(i, is_fault, fault_found)

    print(f'The total cost of the model is {cost:.3f}')
    print(f'Detected {tp} faults over {tot_p}, missed faults: {fn}. False alarm: {fp}')
    print('Faults detected with an anticipation of:')
    # NOTE(review): the division by 10 presumably converts samples to seconds
    # (10 samples per second) — confirm against the data acquisition rate.
    for a in anticipation:
        a = a / 10
        print(f' - {a:.1f}s')
    tot_a = sum(anticipation) / 10
    if sum(anticipation) > 0:
        mean_a = sum(anticipation) / len(anticipation) / 10
    else:
        mean_a = 0
    print(f'The mean anticipation is {mean_a:.2f}s. Total amount of anticipation is {tot_a:.1f}s')

    return [cost, mean_a, tp, fn, fp]

## Baseline: raw signal pre-anomaly detection

In [None]:
features_raw = ['Ax','Ax_mean', 'Ax_min', 'Ax_max', 'Ax_diff', 
                'differencing_Ax', 'differencing_Ax_mean', 
                'differencing_Ax_min', 'differencing_Ax_max', 
                'differencing_Ax_diff']

# Baseline: use each (negated) raw feature directly as the alarm signal.
for feature in features_raw:
    training_columns = [feature, "label"]
    _, validation_set_raw = build_dataset_for_ml_model(df, training_columns=training_columns)
    val_raw_signal, val_raw_rul = -validation_set_raw[:, 0], validation_set_raw[:, -1]
    best_cost_raw, best_thr_raw, all_cost_raw, all_thr_raw = threshold_optimization(val_raw_signal, val_raw_rul, start=0, end=val_raw_signal.max(), n_steps=200)
    perf_raw = performance_evaluation(val_raw_signal, best_thr_raw, val_raw_rul)
    # The feature is encoded in the model name so every row keeps the same six
    # fields as the other models (name + the five performance values).
    model_name = "raw_signal_" + feature
    all_perf.append([model_name] + perf_raw)
    # No training happens here, hence no params and no history. One file per
    # feature; the directory in `results_path` must already exist.
    serialize_info(f_path=results_path + model_name, seed=0, training_columns=training_columns,
                   params={}, history=None, best_cost=best_cost_raw, best_thr=best_thr_raw,
                   all_cost=all_cost_raw, all_thr=all_thr_raw, perf=perf_raw)

## Pre-anomaly detection with AutoEncoders

In [None]:
# All columns are used; `label` is the last one, so it ends up as the last
# column of the returned arrays. With ae=True the training runs only keep
# samples that are far from a fault.
training_set_ae, validation_set_ae = build_dataset_for_ml_model(
    df, training_columns=df.columns.tolist(), ae=True)

In [None]:
def build_autoencoder(input_size, hidden):
    """Build an (uncompiled) dense autoencoder.

    Args:
        input_size: Number of input features; the output layer has the same
            width and a linear activation.
        hidden: Iterable with the width of each ReLU hidden layer, in order.

    Returns:
        The `keras.Model` mapping an input vector to its reconstruction.
    """
    inputs = keras.Input(shape=(input_size,), dtype='float32')
    encoded = inputs
    for width in hidden:
        encoded = layers.Dense(width, activation='relu')(encoded)
    reconstruction = layers.Dense(input_size, activation='linear')(encoded)
    return keras.Model(inputs, reconstruction)

In [None]:
# Every column except the trailing label is a feature.
train_cols_ae = training_set_ae.shape[1] - 1
params = {"hidden_ae": [16, 8, 2, 8, 16]}
# The hidden layout is read from `params` (there is no `hidden_ae` variable).
ae = build_autoencoder(input_size=train_cols_ae, hidden=params["hidden_ae"])
ae.compile(optimizer=keras.optimizers.RMSprop(learning_rate=0.0001), 
           loss='mse')
cb_ae = [callbacks.EarlyStopping(patience=30, restore_best_weights=True)]
# The autoencoder is trained to reproduce its own input.
history_ae = ae.fit(training_set_ae[:, :-1], training_set_ae[:, :-1], validation_split=0.15,
                    callbacks=cb_ae, batch_size=32, epochs=1000, verbose=1)

In [None]:
# Reconstruct the validation features (label column excluded).
preds_ae = ae.predict(validation_set_ae[:, :-1])

In [None]:
# Alarm signal: squared reconstruction error summed over the features of each sample.
signal_ae = pd.Series(data=np.sum(np.square(preds_ae - validation_set_ae[:, :-1]), axis=1))
# The last column holds the label.
rul_ae = validation_set_ae[:, -1]

# Evaluate 200 thresholds between 0 and the largest error.
best_cost_ae, best_thr_ae, all_cost_ae, all_thr_ae = threshold_optimization(signal_ae, rul_ae, start=0, end=signal_ae.max(), n_steps=200)

In [None]:
# Cost as a function of the threshold, over the evaluated grid.
plt.xlabel('Threshold')
plt.ylabel('Cost')
plt.plot(all_thr_ae, all_cost_ae)
plt.show()

In [None]:
# Alarm signal of the autoencoder against its best threshold.
plot_threshold(signal_ae, best_thr_ae, rul_ae)

In [None]:
# Cost, anticipation and detection counts at the best threshold.
perf_ae = performance_evaluation(signal_ae, best_thr_ae, rul_ae)

In [None]:
# Record the result row: model name followed by the performance values.
all_perf.append(["autoencoder"] + perf_ae)

## RUL estimation with Dense Neural Networks

In [None]:
# All columns are used; `label` is the last one, so it ends up as the last
# column of the returned arrays.
training_set_nn, validation_set_nn = build_dataset_for_ml_model(
    df, training_columns=df.columns.tolist())

In [None]:
def build_classifier(input_size, hidden):
    """Build an (uncompiled) dense binary classifier.

    Args:
        input_size: Number of input features.
        hidden: Iterable with the width of each ReLU hidden layer, in order;
            an empty iterable connects the input directly to the output unit.

    Returns:
        The `keras.Model` ending in a single sigmoid unit.
    """
    inputs = keras.Input(shape=(input_size,), dtype='float32')
    features = inputs
    for width in hidden:
        features = layers.Dense(width, activation='relu')(features)
    probability = layers.Dense(1, activation='sigmoid')(features)
    return keras.Model(inputs, probability)

**Class weights** are useful when you have an **unbalanced dataset** and want to improve single-label classification results: they let you give more weight to the samples belonging to the rarest class.

In [None]:
# Relative frequency of each label value in the training set.
counts_nn = pd.Series(training_set_nn[:, -1]).value_counts(normalize=True)
# Each class is weighted by the inverse of its frequency.
class_weight_nn = {0: 1/counts_nn[0], 1: 1/counts_nn[1]}
class_weight_nn

In [None]:
# Number of input features: every column except the trailing label.
input_size_nn = training_set_nn.shape[1] - 1

## Logistic Neural Network

#### Model definition

In [None]:
# No hidden layers: the inputs feed the output unit directly.
lin_cl = build_classifier(input_size=input_size_nn, hidden=[])
lin_cl.summary()

In [None]:
# Binary cross-entropy matches the 0/1 labels and the sigmoid output.
lin_cl.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), 
               loss='binary_crossentropy')

#### Training

In [None]:
# Early stopping with a patience of 30 epochs, restoring the best weights.
cb_lin = [callbacks.EarlyStopping(patience=30, restore_best_weights=True)]
# Features are all columns but the last; the last column is the target.
history_lin_cl = lin_cl.fit(training_set_nn[:, :-1], training_set_nn[:, -1], validation_split=0.2,
                            callbacks=cb_lin, class_weight=class_weight_nn,
                            batch_size=32, epochs=1000, verbose=1)

In [None]:
def plot_loss(history, title):
    """Plot the per-epoch loss stored in a Keras `History` object.

    Args:
        history: Object returned by `model.fit`; `history.history['loss']`
            is plotted, plus `'val_loss'` when it was recorded.
        title: Title of the figure.
    """
    plt.plot(history.history['loss'], label='Training loss')
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='Validation loss')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='upper right')
    plt.show()


plot_loss(history_lin_cl, "Logistic Classifier")

#### Threshold optimization

In [None]:
# Predictions on the validation features, flattened to one value per sample.
preds_lin_cl = lin_cl.predict(validation_set_nn[:, :-1]).ravel()

In [None]:
# The alarm signal is the complement of the network output.
signal_lin_cl = pd.Series(data=(1 - preds_lin_cl))
# The last column holds the label.
rul_lin_cl = validation_set_nn[:, -1]

# Evaluate 200 thresholds between 0 and the largest signal value.
best_cost_lin_cl, best_thr_lin_cl, all_cost_lin_cl, all_thr_lin_cl = threshold_optimization(signal_lin_cl, rul_lin_cl, start=0, end=signal_lin_cl.max(), n_steps=200)

In [None]:
# Cost as a function of the threshold, over the evaluated grid.
plt.xlabel('Threshold')
plt.ylabel('Cost')
plt.plot(all_thr_lin_cl, all_cost_lin_cl)
plt.show()

In [None]:
# Alarm signal of the logistic classifier against its best threshold.
plot_threshold(signal_lin_cl, best_thr_lin_cl, rul_lin_cl)

In [None]:
# Cost, anticipation and detection counts at the best threshold.
perf_lin_cl = performance_evaluation(signal_lin_cl, best_thr_lin_cl, rul_lin_cl)

In [None]:
# Record the result row: model name followed by the performance values.
all_perf.append(["logistic_nn"] + perf_lin_cl)

## Deep Dense Neural Network

#### Model definition

In [None]:
# Two hidden layers of 64 and 32 units.
ffnn = build_classifier(input_size=input_size_nn, hidden=[64, 32])
ffnn.summary()

In [None]:
# Binary cross-entropy matches the 0/1 labels and the sigmoid output.
ffnn.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), 
             loss='binary_crossentropy')

#### Training

In [None]:
# Early stopping with a patience of 30 epochs, restoring the best weights.
cb_ffnn = [callbacks.EarlyStopping(patience=30, restore_best_weights=True)]
# Features are all columns but the last; the last column is the target.
history_ffnn = ffnn.fit(training_set_nn[:, :-1], training_set_nn[:, -1], validation_split=0.2,
                        callbacks=cb_ffnn, 
                        class_weight=class_weight_nn,
                        batch_size=32, epochs=1000, verbose=1)

In [None]:
# Loss curves of the training run; plot_loss must already be defined when
# this cell runs.
plot_loss(history_ffnn, "Deep Dense Neural Network")

#### Threshold optimization

In [None]:
# Predictions on the validation features, flattened to one value per sample.
preds_ffnn = ffnn.predict(validation_set_nn[:, :-1]).ravel()

In [None]:
# The alarm signal is the complement of the network output.
signal_ffnn = pd.Series(data=(1 - preds_ffnn))
# The last column holds the label.
rul_ffnn = validation_set_nn[:, -1]

# Evaluate 200 thresholds between 0 and the largest signal value.
best_cost_ffnn, best_thr_ffnn, all_cost_ffnn, all_thr_ffnn = threshold_optimization(signal_ffnn, rul_ffnn, start=0, end=signal_ffnn.max(), n_steps=200)

In [None]:
# Cost as a function of the threshold, over the evaluated grid.
plt.xlabel('Threshold')
plt.ylabel('Cost')
plt.plot(all_thr_ffnn, all_cost_ffnn)
plt.show()

In [None]:
# Alarm signal of the deep dense network against its best threshold.
plot_threshold(signal_ffnn, best_thr_ffnn, rul_ffnn)

In [None]:
# Cost, anticipation and detection counts at the best threshold.
perf_ffnn = performance_evaluation(signal_ffnn, best_thr_ffnn, rul_ffnn)

In [None]:
# Record the result row: model name followed by the performance values.
all_perf.append(["deep_nn"] + perf_ffnn)

## Convolutional Neural Network

#### Construct input as sequences

In [None]:
def sliding_window_2D(data, stride=1, window=None):
    """Cut a DataFrame into overlapping windows of consecutive rows.

    Args:
        data: DataFrame with one row per time step.
        stride: Step between the first rows of two consecutive windows.
        window: Number of rows per window. `None` falls back to the
            notebook-level `w_len`.

    Returns:
        Array of shape (n_windows, window, n_columns). It is empty when
        `data` has fewer than `window` rows.

    Raises:
        ValueError: If `window` is smaller than 1.
    """
    if window is None:
        window = w_len
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window}")

    values = data.values
    n_starts = len(data) - window + 1
    if n_starts <= 0:
        # Too short for a single window. Slicing anyway would use a negative
        # stop, which counts from the end and yields ragged pieces.
        return np.empty((0, window, values.shape[1]), dtype=values.dtype)

    # One shifted copy of the table per position inside the window ...
    shifted = [values[offset:offset + n_starts:stride] for offset in range(window)]
    # ... stacked along a new middle axis.
    return np.stack(shifted, axis=1)


def sliding_window_by_fault(data, cols, stride=1, window=None):
    """Window every run separately, so that no window spans two runs.

    Args:
        data: Iterable of DataFrames (one per run), each with a `label` column.
        cols: Feature columns placed in the windows.
        stride: Step between consecutive windows.
        window: Number of rows per window. `None` falls back to the
            notebook-level `w_len`.

    Returns:
        `(windows, labels)`: the windows of all runs concatenated, and for
        each window the label of its last row.
    """
    if window is None:
        window = w_len

    all_windows, all_labels = [], []
    for gdata in data:
        all_windows.append(sliding_window_2D(gdata[cols], stride, window))
        # The label of a window is the label of its last row.
        all_labels.append(gdata['label'].iloc[window - 1::stride])
    return np.concatenate(all_windows), np.concatenate(all_labels)

In [None]:
# NOTE(review): `w_len` (rows per window) is read by the windowing helpers and
# by build_cnn_regressor but is not assigned in any visible cell. One
# `margin` of samples is assumed here, like TIME_STEPS for the recurrent
# models — confirm the intended value.
w_len = margin

# All columns are kept per run; the feature columns are selected below.
traning_set_cnn, validation_set_cnn = build_dataset_for_ml_model(
    df, training_columns=df.columns.tolist(), as_list=True)

train_cols_cnn = ['differencing_Ax_mean', 'differencing_Gz_mean', 'differencing_Ay_mean']
tr_sw, tr_sw_r = sliding_window_by_fault(traning_set_cnn, train_cols_cnn)
val_sw, val_sw_r = sliding_window_by_fault(validation_set_cnn, train_cols_cnn)

In [None]:
# Shape and label of the first and of the last training window.
tr_sw[0].shape, tr_sw_r[0], tr_sw[-1].shape, tr_sw_r[-1]

In [None]:
# Size of the second axis of a single window, i.e. the number of feature columns.
input_size_cnn = tr_sw[0].shape[1]

In [None]:
# Relative frequency of each label value among the training windows.
counts_cnn = pd.Series(tr_sw_r).value_counts(normalize=True)
# Each class is weighted by the inverse of its frequency.
class_weight_cnn = {0: 1/counts_cnn[0], 1: 1/counts_cnn[1]}
class_weight_cnn

#### Model definition

In [None]:
def build_cnn_regressor(input_size, filters, kernel_size, hidden):
    """Build an (uncompiled) 1-D convolutional network with a sigmoid output.

    The input shape is (w_len, input_size), where `w_len` is read from the
    notebook-level variable of that name.

    Args:
        input_size: Number of features per time step.
        filters: Number of filters of the single Conv1D layer.
        kernel_size: Kernel size of the Conv1D layer.
        hidden: Iterable with the width of each ReLU dense layer that
            follows the flattened convolution output.

    Returns:
        The `keras.Model` ending in a single sigmoid unit.
    """
    inputs = keras.Input(shape=(w_len, input_size), dtype='float32')
    features = layers.Conv1D(filters, kernel_size=kernel_size,
                             activation='relu')(inputs)
    features = layers.Flatten()(features)
    for width in hidden:
        features = layers.Dense(width, activation='relu')(features)
    probability = layers.Dense(1, activation='sigmoid')(features)
    return keras.Model(inputs, probability)

In [None]:
# One Conv1D layer (4 filters, kernel size 5) followed by dense layers of 64 and 32 units.
cnn = build_cnn_regressor(input_size=input_size_cnn, filters=4,
                          kernel_size=5, hidden=[64, 32])
cnn.summary()

In [None]:
# Binary cross-entropy matches the 0/1 labels and the sigmoid output.
cnn.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), 
            loss='binary_crossentropy')

#### Training

In [None]:
# Early stopping with a patience of 30 epochs, restoring the best weights.
cb_cnn = [callbacks.EarlyStopping(patience=30, restore_best_weights=True)]
# Inputs are the windows, targets the label of each window.
history_cnn = cnn.fit(tr_sw, tr_sw_r, validation_split=0.2,
                      callbacks=cb_cnn,
                      class_weight=class_weight_cnn,
                      batch_size=32, epochs=1000, verbose=1)

In [None]:
# Loss curves of the training run; plot_loss must already be defined when
# this cell runs.
plot_loss(history_cnn, "Convolutional Neural Network")

#### Threshold optimization

In [None]:
# Predictions on the validation windows, flattened to one value per window.
preds_cnn = cnn.predict(val_sw).ravel()

In [None]:
# The alarm signal is the complement of the network output.
signal_cnn = pd.Series(data=(1 - preds_cnn))
# One label per validation window.
rul_cnn = val_sw_r

# Evaluate 200 thresholds between 0 and the largest signal value.
best_cost_cnn, best_thr_cnn, all_cost_cnn, all_thr_cnn = threshold_optimization(signal_cnn, rul_cnn, start=0, end=signal_cnn.max(), n_steps=200)

In [None]:
# Cost as a function of the threshold, over the evaluated grid.
plt.xlabel('Threshold')
plt.ylabel('Cost')
plt.plot(all_thr_cnn, all_cost_cnn)
plt.show()

In [None]:
# Alarm signal of the convolutional network against its best threshold.
plot_threshold(signal_cnn, best_thr_cnn, rul_cnn)

In [None]:
# Cost, anticipation and detection counts at the best threshold.
perf_cnn = performance_evaluation(signal_cnn, best_thr_cnn, rul_cnn)

In [None]:
# Record the result row: model name followed by the performance values.
all_perf.append(["conv_nn"] + perf_cnn)

## Recurrent Neural Network

#### Construct input as sequences

In [None]:
# Create a 3D input
def create_dataset_3D(X, y, time_steps = 1):
    """Turn a 2-D feature array into overlapping sequences with next-step targets.

    Window number k holds rows k .. k + time_steps - 1 of `X`; its target
    is `y[k + time_steps]`, the value right after the window.

    Returns:
        `(windows, targets)`: an array of stacked windows and a column vector
        of shape (n_windows, 1), with n_windows = len(X) - time_steps.
    """
    n_windows = len(X) - time_steps
    windows = [X[first:first + time_steps, :] for first in range(n_windows)]
    targets = [y[first + time_steps] for first in range(n_windows)]
    return np.array(windows), np.array(targets).reshape(-1, 1)

In [None]:
# Each sequence spans one `margin` of consecutive samples.
TIME_STEPS = margin

# All columns are used; `label` is the last one, so it ends up as the last
# column of the returned arrays.
traning_set_rnn, validation_set_rnn = build_dataset_for_ml_model(
    df, training_columns=df.columns.tolist())
X_train_rnn, y_train_rnn = create_dataset_3D(traning_set_rnn[:, :-1], 
                                             traning_set_rnn[:, -1], 
                                             TIME_STEPS)
X_val_rnn, y_val_rnn = create_dataset_3D(validation_set_rnn[:, :-1], 
                                         validation_set_rnn[:, -1],   
                                         TIME_STEPS)
print('X_train_rnn.shape: ', X_train_rnn.shape)
print('y_train_rnn.shape: ', y_train_rnn.shape)
print('X_val_rnn.shape: ', X_val_rnn.shape)
print('y_val_rnn.shape: ', y_val_rnn.shape)

In [None]:
# Relative frequency of each label value among the RNN training targets
# (the targets form a column vector, hence the ravel). These are the labels
# the recurrent models are actually trained on, not the CNN window labels.
counts_rnn = pd.Series(y_train_rnn.ravel()).value_counts(normalize=True)
# Each class is weighted by the inverse of its frequency.
class_weight_rnn = {0: 1/counts_rnn[0], 1: 1/counts_rnn[1]}
class_weight_rnn

#### Model definition

In [None]:
# Create BiLSTM model for classification
def create_model_bilstm_cl(units, X_train, lr=0.0001):
    """Build and compile a two-layer bidirectional LSTM binary classifier.

    Args:
        units: Number of units of each LSTM layer.
        X_train: 3-D training array; only its second and third dimensions
            are read, to define the input shape.
        lr: Learning rate of the Adam optimizer.

    Returns:
        The compiled `keras.Sequential` model (binary cross-entropy loss).
    """
    sequence_shape = (X_train.shape[1], X_train.shape[2])
    model = keras.Sequential()
    # The first recurrent layer returns the whole sequence for the second one.
    first_layer = keras.layers.Bidirectional(
        keras.layers.LSTM(units=units, return_sequences=True),
        input_shape=sequence_shape)
    for layer in (first_layer,
                  keras.layers.Bidirectional(keras.layers.LSTM(units=units)),
                  keras.layers.Dense(1, activation="sigmoid")):
        model.add(layer)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss='binary_crossentropy')
    return model

# Create LSTM or GRU model
def create_model_cl(units, m, X_train, lr=0.0001):
    """Build and compile a two-layer recurrent binary classifier.

    Args:
        units: Number of units of each recurrent layer.
        m: Recurrent layer class to instantiate (the notebook passes
            `keras.layers.GRU` and `keras.layers.LSTM`).
        X_train: 3-D training array; only its second and third dimensions
            are read, to define the input shape.
        lr: Learning rate of the Adam optimizer.

    Returns:
        The compiled `keras.Sequential` model (binary cross-entropy loss).
    """
    sequence_shape = [X_train.shape[1], X_train.shape[2]]
    model = keras.Sequential()
    # Each recurrent layer is followed by a dropout of 0.2; the first one
    # returns the whole sequence for the second one.
    stack = (
        m(units=units, return_sequences=True, input_shape=sequence_shape),
        keras.layers.Dropout(0.2),
        m(units=units),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(units=1, activation="sigmoid"),
    )
    for layer in stack:
        model.add(layer)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss='binary_crossentropy')
    return model

In [None]:
# Instantiate the three recurrent classifiers with 64 units per layer;
# X_train_rnn is passed to define the input shape.
# BiLSTM
model_bilstm = create_model_bilstm_cl(64, X_train_rnn)
# GRU and LSTM
model_gru = create_model_cl(64, keras.layers.GRU, X_train_rnn)
model_lstm = create_model_cl(64, keras.layers.LSTM, X_train_rnn)

#### Training

In [None]:
def fit_model(model, name, X_train, y_train, pat=12, ep=1000, bs=32):
    """Train a recurrent classifier with early stopping on the validation loss.

    Args:
        model: Compiled Keras model to train.
        name: Label printed before training starts.
        X_train, y_train: Training sequences and their targets.
        pat: Early-stopping patience, in epochs.
        ep: Maximum number of epochs.
        bs: Batch size.

    Returns:
        The `History` object returned by `model.fit`.
    """
    print("\n", name)
    # restore_best_weights keeps the weights of the best validation epoch
    # instead of those reached `pat` epochs later, consistently with the
    # other models of the notebook.
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               patience=pat,
                                               restore_best_weights=True)
    # Class weights come from the notebook-level `class_weight_rnn`; the
    # samples are presented in their original order (no shuffling).
    history = model.fit(X_train, y_train, epochs=ep,
                        class_weight=class_weight_rnn,
                        validation_split=0.2, batch_size=bs,
                        shuffle=False, callbacks=[early_stop])
    return history

In [None]:
# Train the three recurrent models on the same sequences.
history_bilstm = fit_model(model_bilstm, "model_bilstm", X_train_rnn, y_train_rnn)
history_lstm = fit_model(model_lstm, "model_lstm", X_train_rnn, y_train_rnn)
history_gru = fit_model(model_gru, "model_gru", X_train_rnn, y_train_rnn)

In [None]:
# Loss curves of the three training runs; plot_loss must already be defined
# when this cell runs.
plot_loss(history_bilstm, "model_bilstm")
plot_loss(history_lstm, "model_lstm")
plot_loss(history_gru, "model_gru")

#### Threshold optimization

In [None]:
# Predictions on the validation sequences, flattened to one value per sequence.
preds_bilstm = model_bilstm.predict(X_val_rnn).ravel()
preds_lstm = model_lstm.predict(X_val_rnn).ravel()
preds_gru = model_gru.predict(X_val_rnn).ravel()

# The alarm signal is the complement of the network output; all three models
# share the same validation targets.
signal_bilstm = pd.Series(data=(1 - preds_bilstm))
rul_bilstm = y_val_rnn
signal_lstm = pd.Series(data=(1 - preds_lstm))
rul_lstm = y_val_rnn
signal_gru = pd.Series(data=(1 - preds_gru))
rul_gru = y_val_rnn

# Evaluate 200 thresholds between 0 and the largest signal value of each model.
best_cost_bilstm, best_thr_bilstm, all_cost_bilstm, all_thr_bilstm = threshold_optimization(signal_bilstm, rul_bilstm, start=0, end=signal_bilstm.max(), n_steps=200)
best_cost_lstm, best_thr_lstm, all_cost_lstm, all_thr_lstm = threshold_optimization(signal_lstm, rul_lstm, start=0, end=signal_lstm.max(), n_steps=200)
best_cost_gru, best_thr_gru, all_cost_gru, all_thr_gru = threshold_optimization(signal_gru, rul_gru, start=0, end=signal_gru.max(), n_steps=200)

In [None]:
# One cost-vs-threshold figure per recurrent model: BiLSTM, LSTM, GRU.
for thresholds, costs in (
    (all_thr_bilstm, all_cost_bilstm),
    (all_thr_lstm, all_cost_lstm),
    (all_thr_gru, all_cost_gru),
):
    plt.xlabel('Threshold')
    plt.ylabel('Cost')
    plt.plot(thresholds, costs)
    plt.show()

In [None]:
# Alarm signal of each recurrent model against its best threshold.
plot_threshold(signal_bilstm, best_thr_bilstm, rul_bilstm)
plot_threshold(signal_lstm, best_thr_lstm, rul_lstm)
plot_threshold(signal_gru, best_thr_gru, rul_gru)

In [None]:
# Cost, anticipation and detection counts of each recurrent model at its
# best threshold, each preceded by a header line.
print("Bi-LSTM:")
perf_bilstm = performance_evaluation(signal_bilstm, best_thr_bilstm, rul_bilstm)
print("\nLSTM:")
perf_lstm = performance_evaluation(signal_lstm, best_thr_lstm, rul_lstm)
print()
print("\nGRU:")
perf_gru = performance_evaluation(signal_gru, best_thr_gru, rul_gru)
In [None]:
# Record one result row per recurrent model: name followed by the performance values.
all_perf.append(["bilstm"] + perf_bilstm)
all_perf.append(["lstm"] + perf_lstm)
all_perf.append(["gru"] + perf_gru)

## Analysis over the validation set

In [None]:
# Summary table with one row per model. Every row of all_perf must hold
# exactly these six fields: the model name plus the five values returned by
# performance_evaluation().
pd.DataFrame(all_perf, columns=["model", "cost", "anticipation", "detected_faults", "missed_faults", "false_alarms"]) 