# Training

In [1]:
# Prediction margin: the only parameter to set. Recommended: margin in {5, 10, 15, 20} (aka 0.5, 1, 1.5, 2 seconds)
# NOTE(review): 30 (3 seconds on the same scale) is outside the recommended set listed above — confirm it is intentional.
margin = 30

## Import libraries and define utility functions

In [2]:
import pandas as pd
import numpy as np
import sys
import random
import pickle
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from math import floor
from tensorflow import keras
from tensorflow.keras import layers, callbacks
import tensorflow as tf

In [3]:
# If you have more than one GPU in your system, the GPU with the lowest ID will be selected by default.
# https://www.tensorflow.org/guide/gpu#using_a_single_gpu_on_a_multi-gpu_system
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available: 2


In [4]:
def mean(l):
    """Return the arithmetic mean of the non-empty sequence ``l``.

    Raises ``ZeroDivisionError`` when ``l`` is empty.
    """
    return sum(l) / len(l)

In [5]:
def serialize_perf(model_name, seed, columns_name, training_columns, params, params_idx, 
                   history, best_cost, best_thr, all_cost, all_thr, perf):
    """Pickle the outcome of one training run under ``results_path``.

    The file name is ``<model_name>-<columns_name>-s<seed>-p<params_idx>`` and the
    payload is the tuple ``(training_columns, params, history, best_cost, best_thr,
    all_cost, all_thr, perf)``. The target directory is created when missing, so a
    fresh checkout does not fail on the first write.

    NOTE(review): callers pass the object returned by ``fit`` as ``history``;
    pickling it may drag the attached model into the file — confirm whether the
    readers of these files only need ``history.history``.
    """
    import os  # local import: keeps this cell self-contained

    f_path = results_path + model_name + "-" + columns_name + "-s" + str(seed) + "-p" + str(params_idx)
    out_dir = os.path.dirname(f_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    to_serialize = (training_columns, params, history, best_cost, best_thr, all_cost, all_thr, perf)
    with open(f_path, "wb") as file:
        pickle.dump(to_serialize, file)

#### Set up global variables

In [6]:
# Render floats with four decimal places in every pandas display below.
pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [7]:
# Output locations, one set per margin value. `results_path` and `models_path` are
# directories (trailing slash); `threshold_path` is a directory plus the "val_" file prefix.
results_path = "results_" + str(margin) + "/"
models_path = "models_" + str(margin) + "/"
threshold_path = "threshold_" + str(margin) + "/val_"
# Every experiment below is repeated once per seed.
seeds = [100, 200, 300]

def set_determinism(seed):
    """Seed Keras/TensorFlow via ``set_random_seed`` and enable TensorFlow op determinism."""
    tf.keras.utils.set_random_seed(seed)
    tf.config.experimental.enable_op_determinism()

In [8]:
# Hyperparameters shared by every model trained in this notebook.
patience = 50            # passed to EarlyStopping(patience=...)
epochs = 1000            # passed to fit(epochs=...); early stopping may end training sooner
batch_size = 128
learning_rate = 0.0005   # passed to the Adam optimizer
validation_split = 0.2   # passed to fit(validation_split=...)

In [9]:
# Rolling-window feature names follow "<signal>_<statistic>_w<window>", optionally
# prefixed with "differencing_". Within one list the statistic varies slowest and
# the signal fastest (Gz, Ax, Ay for mean, then for std, ...).
_SIGNALS = ('Gz', 'Ax', 'Ay')
_STATISTICS = ('mean', 'std', 'min', 'max')


def _window_feature_names(window, prefix=''):
    """Return the 12 column names of one rolling window, in statistic-major order."""
    return [prefix + signal + '_' + statistic + '_w' + str(window)
            for statistic in _STATISTICS
            for signal in _SIGNALS]


w5_features_no_diff = _window_feature_names(5)
w5_features_diff = _window_feature_names(5, prefix='differencing_')

w10_features_no_diff = _window_feature_names(10)
w10_features_diff = _window_feature_names(10, prefix='differencing_')

w15_features_no_diff = _window_feature_names(15)
w15_features_diff = _window_feature_names(15, prefix='differencing_')

w20_features_no_diff = _window_feature_names(20)
w20_features_diff = _window_feature_names(20, prefix='differencing_')

_all_no_diff = w5_features_no_diff + w10_features_no_diff + w15_features_no_diff + w20_features_no_diff
_all_diff = w5_features_diff + w10_features_diff + w15_features_diff + w20_features_diff

# Named column subsets used for training; every subset ends with the 'label' column.
features = {
    "all_features": _all_no_diff + _all_diff + ['label'],
    "w5_features": w5_features_no_diff + w5_features_diff + ['label'],
    "w10_features": w10_features_no_diff + w10_features_diff + ['label'],
    "w15_features": w15_features_no_diff + w15_features_diff + ['label'],
    "w20_features": w20_features_no_diff + w20_features_diff + ['label'],
    "no_diff_features": _all_no_diff + ['label'],
    "diff_features": _all_diff + ['label'],
}

#### Data import

In [10]:
# Load the training CSV that matches the chosen margin; the first CSV column becomes the index.
df = pd.read_csv("data/train/training_" + str(margin) + ".csv", index_col=[0])

In [11]:
# Quick overview of the loaded frame: size, column names and summary statistics.
print(len(df), "rows in the dataset")
print(df.columns)
df.describe()

33330 rows in the dataset
Index(['Gz', 'Ax', 'Ay', 'Gz_diff', 'Ax_diff', 'Ay_diff', 'Gz_mean_w5',
       'Ax_mean_w5', 'Ay_mean_w5', 'Gz_std_w5',
       ...
       'differencing_Ay_std_w20', 'differencing_Gz_min_w20',
       'differencing_Ax_min_w20', 'differencing_Ay_min_w20',
       'differencing_Gz_max_w20', 'differencing_Ax_max_w20',
       'differencing_Ay_max_w20', 'orient_discr', 'POSy_discr', 'label'],
      dtype='object', length=114)


Unnamed: 0,Gz,Ax,Ay,Gz_diff,Ax_diff,Ay_diff,Gz_mean_w5,Ax_mean_w5,Ay_mean_w5,Gz_std_w5,...,differencing_Ay_std_w20,differencing_Gz_min_w20,differencing_Ax_min_w20,differencing_Ay_min_w20,differencing_Gz_max_w20,differencing_Ax_max_w20,differencing_Ay_max_w20,orient_discr,POSy_discr,label
count,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,...,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0,33330.0
mean,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,-0.0,...,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0071,25.0268,693.7214
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9003,3.6446,486.4557
min,-3.591,-9.4163,-6.6125,-8.9902,-8.4056,-6.2725,-3.113,-8.1356,-4.8702,-0.9338,...,-4.0655,-3.9112,-11.6159,-7.012,-5.2183,-3.2827,-3.8541,-1.0,20.1,0.0
25%,-0.5174,-0.3436,-0.526,-0.2134,-0.433,-0.4971,-0.5244,-0.3965,-0.6088,-0.687,...,-0.5777,-0.4783,-0.4118,-0.5316,-0.3886,-0.6152,-0.6249,-1.0,21.1,297.0
50%,-0.2785,0.0458,-0.0226,0.0175,0.0045,0.0034,-0.2717,0.0523,-0.0462,-0.4343,...,-0.0641,0.0813,0.0572,0.0735,0.0164,-0.1841,-0.1103,0.0,25.6,603.5
75%,0.6446,0.5491,0.5568,0.2488,0.4305,0.4885,0.6691,0.6927,0.595,0.3711,...,0.5177,0.388,0.4509,0.6111,0.534,0.4085,0.5402,1.0,28.9,1003.0
max,2.617,4.2651,6.0183,9.0084,8.7169,7.9005,2.0682,2.3163,4.9736,7.5768,...,5.6167,4.0408,4.2086,3.8691,3.9355,5.3217,5.9881,1.0,29.2,2079.0


# Machine learning: training phase

## Dataset preprocessing for machine learning models

In this section, RUL labels are converted to binary labels in order to perform classification instead of regression: a sample gets `1` (`not_fault`) when its RUL is at least `margin`, and `0` (`fault`) when its RUL is below `margin`.

For the `AutoEncoder` model, the dataset is partitioned such that the training set contains neither faults nor samples that anticipate a fault. In other words, only samples whose RUL is at least `good_samples_thr` (twice the margin) are kept for training.

We basically need an entire section of dataset where faults are not present.

In [12]:
def build_dataset_for_ml_model(df, training_columns, split_size=0.75, as_list=False, ae=False):
    """Split ``df`` into run-to-failure segments and build train/validation sets.

    A segment is a run of consecutive rows that ends on a row whose ``label``
    (the RUL) is 0. Rows after the last such row are not part of any segment
    and are dropped. The first ``floor(n_segments * split_size)`` segments,
    in file order, form the training set and the remaining ones the
    validation/test set. Labels are then binarized with the global ``margin``:
    1 when RUL >= margin, 0 otherwise.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain every name in ``training_columns``.
    training_columns : list of str
        Columns to keep; must include ``'label'``.
    split_size : float
        Fraction of segments used for training. With ``split_size >= 1`` no
        test set is built and only the training set is returned.
    as_list : bool
        When False, each set is a single numpy array (segments concatenated).
        When True, each set is a list with one DataFrame per segment.
    ae : bool
        When True, training segments keep only rows with RUL >= 2 * margin
        (autoencoder training data). The test set is never filtered.

    Returns
    -------
    ``(training_set, test_set)`` when ``split_size < 1``, else ``training_set``.

    Raises
    ------
    ValueError
        If no segment ends up in the training split (for instance, ``df`` has
        no row with label 0).
    """
    df_main = df[training_columns]
    good_samples_thr = margin * 2

    # Work with row POSITIONS: the segments are cut with .iloc, so index labels
    # are only correct when the index happens to be 0..n-1.
    fault_positions = np.flatnonzero((df_main["label"] == 0).to_numpy())

    segments = []
    previous = 0
    for position in fault_positions:
        segments.append(df_main.iloc[previous:position + 1, :])
        previous = position + 1

    train_size = floor(len(segments) * split_size)
    has_test = split_size < 1
    if train_size < 1:
        raise ValueError("Not enough fault-terminated segments (%d) for split_size=%s"
                         % (len(segments), split_size))

    def _binarize(frame):
        # 1 = RUL at least `margin`, 0 = fault within the margin.
        frame['label'] = (frame['label'] >= margin).map({True: 1, False: 0})
        return frame

    if ae:
        train_parts = [s[s["label"] >= good_samples_thr].copy() for s in segments[:train_size]]
    else:
        train_parts = [s.copy() for s in segments[:train_size]]
    test_parts = [s.copy() for s in segments[train_size:]]

    if as_list:
        training_set = [_binarize(part) for part in train_parts]
        test_set = [_binarize(part) for part in test_parts]
    else:
        # One concat per set instead of re-concatenating inside a loop.
        training_set = _binarize(pd.concat(train_parts)).to_numpy()
        if has_test:
            test_set = _binarize(pd.concat(test_parts)).to_numpy()

    if has_test:
        return training_set, test_set
    return training_set

## Cost model for threshold optimization and performance evaluation

In [13]:
# One row per evaluated configuration; the cells below append
# [model_name, seed, columns, params] + the list returned by performance_evaluation.
all_perf = []

In [14]:
# Unit costs of the cost model: one false-positive sample costs BASE_FP, and
# BASE_FN is the multiplier used by false_negative_cost.
BASE_FP = 0.2
BASE_FN = 1

def false_positive_cost(i, is_fault, fault_found):
    """Return the cost of one false alarm.

    The cost is the constant ``BASE_FP``; the three arguments are accepted but
    not used.
    """
    return BASE_FP

def false_negative_cost(i, is_fault, fault_found):
    """Return the cost of a missed alarm at sample ``i`` of a flagged window.

    Parameters
    ----------
    i : int
        Position of the missed sample.
    is_fault : numpy.ndarray of bool
        True for the samples that should raise an alarm.
    fault_found : bool
        Whether the current window has already been detected.

    Returns
    -------
    0 when the window was already detected. Otherwise
    ``(margin + 1 - j) * BASE_FN``, where ``j`` (1..margin) is the distance to
    the first following sample that is not flagged or lies past the end of the
    series — the closer the window's end, the higher the cost. When the window
    continues for more than ``margin`` samples after ``i`` the cost is 0
    (previously the function fell through and returned ``None``, which broke
    the callers' ``cost += ...`` accumulation).
    """
    if fault_found:
        return 0
    n_samples = is_fault.shape[0]
    for j in range(1, margin + 1):
        if i + j >= n_samples or not is_fault[i + j]:
            return (margin + 1 - j) * BASE_FN
    # Window end is further away than `margin`: the linear cost has reached 0.
    return 0

In [15]:
def threshold_optimization(signal, rul, start, end, n_steps):
    """Grid-search the alarm threshold that minimises the cost model.

    ``n_steps`` thresholds evenly spaced in ``[start, end]`` are evaluated.
    For each threshold the series is scanned once: an un-flagged sample with
    ``signal >= thr`` adds ``false_positive_cost``; a flagged sample with
    ``signal <= thr`` adds ``false_negative_cost`` (which receives whether the
    current window has already been detected).

    Parameters
    ----------
    signal : array-like
        Alarm signal, one value per sample (numpy array or pandas Series).
    rul : array-like
        Labels; samples whose value equals 0 are the ones that should raise an
        alarm. Both flat and ``(n, 1)`` arrays are accepted.
    start, end : float
        Bounds of the threshold grid.
    n_steps : int
        Number of thresholds to try.

    Returns
    -------
    ``(best_cost, best_thr, all_cost, all_thr)``; ties keep the lowest threshold.
    """
    # Convert once to flat numpy arrays: access is positional whatever the
    # Series index is, and far cheaper than scalar Series indexing in the loop.
    signal_values = np.asarray(signal).ravel()
    is_fault = (np.asarray(rul).ravel() == 0)
    n_samples = signal_values.shape[0]

    best_cost = sys.maxsize
    best_thr = -1
    all_cost = []
    all_thr = []

    for thr in np.linspace(start, end, n_steps):
        tmp_cost = 0
        fault_found = False
        for i in range(n_samples):
            value = signal_values[i]
            if is_fault[i]:
                if value >= thr:
                    fault_found = True
                if value <= thr:
                    tmp_cost += false_negative_cost(i, is_fault, fault_found)
            else:
                # Leaving a flagged window resets the detection state.
                fault_found = False
                if value >= thr:
                    tmp_cost += false_positive_cost(i, is_fault, fault_found)
        if tmp_cost < best_cost:
            best_thr = thr
            best_cost = tmp_cost
        all_cost.append(tmp_cost)
        all_thr.append(thr)

    return best_cost, best_thr, all_cost, all_thr

In [16]:
def plot_threshold(signal, thr, rul):
    """Plot the alarm signal, the threshold line and the flagged samples.

    Parameters
    ----------
    signal : array-like
        Alarm signal, one value per sample.
    thr : float
        Threshold drawn as a horizontal line.
    rul : array-like
        Labels; samples whose value equals 0 are drawn as red dots.
    """
    signal_values = np.asarray(signal).ravel()
    fault_positions = np.flatnonzero(np.asarray(rul).ravel() == 0)
    n_samples = len(signal_values)

    plt.plot(signal_values, alpha=0.5)
    plt.plot(range(n_samples), [thr] * n_samples)
    plt.scatter(fault_positions, signal_values[fault_positions], color="red", s=10)

    plt.ylabel('Alarm signal intensity')
    plt.xlabel('Time')
    plt.legend(['Alarm signal', "Threshold", 'Anomalies'], loc='upper right')
    plt.show()

In [17]:
def performance_evaluation(signal, thr, rul):
    """Evaluate an alarm threshold on a labelled series.

    A *window* is a run of consecutive samples whose label equals 0. A window
    is a true positive when at least one of its samples has ``signal >= thr``
    and a false negative otherwise. Every un-flagged sample with
    ``signal >= thr`` is one false positive.

    Parameters
    ----------
    signal : array-like
        Alarm signal (numpy array or pandas Series).
    thr : float
        Alarm threshold.
    rul : array-like
        Labels, flat or ``(n, 1)``; value 0 marks the samples to detect.

    Returns
    -------
    list
        ``[cost, mean_a, tp, fn, fp]`` where ``cost`` accumulates
        ``false_positive_cost`` / ``false_negative_cost`` and ``mean_a`` is the
        mean anticipation of the detected windows divided by 10 (0 when the
        anticipations do not sum to a positive value).
    """
    # Sample count -> seconds: the margin comment at the top of the notebook
    # equates 5 samples with 0.5 seconds.
    samples_per_second = 10

    signal_values = np.asarray(signal).ravel()
    is_fault = (np.asarray(rul).ravel() == 0)
    alarm = (signal_values >= thr)
    n_samples = is_fault.shape[0]

    fp, fn, tp = 0, 0, 0
    cost = 0
    anticipation = []
    fault_found = False
    start = 0

    for i in range(n_samples):
        # A window may also start on the very first sample.
        if is_fault[i] and (i == 0 or not is_fault[i - 1]):
            start = i
        if is_fault[i] and not fault_found and alarm[i]:
            tp += 1
            fault_found = True
            anticipation.append((margin - 1) - (i - start))
        # A window ends on its last flagged sample; count a miss exactly once,
        # and never on a healthy sample at the end of the series.
        window_ends = is_fault[i] and (i == n_samples - 1 or not is_fault[i + 1])
        if window_ends and not fault_found:
            fn += 1
        if is_fault[i] and signal_values[i] <= thr:
            cost += false_negative_cost(i, is_fault, fault_found)
        if not is_fault[i]:
            fault_found = False
            if alarm[i]:
                fp += 1
                cost += false_positive_cost(i, is_fault, fault_found)

    if sum(anticipation) > 0:
        mean_a = sum(anticipation) / len(anticipation) / samples_per_second
    else:
        mean_a = 0

    return [cost, mean_a, tp, fn, fp]

## Baseline: raw signal pre-anomaly detection

In [18]:
# Every Ax-derived column is evaluated on its own as a raw alarm signal: the
# plain signal, its diff, the rolling statistics for each window, and the
# "differencing_" counterpart of each of those names.
_ax_columns = ['Ax', 'Ax_diff'] + [
    'Ax_' + statistic + '_w' + str(window)
    for window in (5, 10, 15, 20)
    for statistic in ('mean', 'std', 'min', 'max')
]
features_raw = _ax_columns + ['differencing_' + name for name in _ax_columns]

# Baseline: use each raw feature, negated, directly as the alarm signal and only
# tune the threshold on the validation split.
# NOTE(review): no random operation is visible in this loop body, so the three
# seeds look like they repeat the same computation — confirm the repetition is wanted.
for seed in seeds:
    for feature in features_raw:
        
        set_determinism(seed)
        
        training_columns = [feature, "label"]
        # Only the validation part is needed: there is nothing to train here.
        _, validation_set_raw = build_dataset_for_ml_model(df, training_columns=training_columns)
        
        # Column 0 is the feature (sign flipped), the last column is the binarized label.
        val_raw_signal, val_raw_rul = -validation_set_raw[:, 0], validation_set_raw[:, -1]
        
        best_cost_raw, best_thr_raw, all_cost_raw, all_thr_raw = threshold_optimization(val_raw_signal, val_raw_rul, start=0, end=val_raw_signal.max(), n_steps=200)
        
        # Keep the signal, the chosen threshold and the labels together in one file.
        f_path = threshold_path + "raw_signal" + "-" + feature + "-s" + str(seed) + "-p0"
        to_serialize = (val_raw_signal, best_thr_raw, val_raw_rul)
        with open(f_path, "wb") as file:
            pickle.dump(to_serialize, file)
        
        perf_raw = performance_evaluation(val_raw_signal, best_thr_raw, val_raw_rul)
        all_perf.append(["raw_signal", seed, feature, {}] + perf_raw)
        
        serialize_perf("raw_signal", seed=seed, columns_name=feature, training_columns=training_columns, 
                       params={}, params_idx=0, history=None, best_cost=best_cost_raw, best_thr=best_thr_raw, 
                       all_cost=all_cost_raw, all_thr=all_thr_raw, perf=perf_raw)
        
        # No model saving here: there is just the threshold

## Pre-anomaly detection with AutoEncoders

In [19]:
def build_autoencoder(input_size, hidden):
    """Build a dense autoencoder (uncompiled Keras model).

    Parameters
    ----------
    input_size : int
        Number of input features; the output layer has the same width.
    hidden : iterable of int
        Width of each ReLU hidden layer, in order.

    Returns
    -------
    keras.Model mapping a float32 vector of ``input_size`` values to a linear
    output of the same size.
    """
    inputs = keras.Input(shape=(input_size, ), dtype='float32')
    encoded = inputs
    for n_units in hidden:
        encoded = layers.Dense(n_units, activation='relu')(encoded)
    reconstruction = layers.Dense(input_size, activation='linear')(encoded)
    return keras.Model(inputs, reconstruction)

In [20]:
# Autoencoder configurations to try: "hidden_ae" lists the hidden-layer widths in order.
params_ae = [{"hidden_ae": [16, 8, 2, 8, 16]}, 
             {"hidden_ae": [64, 24, 9, 24, 64]}, 
             {"hidden_ae": [128, 56, 18, 56, 128]}]

In [None]:
# Train one autoencoder per (seed, feature subset, architecture), use its
# reconstruction error as alarm signal, tune the threshold and store the results.
for seed in seeds:
    for columns in features:
        for params_idx, params in enumerate(params_ae):

            set_determinism(seed)

            # ae=True: the training part only keeps samples with RUL >= 2 * margin.
            training_set_ae, validation_set_ae = build_dataset_for_ml_model(df, training_columns=features[columns], ae=True)

            # The last column is the label and is never fed to the autoencoder.
            train_cols_ae = training_set_ae.shape[1] - 1
            ae = build_autoencoder(input_size=train_cols_ae, hidden=params["hidden_ae"])
            ae.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), 
                       loss='mse')
            cb_ae = [callbacks.EarlyStopping(patience=patience, restore_best_weights=True)]
            # Input and target are the same features (reconstruction task).
            history_ae = ae.fit(training_set_ae[:, :-1], training_set_ae[:, :-1], validation_split=validation_split,
                                callbacks=cb_ae, batch_size=batch_size, epochs=epochs, verbose=0)
            
            preds_ae = ae.predict(validation_set_ae[:, :-1])
            
            # Alarm signal: per-sample sum of squared reconstruction errors.
            signal_ae = pd.Series(data=np.sum(np.square(preds_ae - validation_set_ae[:, :-1]), axis=1))
            rul_ae = validation_set_ae[:, -1]

            best_cost_ae, best_thr_ae, all_cost_ae, all_thr_ae = threshold_optimization(signal_ae, rul_ae, start=0, end=signal_ae.max(), n_steps=200)
                
            f_path = threshold_path + "autoencoder" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx)
            to_serialize = (signal_ae, best_thr_ae, rul_ae)
            with open(f_path, "wb") as file:
                pickle.dump(to_serialize, file)
            
            perf_ae = performance_evaluation(signal_ae, best_thr_ae, rul_ae)
            all_perf.append(["autoencoder", seed, columns, params] + perf_ae)
            
            # NOTE(review): history_ae is the object returned by fit(); pickling it may
            # also serialize the model — confirm whether history_ae.history would suffice.
            serialize_perf("autoencoder", seed=seed, columns_name=columns, training_columns=features[columns], 
                           params=params, params_idx=params_idx, history=history_ae, best_cost=best_cost_ae, best_thr=best_thr_ae, 
                           all_cost=all_cost_ae, all_thr=all_thr_ae, perf=perf_ae)
            
            ae.save(models_path + "autoencoder" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx))

INFO:tensorflow:Assets written to: ram://5fd31bd7-3e7d-44a0-b8af-b1e22f326a83/assets
INFO:tensorflow:Assets written to: models_30/autoencoder-all_features-s100-p0/assets
INFO:tensorflow:Assets written to: ram://40005caf-1faf-4948-a0a5-8523a1167393/assets
INFO:tensorflow:Assets written to: models_30/autoencoder-all_features-s100-p1/assets


## RUL estimation with Dense Neural Networks

In [None]:
def build_classifier(input_size, hidden):
    """Build a dense binary classifier (uncompiled Keras model).

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden : iterable of int
        Width of each ReLU hidden layer, in order; may be empty, in which case
        the input is connected directly to the output unit.

    Returns
    -------
    keras.Model with a single sigmoid output.
    """
    inputs = keras.Input(shape=(input_size,), dtype='float32')
    hidden_out = inputs
    for n_units in hidden:
        hidden_out = layers.Dense(n_units, activation='relu')(hidden_out)
    probability = layers.Dense(1, activation='sigmoid')(hidden_out)
    return keras.Model(inputs, probability)

In [None]:
# MLP configurations to try: "hidden_mlp" lists the hidden-layer widths
# (the empty list means no hidden layer).
params_mlp = [{"hidden_mlp": []},
              {"hidden_mlp": [32]},
              {"hidden_mlp": [64, 32]}, 
              {"hidden_mlp": [128, 64, 32]}]

**Class weights** are useful when you have an **unbalanced dataset** and want to improve single-label classification results: they let you give more weight to the samples that belong to the rarest class.

In [None]:
# Train one MLP classifier per (seed, feature subset, architecture), turn its
# output into an alarm signal, tune the threshold and store the results.
for seed in seeds:
    for columns in features:
        for params_idx, params in enumerate(params_mlp):

            set_determinism(seed)
            
            training_set_mlp, validation_set_mlp = build_dataset_for_ml_model(df, training_columns=features[columns])
            # Inverse-frequency class weights computed on the training labels.
            # NOTE(review): raises KeyError if one of the two classes is absent from the training labels.
            counts_mlp = pd.Series(training_set_mlp[:, -1]).value_counts(normalize=True)
            class_weight_mlp = {0: 1/counts_mlp[0], 1: 1/counts_mlp[1]}
            
            # The last column is the label, everything before it is an input feature.
            input_size_mlp = training_set_mlp.shape[1] - 1
            mlp = build_classifier(input_size=input_size_mlp, hidden=params["hidden_mlp"])
            mlp.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='binary_crossentropy')
            cb_mlp = [callbacks.EarlyStopping(patience=patience, restore_best_weights=True)]
            history_mlp = mlp.fit(training_set_mlp[:, :-1], training_set_mlp[:, -1], validation_split=validation_split,
                                  callbacks=cb_mlp, class_weight=class_weight_mlp,
                                  batch_size=batch_size, epochs=epochs, verbose=0)
                
            preds_mlp = mlp.predict(validation_set_mlp[:, :-1]).ravel()
            
            # The network predicts label 1 (RUL >= margin); the alarm signal is its complement.
            signal_mlp = pd.Series(data=(1 - preds_mlp))
            rul_mlp = validation_set_mlp[:, -1]

            best_cost_mlp, best_thr_mlp, all_cost_mlp, all_thr_mlp = threshold_optimization(signal_mlp, rul_mlp, start=0, end=signal_mlp.max(), n_steps=200)
            
            f_path = threshold_path + "mlp" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx)
            to_serialize = (signal_mlp, best_thr_mlp, rul_mlp)
            with open(f_path, "wb") as file:
                pickle.dump(to_serialize, file)
            
            perf_mlp = performance_evaluation(signal_mlp, best_thr_mlp, rul_mlp)
            all_perf.append(["mlp", seed, columns, params] + perf_mlp)
            
            serialize_perf("mlp", seed=seed, columns_name=columns, training_columns=features[columns], 
                           params=params, params_idx=params_idx, history=history_mlp, best_cost=best_cost_mlp, best_thr=best_thr_mlp, 
                           all_cost=all_cost_mlp, all_thr=all_thr_mlp, perf=perf_mlp)
            
            mlp.save(models_path + "mlp" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx))

## RUL estimation with Convolutional Neural Networks

In [None]:
def sliding_window_2D(data, w_len, stride=1):
    """Cut a 2-D table into overlapping windows of ``w_len`` consecutive rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per time step.
    w_len : int
        Number of rows in each window.
    stride : int
        Step between the first rows of consecutive windows.

    Returns
    -------
    numpy.ndarray of shape ``(n_windows, w_len, n_columns)``. When ``data``
    has fewer than ``w_len`` rows the result has zero windows (previously a
    negative slice bound produced ragged slices and a reshape error).
    """
    values = data.values
    # Number of valid window start positions before applying the stride.
    n_starts = len(data) - w_len + 1
    if n_starts <= 0:
        return np.empty((0, w_len, values.shape[1]), dtype=values.dtype)
    # shifted[i] holds the i-th row of every window; stacking them on a new
    # middle axis yields one (w_len, n_columns) block per window.
    shifted = [values[i:n_starts + i:stride] for i in range(w_len)]
    return np.stack(shifted, axis=1)


def sliding_window_by_fault(data, cols, w_len, stride=1):
    """Window every segment separately and pair each window with its label.

    Windows never span two segments. The label of a window is the ``'label'``
    value of its last row.

    Parameters
    ----------
    data : iterable of pandas.DataFrame
        One frame per segment; each must contain ``'label'`` and the feature columns.
    cols : list of str
        Columns to put in the windows. A ``'label'`` entry is ignored: the
        label is the prediction target and must not be part of the network
        input (the callers pass column lists that end with ``'label'``).
    w_len : int
        Window length in rows.
    stride : int
        Step between consecutive windows.

    Returns
    -------
    ``(windows, labels)``: an array of shape ``(n_windows, w_len, n_features)``
    and a 1-D array with one label per window.
    """
    # Drop the target from the inputs to avoid label leakage.
    feature_cols = [c for c in cols if c != 'label']
    l_w, l_r = [], []
    for gdata in data:
        l_w.append(sliding_window_2D(gdata[feature_cols], w_len, stride))
        # Label of the last row of each window.
        l_r.append(gdata['label'].iloc[w_len-1::stride])
    res_w = np.concatenate(l_w)
    res_r = np.concatenate(l_r)
    return res_w, res_r

In [None]:
def build_cnn_regressor(input_size, filters, kernel_size, hidden, w_len):
    """Build a 1-D convolutional network with a sigmoid output (uncompiled).

    Despite the name, the single sigmoid output unit makes this a binary
    classifier rather than a regressor.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    filters : int
        Number of Conv1D filters.
    kernel_size : int
        Conv1D kernel length.
    hidden : iterable of int
        Width of each ReLU dense layer placed after the flattened convolution.
    w_len : int
        Number of time steps in each input window.

    Returns
    -------
    keras.Model taking float32 input of shape ``(w_len, input_size)``.
    """
    window_in = keras.Input(shape=(w_len, input_size), dtype='float32')
    x = layers.Conv1D(filters, kernel_size=kernel_size, activation='relu')(window_in)
    x = layers.Flatten()(x)
    for n_units in hidden:
        x = layers.Dense(n_units, activation='relu')(x)
    probability = layers.Dense(1, activation='sigmoid')(x)
    return keras.Model(window_in, probability)

In [None]:
# CNN configurations to try: Conv1D filters and kernel size, dense layer widths
# and input window length ("w_len", in samples).
params_cnn = [{"filters": 1, "kernel_size": 3, "hidden": [32], "w_len": 5},
              {"filters": 4, "kernel_size": 3, "hidden": [32], "w_len": 5},
              {"filters": 1, "kernel_size": 5, "hidden": [32], "w_len": 5},
              {"filters": 4, "kernel_size": 5, "hidden": [32], "w_len": 5},
              {"filters": 4, "kernel_size": 5, "hidden": [64, 32], "w_len": 5},
              {"filters": 1, "kernel_size": 3, "hidden": [32], "w_len": 10},
              {"filters": 4, "kernel_size": 3, "hidden": [32], "w_len": 10},
              {"filters": 1, "kernel_size": 5, "hidden": [32], "w_len": 10},
              {"filters": 4, "kernel_size": 5, "hidden": [32], "w_len": 10},
              {"filters": 4, "kernel_size": 5, "hidden": [64, 32], "w_len": 10},
              {"filters": 4, "kernel_size": 7, "hidden": [128, 64, 32], "w_len": 10}]

In [None]:
# Train one CNN per (seed, feature subset, configuration) on per-segment sliding
# windows, turn its output into an alarm signal, tune the threshold and store the results.
for seed in seeds:
    for columns in features:
        for params_idx, params in enumerate(params_cnn):
            
            set_determinism(seed)
            
            # as_list=True keeps one DataFrame per segment so windows never span two segments.
            traning_set_cnn, validation_set_cnn = build_dataset_for_ml_model(df, training_columns=features[columns], as_list=True)
            # NOTE(review): features[columns] ends with 'label'; confirm that
            # sliding_window_by_fault does not feed the label to the network as an input channel.
            tr_sw, tr_sw_r = sliding_window_by_fault(traning_set_cnn, features[columns], params["w_len"])
            val_sw, val_sw_r = sliding_window_by_fault(validation_set_cnn, features[columns], params["w_len"])
            # Inverse-frequency class weights computed on the training window labels.
            counts_cnn = pd.Series(tr_sw_r).value_counts(normalize=True)
            class_weight_cnn = {0: 1/counts_cnn[0], 1: 1/counts_cnn[1]}

            # Number of channels per time step of one window.
            input_size_cnn = tr_sw[0].shape[1]
            cnn = build_cnn_regressor(input_size=input_size_cnn, filters=params["filters"],
                                      kernel_size=params["kernel_size"], hidden=params["hidden"], w_len=params["w_len"])
            cnn.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), 
                        loss='binary_crossentropy')
            cb_cnn = [callbacks.EarlyStopping(patience=patience, restore_best_weights=True)]
            history_cnn = cnn.fit(tr_sw, tr_sw_r, validation_split=validation_split,
                                  callbacks=cb_cnn,
                                  class_weight=class_weight_cnn,
                                  batch_size=batch_size, epochs=epochs, verbose=0)
            
            preds_cnn = cnn.predict(val_sw).ravel()
            
            # The network predicts label 1 (RUL >= margin); the alarm signal is its complement.
            signal_cnn = pd.Series(data=(1 - preds_cnn))
            rul_cnn = val_sw_r

            best_cost_cnn, best_thr_cnn, all_cost_cnn, all_thr_cnn = threshold_optimization(signal_cnn, rul_cnn, start=0, end=signal_cnn.max(), n_steps=200)
            
            f_path = threshold_path + "conv_nn" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx)
            to_serialize = (signal_cnn, best_thr_cnn, rul_cnn)
            with open(f_path, "wb") as file:
                pickle.dump(to_serialize, file)
            
            perf_cnn = performance_evaluation(signal_cnn, best_thr_cnn, rul_cnn)
            all_perf.append(["conv_nn", seed, columns, params] + perf_cnn)
            
            serialize_perf("conv_nn", seed=seed, columns_name=columns, training_columns=features[columns], 
                           params=params, params_idx=params_idx, history=history_cnn, best_cost=best_cost_cnn, best_thr=best_thr_cnn, 
                           all_cost=all_cost_cnn, all_thr=all_thr_cnn, perf=perf_cnn)
            
            cnn.save(models_path + "conv_nn" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx))

## RUL estimation with Recurrent Neural Networks (LSTM and GRU)

In [None]:
def create_dataset_3D(X, y, time_steps):
    """Turn a 2-D feature array into windows for a recurrent model.

    Window ``k`` holds rows ``k .. k + time_steps - 1`` of ``X`` and is paired
    with ``y[k + time_steps]``, i.e. the target of the row right after the window.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one row per time step.
    y : sequence
        One target per row of ``X``.
    time_steps : int
        Window length in rows.

    Returns
    -------
    ``(windows, targets)``: the stacked windows and the targets as a column
    vector of shape ``(n_windows, 1)``.
    """
    n_windows = len(X) - time_steps
    windows = [X[k:k + time_steps, :] for k in range(n_windows)]
    targets = [y[k + time_steps] for k in range(n_windows)]
    return np.array(windows), np.array(targets).reshape(-1, 1)

In [None]:
def create_model_cl(units, m, X_train, lr):
    """Build and compile a two-layer recurrent binary classifier.

    Parameters
    ----------
    units : int
        Number of units in each recurrent layer.
    m : callable
        Recurrent layer class to instantiate (the callers pass
        ``keras.layers.GRU`` or ``keras.layers.LSTM``).
    X_train : numpy.ndarray
        Training windows; only ``shape[1]`` and ``shape[2]`` are read to size the input.
    lr : float
        Learning rate of the Adam optimizer.

    Returns
    -------
    Compiled ``keras.Sequential`` with a sigmoid output and binary cross-entropy loss.
    """
    window_shape = [X_train.shape[1], X_train.shape[2]]
    model = keras.Sequential()
    # First recurrent layer returns the full sequence so that a second one can be stacked on it.
    model.add(m(units=units, return_sequences=True, input_shape=window_shape))
    model.add(m(units=units))
    model.add(keras.layers.Dense(units=1, activation="sigmoid"))
    optimizer = keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    return model

In [None]:
# GRU/LSTM configurations to try: input window length ("time_steps", in samples)
# and number of units per recurrent layer.
params_rnn = [{"time_steps": 5, "units": 64},
              {"time_steps": 5, "units": 128},
              {"time_steps": 10, "units": 64},
              {"time_steps": 10, "units": 128}]

In [None]:
# Train one GRU and one LSTM per (seed, feature subset, configuration), turn their
# outputs into alarm signals, tune the thresholds and store the results.
for seed in seeds:
    for columns in features:
        for params_idx, params in enumerate(params_rnn):
            
            set_determinism(seed)

            traning_set_rnn, validation_set_rnn = build_dataset_for_ml_model(df, training_columns=features[columns])
            # The last column is the label, everything before it is an input feature.
            X_train_rnn, y_train_rnn = create_dataset_3D(traning_set_rnn[:, :-1], 
                                                         traning_set_rnn[:, -1], 
                                                         params["time_steps"])
            X_val_rnn, y_val_rnn = create_dataset_3D(validation_set_rnn[:, :-1], 
                                                     validation_set_rnn[:, -1],   
                                                     params["time_steps"])
            # Inverse-frequency class weights from THIS model's training targets
            # (previously read `tr_sw_r`, a variable left over from the CNN cell).
            counts_rnn = pd.Series(y_train_rnn.ravel()).value_counts(normalize=True)
            class_weight_rnn = {0: 1/counts_rnn[0], 1: 1/counts_rnn[1]}
            
            model_gru = create_model_cl(params["units"], keras.layers.GRU, X_train_rnn, learning_rate)
            early_stop = [callbacks.EarlyStopping(patience=patience, restore_best_weights=True)]
            # verbose=0 as in every other fit() of this notebook.
            history_gru = model_gru.fit(X_train_rnn, y_train_rnn, epochs=epochs,
                                        class_weight=class_weight_rnn,
                                        validation_split=validation_split, batch_size=batch_size,
                                        shuffle=False, callbacks=early_stop, verbose=0)
            
            model_lstm = create_model_cl(params["units"], keras.layers.LSTM, X_train_rnn, learning_rate)
            # A fresh EarlyStopping instance for the second model.
            early_stop = [callbacks.EarlyStopping(patience=patience, restore_best_weights=True)]
            history_lstm = model_lstm.fit(X_train_rnn, y_train_rnn, epochs=epochs,
                                          class_weight=class_weight_rnn,
                                          validation_split=validation_split, batch_size=batch_size,
                                          shuffle=False, callbacks=early_stop, verbose=0)

            preds_lstm = model_lstm.predict(X_val_rnn).ravel()
            preds_gru = model_gru.predict(X_val_rnn).ravel()
            
            # The networks predict label 1 (RUL >= margin); the alarm signal is the complement.
            signal_lstm = pd.Series(data=(1 - preds_lstm))
            rul_lstm = y_val_rnn
            signal_gru = pd.Series(data=(1 - preds_gru))
            rul_gru = y_val_rnn
            
            best_cost_lstm, best_thr_lstm, all_cost_lstm, all_thr_lstm = threshold_optimization(signal_lstm, rul_lstm, start=0, end=signal_lstm.max(), n_steps=200)
            best_cost_gru, best_thr_gru, all_cost_gru, all_thr_gru = threshold_optimization(signal_gru, rul_gru, start=0, end=signal_gru.max(), n_steps=200)

            f_path = threshold_path + "lstm" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx)
            to_serialize = (signal_lstm, best_thr_lstm, rul_lstm)
            with open(f_path, "wb") as file:
                pickle.dump(to_serialize, file)
                
            f_path = threshold_path + "gru" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx)
            to_serialize = (signal_gru, best_thr_gru, rul_gru)
            with open(f_path, "wb") as file:
                pickle.dump(to_serialize, file)
            
            perf_lstm = performance_evaluation(signal_lstm, best_thr_lstm, rul_lstm)
            perf_gru = performance_evaluation(signal_gru, best_thr_gru, rul_gru)
            all_perf.append(["lstm", seed, columns, params] + perf_lstm)  
            all_perf.append(["gru", seed, columns, params] + perf_gru)
            
            serialize_perf("lstm", seed=seed, columns_name=columns, training_columns=features[columns], 
                           params=params, params_idx=params_idx, history=history_lstm, best_cost=best_cost_lstm, best_thr=best_thr_lstm, 
                           all_cost=all_cost_lstm, all_thr=all_thr_lstm, perf=perf_lstm)
            
            model_lstm.save(models_path + "lstm" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx))
            
            serialize_perf("gru", seed=seed, columns_name=columns, training_columns=features[columns], 
                           params=params, params_idx=params_idx, history=history_gru, best_cost=best_cost_gru, best_thr=best_thr_gru, 
                           all_cost=all_cost_gru, all_thr=all_thr_gru, perf=perf_gru)
            
            model_gru.save(models_path + "gru" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx))

## RUL estimation with Recurrent Neural Networks (BiLSTM)

In [None]:
def create_model_bilstm_cl(units, X_train, lr):
    """Build and compile a stacked bidirectional-LSTM binary classifier.

    Args:
        units: number of units passed to each wrapped LSTM layer.
        X_train: training tensor; only ``shape[1]`` and ``shape[2]`` are read,
            to declare the input shape of the first layer.
        lr: learning rate handed to the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model with a single sigmoid output,
        trained with binary cross-entropy.
    """
    window_shape = (X_train.shape[1], X_train.shape[2])

    model = keras.Sequential()

    # The first recurrent layer returns the whole sequence so that it can
    # feed the second recurrent layer.
    first_lstm = keras.layers.LSTM(units=units, return_sequences=True)
    model.add(keras.layers.Bidirectional(first_lstm, input_shape=window_shape))

    second_lstm = keras.layers.LSTM(units=units)
    model.add(keras.layers.Bidirectional(second_lstm))

    # Single sigmoid unit: the network outputs one probability per window.
    model.add(keras.layers.Dense(1, activation="sigmoid"))

    adam = keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss='binary_crossentropy', optimizer=adam)
    return model

In [None]:
# Hyper-parameter grid explored for the BiLSTM: every (time_steps, units)
# combination, with time_steps varying slowest so list positions (used as
# `params_idx` in file names) stay stable.
params_bilstm = [{"time_steps": time_steps, "units": units}
                 for time_steps in (5, 10)
                 for units in (64, 128)]

In [None]:
# Train, threshold-tune, evaluate and persist one BiLSTM classifier for every
# (seed, feature set, hyper-parameter set) combination.
for seed in seeds:
    for columns in features:
        for params_idx, params in enumerate(params_bilstm):

            # Re-seed before every run so each configuration is reproducible
            # independently of the ones trained before it.
            set_determinism(seed)

            # Suffix shared by every artefact written for this run.
            run_tag = "bilstm" + "-" + columns + "-s" + str(seed) + "-p" + str(params_idx)

            traning_set_bilstm, validation_set_bilstm = build_dataset_for_ml_model(df, training_columns=features[columns])
            # The last column is passed as the target, all the others as inputs.
            X_train_bilstm, y_train_bilstm = create_dataset_3D(traning_set_bilstm[:, :-1],
                                                               traning_set_bilstm[:, -1],
                                                               params["time_steps"])
            X_val_bilstm, y_val_bilstm = create_dataset_3D(validation_set_bilstm[:, :-1],
                                                           validation_set_bilstm[:, -1],
                                                           params["time_steps"])

            # Inverse-frequency class weights computed from the labels that are
            # actually fed to `fit` in this run. The previous version read
            # `tr_sw_r`, a variable left over from another cell, so the weights
            # depended on hidden kernel state instead of the current
            # time_steps / feature set.
            counts_bilstm = pd.Series(np.ravel(y_train_bilstm)).value_counts(normalize=True)
            class_weight_bilstm = {0: 1/counts_bilstm[0], 1: 1/counts_bilstm[1]}

            model_bilstm = create_model_bilstm_cl(params["units"], X_train_bilstm, learning_rate)
            early_stop = [callbacks.EarlyStopping(patience=patience, restore_best_weights=True)]
            # shuffle=False keeps the windows in their original order; the
            # early-stopping split is taken by Keras via `validation_split`.
            history_bilstm = model_bilstm.fit(X_train_bilstm, y_train_bilstm, epochs=epochs,
                                              class_weight=class_weight_bilstm,
                                              validation_split=validation_split, batch_size=batch_size,
                                              shuffle=False, callbacks=early_stop, verbose=0)

            preds_bilstm = model_bilstm.predict(X_val_bilstm).ravel()

            # The signal that gets thresholded is the complement of the
            # predicted probability.
            signal_bilstm = pd.Series(data=(1 - preds_bilstm))
            rul_bilstm = y_val_bilstm

            # NOTE(review): presumably scans `n_steps` candidate thresholds in
            # [start, end] and returns the cheapest one -- confirm against
            # `threshold_optimization`.
            best_cost_bilstm, best_thr_bilstm, all_cost_bilstm, all_thr_bilstm = threshold_optimization(
                signal_bilstm, rul_bilstm, start=0, end=signal_bilstm.max(), n_steps=200)

            # Persist the validation signal together with the chosen threshold.
            f_path = threshold_path + run_tag
            to_serialize = (signal_bilstm, best_thr_bilstm, rul_bilstm)
            with open(f_path, "wb") as file:
                pickle.dump(to_serialize, file)

            perf_bilstm = performance_evaluation(signal_bilstm, best_thr_bilstm, rul_bilstm)
            all_perf.append(["bilstm", seed, columns, params] + perf_bilstm)

            serialize_perf("bilstm", seed=seed, columns_name=columns, training_columns=features[columns],
                           params=params, params_idx=params_idx, history=history_bilstm, best_cost=best_cost_bilstm, best_thr=best_thr_bilstm,
                           all_cost=all_cost_bilstm, all_thr=all_thr_bilstm, perf=perf_bilstm)

            model_bilstm.save(models_path + run_tag)

## Analysis over the validation set

In [None]:
# Collect every per-run performance record into one table, save it next to the
# notebook and display it.
perf_columns = [
    "model", "seed", "columns", "params",
    "cost", "anticipation", "detected_faults", "missed_faults", "false_alarms",
]
df_res = pd.DataFrame(all_perf, columns=perf_columns)

summary_csv = "training_summary_" + str(margin) + ".csv"
df_res.to_csv(summary_csv)
df_res