### Import packages

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf 
import math

In [2]:
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
from keras.activations import softmax
import keras.backend as K

Using TensorFlow backend.


In [9]:
import os 
os.chdir("/users/az02234/Documents/Projets_Renault/PredictiveMaintenance/PredictiveMaintenanceAutoEncoder/data/interim/")

### Loading data

In [10]:
data = pd.read_csv("data_dl.csv",  dtype={'dataValue': np.float64, 'pji': np.int64}, parse_dates=['sourceTimestamp_dtformat'])

In [11]:
data.head().T

Unnamed: 0,0,1,2,3,4
dataValue,519,230,203,238,231
pji,1411188,1411188,1411188,1411188,1411188
sourceTimestamp_dtformat,2019-04-02 11:33:31.609000,2019-04-02 11:33:31.812000,2019-04-02 11:33:32.015000,2019-04-02 11:33:32.125000,2019-04-02 11:33:32.218000


### Reshaping data

Step 1 : transform time stamp into a normalized numerical vector

Step 2 : transform dataValue serie into a list of list, one for each vehicle, with padding to reach maximum serie length. Each value is made of three element: timestamp, time since previous element, value

In [12]:
def compute_max_sequence_length(dataframe):
    lengths = dataframe.groupby("pji").count()
    max_length = lengths.max()[0]
    return max_length

In [13]:
exemple = data[0:5000]

In [14]:
max_length = compute_max_sequence_length(exemple)
print(max_length)

281


In [15]:
exemple["micro_second"] = exemple.sourceTimestamp_dtformat.dt.microsecond \
                          + exemple.sourceTimestamp_dtformat.dt.second*1000 \
                          + exemple.sourceTimestamp_dtformat.dt.minute*1000*60 \
                          + exemple.sourceTimestamp_dtformat.dt.hour*1000*60*60 \
                          + exemple.sourceTimestamp_dtformat.dt.day*1000*60*60*24

In [16]:
def normalise(dataframe):
    dataframe["micro_second"] = dataframe["micro_second"]/1000
    dataframe["normalized_time"] = (dataframe["micro_second"] - dataframe["micro_second"].min()) / dataframe["micro_second"].max()
    return dataframe

In [17]:
def compute_lag(dataframe):
    dataframe.sort_values(by=["normalized_time"], inplace=True)
    dataframe['interval'] = dataframe["micro_second"] - dataframe["micro_second"].shift()
    return dataframe

In [18]:
exemple = exemple.groupby("pji").apply(normalise)
exemple = exemple.groupby("pji").apply(compute_lag)
exemple = exemple.reset_index(drop=True)
exemple = exemple.fillna(0)

In [19]:
exemple.head()

Unnamed: 0,dataValue,pji,sourceTimestamp_dtformat,micro_second,normalized_time,interval
0,248.0,1130047,2019-04-04 05:56:15.078,367053.0,0.0,0.0
1,245.0,1130047,2019-04-04 05:56:17.078,367055.0,5e-06,2.0
2,254.0,1130047,2019-04-04 05:56:19.078,367057.0,1.1e-05,2.0
3,312.0,1130047,2019-04-04 05:56:21.078,367059.0,1.6e-05,2.0
4,504.0,1130047,2019-04-04 05:56:25.078,367063.0,2.7e-05,4.0


In [20]:
data_for_dl = []
for vehicle_id in exemple.pji.drop_duplicates():
    # select vehicle data 
    selector = (exemple.pji == vehicle_id)
    vehicle_data = exemple.loc[selector, ['dataValue', 'normalized_time', 'interval']]
    
    # compute length of the padding
    padding_length = 287 - vehicle_data.shape[0]
    
    # add padding
    pad = (np.repeat( np.array([0,1,0]).reshape(-1,1), padding_length, axis=1)).T
    vehicle_data = np.vstack((vehicle_data, pad))
    
    # Append to final array
    data_for_dl.append(list(vehicle_data))

In [21]:
data_for_dl = np.array(data_for_dl)

In [22]:
data_for_dl.shape

(19, 287, 3)

In [23]:
data_for_dl[0][0].shape

(3,)

In [24]:
data_for_dl[0].shape

(287, 3)

### Create pseudo-target

step 1 : Divide each time serie in ten slice

step 2 : Compute mean / median / standard deviation / first and third quartile

step 3 : Reshuffle each new serie in the same way

In [25]:
## To be optimized : first build a complete vector of block id and then concatenate it rather than doing multiple
## select within the df 
def build_block(exemple, vehicle_id, num_block=10):
    exemple.sort_values(by=['pji', 'normalized_time'], inplace=True)
    selector = (exemple.pji == vehicle_id)
    vehicle_data = exemple[selector]
    length = vehicle_data.shape[0]
    vehicle_data["block"] = 0
    index_block = vehicle_data.columns.get_loc("block")
    for i in range(num_block):
        if i < 9:
            vehicle_data.iloc[math.floor(length/num_block) *(i): math.floor(length/num_block) *(i+1),index_block] = i
        else:
            vehicle_data.iloc[math.floor(length/num_block) *(i): length,index_block] = i
    return vehicle_data

In [26]:
def extract_stat_by_block(vehicle_data):
    mean = np.around(vehicle_data.groupby("block")["dataValue"].mean())
    std = np.around(vehicle_data.groupby("block")["dataValue"].std())
    median = np.around(vehicle_data.groupby("block")["dataValue"].median())
    first_quartile = np.around(vehicle_data.groupby("block")["dataValue"].quantile(0.25))
    third_quartile = np.around(vehicle_data.groupby("block")["dataValue"].quantile(0.75))

    target = pd.concat([mean, std, median, first_quartile, third_quartile], axis=0)
    target.reset_index(drop=True, inplace=True)
    #target = np.array(target).reshape(-1,1)
    target = np.array(target)
    return target

In [27]:
def build_target(exemple, num_block=10):
    target_list = []
    shuffled_index = np.array(range(num_block*5))
    np.random.shuffle(shuffled_index)
    for vehicle_id in exemple.pji.drop_duplicates():
        vehicle_data = build_block(exemple, vehicle_id, num_block=num_block)
        target = extract_stat_by_block(vehicle_data)
        target = target[shuffled_index.argsort()]
        target_list.append(list(target))
    target_list = np.array(target_list)
    return target_list

In [28]:
target_list = build_target(exemple, vehicle_id, num_block=10)
target_list.shape

(19, 50)

### Create encoder

In [29]:
Ty = 50
Tx = 287
nhs_lstm1 = 16

In [30]:
# Defined shared layers as global variables
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(10, activation = "relu")
densor2 = Dense(1, activation = "relu")
activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes = 1)

In [31]:
def one_step_attention(a, s_prev):
    """
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
    "alphas" and the hidden states "a" of the Bi-LSTM.
    
    Arguments:
    a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
    
    Returns:
    context -- context vector, input of the next (post-attetion) LSTM cell
    """
    
    # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
    s_prev = repeator(s_prev)
    # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
    concat = concatenator([a,s_prev])
    # Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈1 lines)
    e = densor1(concat)
    # Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈1 lines)
    e = densor2(e)
    # Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line)
    alphas = activator(e)
    # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)
    context = dotor([alphas,a])
    
    return context

In [32]:
n_a = 16
n_s = 32
m = 19
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(1, activation='relu')

In [33]:
def model_encoder(Tx, Ty, n_a, n_s):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model instance
    """
    
    # Define the inputs of your model with a shape (Tx,)
    # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
    X = Input(shape=(Tx, 3))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0
    
    # Initialize empty list of outputs
    outputs = []

    # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
    a = Bidirectional(LSTM(n_a,return_sequences=True))(X)
    
    # Step 2: Iterate for Ty steps
    for t in range(Ty):
    
        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
        context = one_step_attention(a, s)
        
        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
        # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
        s, _, c = post_activation_LSTM_cell(context, initial_state = [s, c])
        
        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
        out = output_layer(s)
        
        # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
        outputs.append(out)
    
    final_output = Concatenate(axis=-1)(outputs)
    
    # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
    model = Model(inputs=[X, s0, c0], outputs=final_output)
    
    return model

In [34]:
model_encoder = model_encoder(Tx, Ty, n_a, n_s)

In [35]:
model_encoder.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 287, 3)       0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 32)           0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 287, 32)      2560        input_1[0][0]                    
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector)  (None, 287, 32)      0           s0[0][0]                         
                                                                 lstm_1[0][0]                     
          

In [36]:
opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
model_encoder.compile(opt, loss='mean_squared_error')

In [37]:
#s0 = np.zeros((m, n_s))
#c0 = np.zeros((m, n_s))

s0 = np.random.randn(m, n_s)
c0 = np.random.randn(m, n_s)

In [38]:
target_list.shape

(19, 50)

In [47]:
model_encoder.fit([data_for_dl, s0, c0], target_list, epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1442964a8>

In [48]:
model_encoder.evaluate([data_for_dl, s0, c0], target_list)



189408.3125

In [49]:
target_list[0]

array([452., 479., 101., 465., 514., 482., 108., 400., 471., 504., 515.,
       500., 463., 471., 478.,  88., 502., 503., 106., 459., 446., 519.,
       496., 509., 487., 528., 454., 114., 498., 495.,  92., 500., 466.,
       470., 105., 512., 472., 470., 487., 515., 488., 499., 103., 517.,
       503., 514., 459.,  90.,  95., 509.])

In [50]:
model_encoder.predict([data_for_dl, s0, c0])[0]

array([0.01328393, 0.00234323, 0.0007984 , 0.00058817, 0.00055968,
       0.00055581, 0.00055527, 0.00055527, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521,
       0.00055521, 0.00055521, 0.00055521, 0.00055521, 0.00055521],
      dtype=float32)

### Create decoder

In [51]:
Ty = 287
Tx = 50

In [52]:
# Defined shared layers as global variables
repeator_decoder = RepeatVector(Tx)
concatenator_decoder = Concatenate(axis=-1)
densor1_decoder = Dense(10, activation = "relu")
densor2_decoder = Dense(1, activation = "relu")
activator_decoder = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor_decoder = Dot(axes = 1)

In [53]:
def one_step_attention_decoder(a, s_prev):
    """
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
    "alphas" and the hidden states "a" of the Bi-LSTM.
    
    Arguments:
    a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
    
    Returns:
    context -- context vector, input of the next (post-attetion) LSTM cell
    """
    
    # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
    s_prev = repeator_decoder(s_prev)
    # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
    concat = concatenator_decoder([a,s_prev])
    # Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈1 lines)
    e = densor1_decoder(concat)
    # Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈1 lines)
    e = densor2_decoder(e)
    # Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line)
    alphas = activator_decoder(e)
    # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)
    context = dotor_decoder([alphas,a])
    
    return context

In [54]:
n_a = 16
n_s = 32
m = 19
post_activation_LSTM_cell_decoder = LSTM(n_s, return_state = True)
output_layer_decoder = Dense(1, activation='relu')

In [55]:
def model_decoder(Tx, Ty, n_a, n_s):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model instance
    """
    
    # Define the inputs of your model with a shape (Tx,)
    # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
    X_decoder = Input(shape=(Tx,1), name='X_decoder')
    s0_decoder = Input(shape=(n_s,), name='s0_decoder')
    c0_decoder = Input(shape=(n_s,), name='c0_decoder')
    s = s0_decoder
    c = c0_decoder
    
    # Initialize empty list of outputs
    outputs = []

    # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
    a_decoder = Bidirectional(LSTM(n_a,return_sequences=True))(X_decoder)
    
    # Step 2: Iterate for Ty steps
    for t in range(Ty):
    
        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
        context = one_step_attention_decoder(a_decoder, s)
        
        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
        # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
        s, _, c = post_activation_LSTM_cell_decoder(context, initial_state = [s, c])
        
        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
        out = output_layer_decoder(s)
        
        # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
        outputs.append(out)
    
    final_output = Concatenate(axis=-1)(outputs)
    
    # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
    model = Model(inputs=[X_decoder, s0_decoder, c0_decoder], outputs=final_output)
    
    return model

In [56]:
model_decoder = model_decoder(Tx, Ty, n_a, n_s)

In [57]:
opt = Adam(lr=0.02, beta_1=0.9, beta_2=0.999, decay=0.01)
model_decoder.compile(opt, loss='mean_squared_error')

In [58]:
target_list = target_list.reshape((19,50,1))
data_for_dl2 = data_for_dl[:,:,0]

In [59]:
data_for_dl2.shape

(19, 287)

In [60]:
model_decoder.fit([target_list, s0, c0], data_for_dl2, epochs=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x18a743630>