In [None]:
!nvidia-smi

Wed Mar 10 16:39:15 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.56       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   55C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Experiment 3

Now we'll take a different approach that results in an increased number of samples. We'll use a *'sliding window'* technique: the window slides over each sample and partitions it into encoder and decoder data.

For example, if `sliding_window=4`, then for a sample of length 25 the first 4 timesteps go to encoder_data and the rest become decoder_data. The window then slides, let's say, 1 timestep to the right, making timesteps 2-5 the encoder data and the rest the decoder_data. In this way, the sliding window is shifted through the entire sample until the remaining decoder_data would become shorter than the minimum decoder_data_length.

By designing the samples in this way, we hope to better capture the relationship between the attack steps and thus increase the accuracy of prediction.

In [None]:
# mount drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/Thesis/Attack\ Step\ Prediction/Implementation

/content/drive/MyDrive/Thesis/Attack Step Prediction/Implementation


In [None]:
import tensorflow as tf
import numpy as np
import random
import csv

import aptgen_utils

In [None]:
# to reflect the changes, the module needs to be reloaded
import importlib
importlib.reload(aptgen_utils)

<module 'aptgen_utils' from '/content/drive/MyDrive/Thesis/Attack Step Prediction/Implementation/aptgen_utils.py'>

In [None]:
# get attack sequences as list of lists
data_text = aptgen_utils.get_data_text()
print(data_text[0])
len(data_text)

['collection TA0009 Email_Collection T1114', 'credential_access TA0006 Credential_Dumping T1003', 'discovery TA0007 System_Information_Discovery T1082', 'collection TA0009 Email_Collection T1114', 'defense_evasion TA0005 File_Deletion T1107', 'persistence TA0003 Scheduled_Task T1053', 'discovery TA0007 System_Information_Discovery T1082', 'collection TA0009 Email_Collection T1114']


800

In [None]:
# remove every whitespace character inside each step so that a step becomes one token
data_text = [["".join(step.split()) for step in sequence] for sequence in data_text]
print(data_text[0])
len(data_text)

['collectionTA0009Email_CollectionT1114', 'credential_accessTA0006Credential_DumpingT1003', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114', 'defense_evasionTA0005File_DeletionT1107', 'persistenceTA0003Scheduled_TaskT1053', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114']


800

Let's look at the sequence lengths in the data.

In [None]:
from collections import Counter

# number of steps in every sequence, in dataset order
sequence_lengths = list(map(len, data_text))

# tally how often each sequence length occurs
counter = Counter(sequence_lengths)
print("Frequncy of sequence lengths:", counter)
print("Number of unique sequence lengths:", len(counter))

Frequncy of sequence lengths: Counter({8: 117, 6: 62, 14: 56, 12: 49, 15: 45, 9: 44, 16: 42, 7: 39, 13: 37, 11: 32, 18: 30, 5: 29, 10: 29, 17: 28, 22: 23, 19: 19, 21: 18, 25: 14, 20: 10, 27: 9, 28: 9, 24: 8, 29: 8, 26: 7, 30: 7, 32: 7, 23: 6, 4: 5, 33: 4, 31: 3, 34: 3, 35: 1})
Number of unique sequence lengths: 32


In [None]:
counter

Counter({4: 5,
         5: 29,
         6: 62,
         7: 39,
         8: 117,
         9: 44,
         10: 29,
         11: 32,
         12: 49,
         13: 37,
         14: 56,
         15: 45,
         16: 42,
         17: 28,
         18: 30,
         19: 19,
         20: 10,
         21: 18,
         22: 23,
         23: 6,
         24: 8,
         25: 14,
         26: 7,
         27: 9,
         28: 9,
         29: 8,
         30: 7,
         31: 3,
         32: 7,
         33: 4,
         34: 3,
         35: 1})

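We'll work with a sliding window of 4 to start with, so all samples of length 4 are removed: with a window of 4 they would leave no steps for the decoder. (Note that the configuration cell further below sets `sliding_window_size = 5`.)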

In [None]:
def remove_sequence(data_text, lengths_to_remove):
    """Drop every sequence whose length is listed in ``lengths_to_remove``.

    Args:
        data_text: Iterable of sequences (anything that supports ``len``).
        lengths_to_remove: Collection of integer lengths to filter out.

    Returns:
        A new list with the sequences that were kept, in their original order.
        The list is empty when every sequence was filtered out.
    """
    # a set gives constant-time membership tests while filtering
    excluded_lengths = set(lengths_to_remove)
    truncated_data_text = [sequence for sequence in data_text if len(sequence) not in excluded_lengths]

    # only preview the first sequence when something survived the filter
    if truncated_data_text:
        print("First sequence after truncating:", truncated_data_text[0])
    print("Number of sequence after truncating:", len(truncated_data_text))

    return truncated_data_text

In [None]:
# remove sequences having length 4
lengths_to_remove = [4]
truncated_data_text = remove_sequence(data_text, lengths_to_remove)

First sequence after truncating: ['collectionTA0009Email_CollectionT1114', 'credential_accessTA0006Credential_DumpingT1003', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114', 'defense_evasionTA0005File_DeletionT1107', 'persistenceTA0003Scheduled_TaskT1053', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114']
Number of sequence after truncating: 795


In [None]:
def get_encoder_decoder_text(text, sliding_window_size, shift_right, min_decoder_length):
    """Cut every sequence into (encoder window, decoder remainder) pairs.

    A window of ``sliding_window_size`` steps starts at position 0 and moves
    ``shift_right`` steps at a time. Each placement yields one sample: the
    steps inside the window go to the encoder, all steps after the window go
    to the decoder. Sliding over a sequence stops as soon as fewer than
    ``min_decoder_length`` steps would remain after the window.

    Args:
        text: Iterable of sequences (lists of steps).
        sliding_window_size: Number of steps in each encoder window.
        shift_right: Step size of the window between two samples.
        min_decoder_length: Minimum number of steps required after the window.

    Returns:
        Tuple ``(encoder_text, decoder_text)`` of two index-aligned lists.
    """
    encoder_text = []
    decoder_text = []

    for sequence in text:
        total_steps = len(sequence)
        # the exclusive upper bound keeps at least one step behind every window
        for window_start in range(0, total_steps - sliding_window_size, shift_right):
            window_end = window_start + sliding_window_size
            remaining_steps = total_steps - window_end
            # stop sliding once the remainder is shorter than the required decoder length
            if remaining_steps < min_decoder_length:
                break

            encoder_text += [sequence[window_start:window_end]]
            decoder_text += [sequence[window_end:]]

    return encoder_text, decoder_text

In [None]:
# Sliding-window configuration passed to get_encoder_decoder_text
sliding_window_size = 5  # number of steps handed to the encoder per sample
shift_right = 1  # how many steps the window advances between two samples
min_decoder_length = 1  # minimum number of steps that must remain for the decoder

# Sequences that are not longer than sliding_window_size contribute no samples,
# because the window loop in get_encoder_decoder_text is empty for them.
encoder_text, decoder_text = get_encoder_decoder_text(truncated_data_text, sliding_window_size, shift_right, min_decoder_length)
print("Number of samples:", len(encoder_text))

Number of samples: 6835


In [None]:
def append_sos_eos(decoder_text):
    """Derive teacher-forcing decoder inputs and targets from decoder sequences.

    For every sequence two new lists are built: the input variant with
    ``"<sos>"`` placed in front, and the target variant with ``"<eos>"``
    placed at the end. The sequences passed in are not modified.

    Returns:
        Tuple ``(decoder_input_text, decoder_target_text)`` of aligned lists.
    """
    # build both variants in a single pass over decoder_text
    shifted_pairs = [(["<sos>"] + steps, steps + ["<eos>"]) for steps in decoder_text]

    decoder_input_text = [with_sos for with_sos, _ in shifted_pairs]
    decoder_target_text = [with_eos for _, with_eos in shifted_pairs]
    return decoder_input_text, decoder_target_text

In [None]:
import copy
encoder_input_text = copy.deepcopy(encoder_text)
decoder_input_text, decoder_target_text = append_sos_eos(decoder_text)

print("encoder_input_text[0]:", encoder_input_text[0], "\ndecoder_input_text[0]:", decoder_input_text[0], "\ndecoder_target_text[0]:", decoder_target_text[0], "\n")
print("encoder_input_text[69]:", encoder_input_text[69], "\ndecoder_input_text[69]:", decoder_input_text[69], "\ndecoder_target_text[69]:", decoder_target_text[69], "\n")
print("encoder_input_text[169]:", encoder_input_text[169], "\ndecoder_input_text[169]:", decoder_input_text[169], "\ndecoder_target_text[169]:", decoder_target_text[169], "\n")
print("encoder_input_text[650]:", encoder_input_text[650], "\ndecoder_input_text[650]:", decoder_input_text[650], "\ndecoder_target_text[650]:", decoder_target_text[650], "\n")

encoder_input_text[0]: ['collectionTA0009Email_CollectionT1114', 'credential_accessTA0006Credential_DumpingT1003', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114', 'defense_evasionTA0005File_DeletionT1107'] 
decoder_input_text[0]: ['<sos>', 'persistenceTA0003Scheduled_TaskT1053', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114'] 
decoder_target_text[0]: ['persistenceTA0003Scheduled_TaskT1053', 'discoveryTA0007System_Information_DiscoveryT1082', 'collectionTA0009Email_CollectionT1114', '<eos>'] 

encoder_input_text[69]: ['discoveryTA0007Permission_Groups_DiscoveryT1069', 'defense_evasionTA0005File_DeletionT1107', 'persistenceTA0003Scheduled_TaskT1053', 'discoveryTA0007System_Information_DiscoveryT1082', 'discoveryTA0007Permission_Groups_DiscoveryT1069'] 
decoder_input_text[69]: ['<sos>', 'discoveryTA0007Remote_System_DiscoveryT1018', 'collectionTA0009Data_StagedT1074', 'lateral_movementTA0008Pass_the_

# Text sequences to integer sequences

In [None]:
from keras.preprocessing.text import Tokenizer

def get_tokenizer(text):
    """Fit a Keras ``Tokenizer`` on ``text`` and derive both lookup tables.

    Args:
        text: The texts handed to ``Tokenizer.fit_on_texts``.

    Returns:
        Tuple ``(tokenizer, word2idx, idx2word)`` where ``word2idx`` is a copy
        of the fitted ``tokenizer.word_index`` and ``idx2word`` is its inverse.
    """
    # 200 is an arbitrary cap that is large enough for this vocabulary;
    # lower=False leaves the case of the tokens untouched
    tokenizer = Tokenizer(lower=False, num_words=200)
    tokenizer.fit_on_texts(text)

    # build the word2idx and idx2word dictionaries
    word2idx = dict(tokenizer.word_index)
    idx2word = dict((index, word) for word, index in tokenizer.word_index.items())

    return tokenizer, word2idx, idx2word

In [None]:
def get_encoder_decoder_indices(encoder_input_text, decoder_input_text, decoder_target_text):
    """Convert encoder/decoder token sequences into integer index sequences.

    Two tokenizers are fitted through ``get_tokenizer``: one on the encoder
    texts and one on the concatenation of decoder inputs and decoder targets,
    so that both decoder lists share a single vocabulary. The vocabularies and
    a few sample conversions are printed for inspection.

    Args:
        encoder_input_text: Encoder token sequences.
        decoder_input_text: Decoder input token sequences.
        decoder_target_text: Decoder target token sequences.

    Returns:
        Tuple ``(encoder_input_indices, decoder_input_indices,
        decoder_target_indices, encoder_word2idx, encoder_idx2word,
        decoder_word2idx, decoder_idx2word)``.
    """
    encoder_tokenizer, encoder_word2idx, encoder_idx2word = get_tokenizer(encoder_input_text)
    print("encoder_word2idx:", encoder_word2idx)
    print("encoder_idx2word:", encoder_idx2word)
    encoder_input_indices = encoder_tokenizer.texts_to_sequences(encoder_input_text)

    # fit on inputs and targets together so both are indexed by the same vocabulary
    decoder_tokenizer, decoder_word2idx, decoder_idx2word = get_tokenizer(decoder_input_text + decoder_target_text)
    print("\ndecoder_word2idx:", decoder_word2idx)
    print("decoder_idx2word:", decoder_idx2word)
    decoder_input_indices = decoder_tokenizer.texts_to_sequences(decoder_input_text)
    decoder_target_indices = decoder_tokenizer.texts_to_sequences(decoder_target_text)

    # Preview a few fixed samples; positions beyond the end of the data are skipped
    # instead of raising IndexError on small datasets.
    num_samples = min(len(encoder_input_indices), len(decoder_input_indices), len(decoder_target_indices))
    print()
    for sample_idx in (0, 69, 169, 650):
        if sample_idx >= num_samples:
            continue
        print(
            f"encoder_input_indices[{sample_idx}]:", encoder_input_indices[sample_idx],
            f"\ndecoder_input_indices[{sample_idx}]:", decoder_input_indices[sample_idx],
            f"\ndecoder_target_indices[{sample_idx}]:", decoder_target_indices[sample_idx],
            "\n",
        )

    return encoder_input_indices, decoder_input_indices, decoder_target_indices, encoder_word2idx, encoder_idx2word, decoder_word2idx, decoder_idx2word

In [None]:
# get input_indices and word conversion dicts
encoder_input_indices, decoder_input_indices, decoder_target_indices, encoder_word2idx, encoder_idx2word, decoder_word2idx, decoder_idx2word \
= \
get_encoder_decoder_indices(encoder_input_text, decoder_input_text, decoder_target_text)

encoder_word2idx: {'credential_accessTA0006Credential_DumpingT1003': 1, 'discoveryTA0007File_and_Directory_DiscoveryT1083': 2, 'collectionTA0009Data_StagedT1074': 3, 'defense_evasionTA0005File_DeletionT1107': 4, 'discoveryTA0007Remote_System_DiscoveryT1018': 5, 'defense_evasionTA0005Deobfuscate/Decode_Files_or_InformationT1140': 6, 'lateral_movementTA0008Remote_File_CopyT1105': 7, 'exfiltrationTA0010Exfiltration_Over_Command_and_Control_ChannelT1041': 8, 'collectionTA0009Email_CollectionT1114': 9, 'collectionTA0009Input_CaptureT1056': 10, 'discoveryTA0007System_Time_DiscoveryT1124': 11, 'credential_accessTA0006Credentials_in_FilesT1081': 12, 'defense_evasionTA0005Indicator_Removal_on_HostT1070': 13, 'credential_accessTA0006Input_CaptureT1056': 14, 'persistenceTA0003New_ServiceT1050': 15, 'credential_accessTA0006Credentials_in_RegistryT1214': 16, 'lateral_movementTA0008Remote_Desktop_ProtocolT1076': 17, 'persistenceTA0003Scheduled_TaskT1053': 18, 'discoveryTA0007Account_DiscoveryT1087':

# Padding, Split & One-hot 🔥

In [None]:
max_encoder_seq_length = max([len(sequence) for sequence in encoder_input_indices])
max_decoder_seq_length = max([len(sequence) for sequence in decoder_input_indices])

print("Max sequence length for encoder:", max_encoder_seq_length)
print("Max sequence length for decoder:", max_decoder_seq_length)

Max sequence length for encoder: 5
Max sequence length for decoder: 31


In [None]:
from keras.preprocessing.sequence import pad_sequences

def get_padded_inputs(encoder_input_indices, decoder_input_indices, decoder_target_indices):
    """Post-pad the decoder input and target index sequences to a common length.

    The padded length is read from the notebook-level variable
    ``max_decoder_seq_length``, which therefore has to be defined before this
    function is called.

    Args:
        encoder_input_indices: Not used; kept so existing calls keep working.
        decoder_input_indices: Decoder input index sequences.
        decoder_target_indices: Decoder target index sequences.

    Returns:
        Tuple ``(padded_decoder_input, padded_decoder_target)`` as returned by
        ``pad_sequences`` (int32, padding appended after each sequence).
    """
    padded_decoder_input = pad_sequences(decoder_input_indices, maxlen=max_decoder_seq_length, dtype='int32', padding='post')
    padded_decoder_target = pad_sequences(decoder_target_indices, maxlen=max_decoder_seq_length, dtype='int32', padding='post')

    # Preview two fixed samples; a position is skipped when the data is too small.
    num_samples = min(len(padded_decoder_input), len(padded_decoder_target))
    print()
    for sample_idx in (0, 69):
        if sample_idx < num_samples:
            print(f"padded_decoder_input[{sample_idx}]", padded_decoder_input[sample_idx])
            print(f"padded_decoder_target[{sample_idx}]", padded_decoder_target[sample_idx])

    return padded_decoder_input, padded_decoder_target

In [None]:
padded_decoder_input, padded_decoder_target = get_padded_inputs(encoder_input_indices, decoder_input_indices, decoder_target_indices)


padded_decoder_input[0] [ 5 25 28 11  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0]
padded_decoder_target[0] [25 28 11  6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0]
padded_decoder_input[69] [ 5  9  2 22  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0]
padded_decoder_target[69 [ 9  2 22  6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0]


In [None]:
def get_train_test_data(num_train_sample, encoder_input_indices, padded_decoder_input, padded_decoder_target):
    """Shuffle the samples with a fixed seed and split them into train and test sets.

    The three inputs are index-aligned; the same shuffled order is applied to
    all of them, so sample ``k`` of every returned train (or test) list belongs
    to the same original sample.

    Side effect: the module-level ``random`` generator is re-seeded with 69.

    NOTE(review): the split is done per sample. If the samples are overlapping
    sliding windows cut from the same sequences (as produced by
    ``get_encoder_decoder_text`` in this notebook), windows of one sequence can
    land in both splits - confirm this is acceptable for evaluation.

    Args:
        num_train_sample: Number of shuffled samples assigned to the train set.
        encoder_input_indices: Encoder index sequences.
        padded_decoder_input: Padded decoder inputs, aligned with the encoder data.
        padded_decoder_target: Padded decoder targets, aligned with the encoder data.

    Returns:
        Tuple ``(encoder_train_input, decoder_train_input, decoder_train_target,
        encoder_test_input, decoder_test_input, decoder_test_target)`` of lists.
    """
    num_total_sample = len(encoder_input_indices)

    # fixed seed -> the same permutation, and therefore the same split, on every run
    random.seed(69)
    random_indices = list(range(num_total_sample))
    random.shuffle(random_indices)
    train_indices = random_indices[:num_train_sample]
    test_indices = random_indices[num_train_sample:]

    def _select(source, indices):
        # gather the rows of `source` in the given order
        return [source[i] for i in indices]

    encoder_train_input = _select(encoder_input_indices, train_indices)
    decoder_train_input = _select(padded_decoder_input, train_indices)
    decoder_train_target = _select(padded_decoder_target, train_indices)

    encoder_test_input = _select(encoder_input_indices, test_indices)
    decoder_test_input = _select(padded_decoder_input, test_indices)
    decoder_test_target = _select(padded_decoder_target, test_indices)

    print("Number of training samples:", len(encoder_train_input))
    print("Number of testing samples:", len(encoder_test_input))

    # Preview a few fixed samples; positions that do not exist in a small split
    # are skipped instead of raising IndexError.
    previews = (
        ("train", 0, encoder_train_input, decoder_train_input, decoder_train_target),
        ("train", 69, encoder_train_input, decoder_train_input, decoder_train_target),
        ("test", 0, encoder_test_input, decoder_test_input, decoder_test_target),
        ("test", 15, encoder_test_input, decoder_test_input, decoder_test_target),
    )
    print()
    for split, sample_idx, enc, dec_in, dec_tgt in previews:
        if sample_idx >= len(enc):
            continue
        print(
            f"encoder_{split}_input[{sample_idx}]:", enc[sample_idx],
            f"\ndecoder_{split}_input[{sample_idx}]:", dec_in[sample_idx],
            f"\ndecoder_{split}_target[{sample_idx}]:", dec_tgt[sample_idx],
            "\n",
        )

    return encoder_train_input, decoder_train_input, decoder_train_target, encoder_test_input, decoder_test_input, decoder_test_target

In [None]:
# split the data into train and test set
# 80% of the samples (rounded down by int()) go to the train set, the rest to the test set.
num_total_sample = len(encoder_input_indices)
num_train_sample = int(num_total_sample * 0.8)
encoder_train_input, decoder_train_input, decoder_train_target, encoder_test_input, decoder_test_input, decoder_test_target \
= \
get_train_test_data(num_train_sample, encoder_input_indices, padded_decoder_input, padded_decoder_target)

Number of training samples: 5468
Number of testing samples: 1367

encoder_train_input[0]: [26, 2, 3, 4, 34] 
decoder_train_input[0]: [ 5  2 30 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0] 
decoder_train_target[0]: [ 2 30 10  6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0] 

encoder_train_input[69]: [6, 49, 5, 7, 1] 
decoder_train_input[69]: [ 5 21  7  2  3  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0] 
decoder_train_target[69]: [21  7  2  3  6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0] 

encoder_test_input[0]: [30, 30, 3, 8, 18] 
decoder_test_input[0]: [ 5 26 28  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0] 
decoder_test_target[0]: [26 28  1  6  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0] 

encoder_test_input[15]: [1, 5, 4, 2, 3] 
decoder_test_input[15]: [ 5 36 1

In [None]:
# Largest index present in each word2idx dictionary.
# Later cells use these values +1 as the one-hot / feature size, leaving room for index 0.
num_encoder_tokens = max(encoder_word2idx.values())
num_decoder_tokens = max(decoder_word2idx.values())

print("Number of unique input tokens:", num_encoder_tokens)
print("Number of unique output tokens:", num_decoder_tokens)

Number of unique input tokens: 50
Number of unique output tokens: 52


In [None]:
def convert_to_onehot(encoder_train_input, decoder_train_input, decoder_train_target):
    """One-hot encode encoder inputs, decoder inputs and decoder targets.

    The number of classes comes from the notebook-level ``num_encoder_tokens``
    and ``num_decoder_tokens``, each increased by one for index 0 (the value
    used for padding).

    NOTE(review): ``tf.one_hot`` turns index 0 into a vector with a 1 in
    channel 0, not into an all-zero vector, so a downstream
    ``Masking(mask_value=0)`` layer will not recognise padded positions -
    confirm this is intended.

    Returns:
        Tuple of three int32 NumPy arrays: ``(encoder_train_input_oh,
        decoder_train_input_oh, decoder_train_target_oh)``.
    """
    # +1 for the 0s that were added as padding
    encoder_depth = num_encoder_tokens + 1
    decoder_depth = num_decoder_tokens + 1

    def _one_hot(indices, depth):
        # encode with TensorFlow, then hand back a plain NumPy array
        return tf.one_hot(indices, depth, dtype='int32').numpy()

    return (
        _one_hot(encoder_train_input, encoder_depth),
        _one_hot(decoder_train_input, decoder_depth),
        _one_hot(decoder_train_target, decoder_depth),
    )

In [None]:
encoder_train_input_oh, decoder_train_input_oh, decoder_train_target_oh \
= \
convert_to_onehot(encoder_train_input, decoder_train_input, decoder_train_target)

print("encoder_train_input_oh shape:", encoder_train_input_oh.shape)
print("decoder_train_input_oh shape:", decoder_train_input_oh.shape)
print("decoder_train_target_oh shape:", decoder_train_target_oh.shape)

encoder_train_input_oh shape: (5468, 5, 51)
decoder_train_input_oh shape: (5468, 31, 53)
decoder_train_target_oh shape: (5468, 31, 53)


In [None]:
encoder_train_input_oh[0]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0]], dtype=int32)

# Defining the training model

In [None]:
from tensorflow.keras import Model, layers, Input
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow.keras.backend as K

from matplotlib import pyplot

In [None]:
def plot_history(history):
    """Print the best epoch and plot the loss and accuracy curves of a training run.

    Works with and without validation metrics: when ``history.history`` has no
    ``val_accuracy`` / ``val_loss`` entries (e.g. ``fit`` was called without
    validation data), only the training curves are reported and plotted.

    Args:
        history: Object with a ``history`` dict mapping metric names to
            per-epoch value lists (as returned by ``Model.fit``). It must
            contain ``loss`` and ``accuracy``.
    """
    metrics = history.history

    if 'val_accuracy' in metrics:
        max_val_acc = max(metrics['val_accuracy'])
        max_val_acc_idx = np.argmax(metrics['val_accuracy'])
        print("Max Validation Accuracy", max_val_acc, " at epoch:", max_val_acc_idx+1, " with Training Accuracy", metrics['accuracy'][max_val_acc_idx])
    else:
        # no validation data was used: report the best training accuracy instead
        max_acc_idx = np.argmax(metrics['accuracy'])
        print("Max Training Accuracy", metrics['accuracy'][max_acc_idx], " at epoch:", max_acc_idx+1)
    print()

    # plot metrics: one figure per metric, training curve plus validation curve if present
    for metric_name in ('loss', 'accuracy'):
        pyplot.plot(metrics[metric_name], label='train')
        validation_key = f'val_{metric_name}'
        if validation_key in metrics:
            pyplot.plot(metrics[validation_key], label='validation')
        pyplot.title(metric_name)
        pyplot.xlabel('epoch')
        pyplot.ylabel(metric_name)
        pyplot.legend()
        pyplot.show()

In [None]:
def get_stacked_model(n_units, encoder_dropout=0.2):
    """Build the stacked LSTM encoder-decoder training model.

    Adapted from https://stackoverflow.com/a/56693548/7737870.

    ``n_units`` defines both the depth and the layer widths. The encoder
    applies its LSTMs in reverse list order (``n_units[-1]`` first,
    ``n_units[0]`` last). The decoder stacks LSTMs of the same widths in the
    same order, and the k-th decoder layer is initialised with the final
    ``[h, c]`` state of the k-th encoder layer.

    Feature sizes are read from the notebook-level ``num_encoder_tokens`` and
    ``num_decoder_tokens`` (each +1 for index 0).

    Args:
        n_units: Sequence of LSTM widths, one entry per stacked layer.
        encoder_dropout: ``dropout`` argument of every encoder LSTM.
            Defaults to 0.2, the value that used to be hard-coded.

    Returns:
        The uncompiled Keras ``Model`` named "stacked_model" with inputs
        ``[encoder_input, decoder_input]`` and a softmax output per timestep.
    """
    K.clear_session()
    num_layers = len(n_units)

    # encoder: keep the final [h, c] of every layer for the matching decoder layer
    encoder_inputs = Input(shape=(None, num_encoder_tokens+1), name="encoder_input")
    encoder_outputs = encoder_inputs
    encoder_states = []
    for j in reversed(range(num_layers)):
        encoder_lstm = layers.LSTM(
            n_units[j],
            return_state=True,
            return_sequences=bool(j),  # only the last encoder layer (j == 0) drops the sequence axis
            dropout=encoder_dropout,
            name=f"encoder_lstm_{num_layers - j}",
        )
        encoder_outputs, h, c = encoder_lstm(encoder_outputs)
        encoder_states += [h, c]

    decoder_inputs = Input(shape=(None, num_decoder_tokens+1), name="decoder_input")
    # NOTE(review): Masking only skips timesteps whose features are all 0. If the
    # decoder input is the one-hot encoding built in this notebook, padding (index 0)
    # has a 1 in channel 0 and is therefore not masked - confirm.
    masked_decoder_inputs = layers.Masking(mask_value=0, name="decoder_masking")(decoder_inputs)
    decoder_outputs = masked_decoder_inputs
    for j in range(num_layers):
        decoder_lstm = layers.LSTM(
            n_units[num_layers - j - 1],
            return_sequences=True,
            return_state=True,
            name=f"decoder_lstm_{j+1}",
        )
        # encoder_states holds [h, c] pairs, so pair j sits at positions 2j and 2j+1
        decoder_outputs, _, _ = decoder_lstm(decoder_outputs, initial_state=encoder_states[2*j:2*(j+1)])

    decoder_dense = layers.Dense(num_decoder_tokens+1, activation='softmax', name="decoder_dense")
    decoder_outputs = decoder_dense(decoder_outputs)

    # define training model
    stacked_model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name="stacked_model")

    return stacked_model

In [None]:
def get_stacked_model_with_dropout(n_units, dropout_at, dropout_value):
    """Build the stacked encoder-decoder model with Dropout layers in the decoder.

    Adapted from https://stackoverflow.com/a/56693548/7737870.

    The encoder is a stack of LSTMs applied in reverse list order
    (``n_units[-1]`` first). The decoder consists of
    ``len(n_units) + len(dropout_at)`` layers: position ``p`` is a ``Dropout``
    layer when ``p`` is listed in ``dropout_at`` and the next LSTM otherwise.
    The k-th decoder LSTM is initialised with the final ``[h, c]`` state of
    the k-th encoder layer.

    Feature sizes are read from the notebook-level ``num_encoder_tokens`` and
    ``num_decoder_tokens`` (each +1 for index 0).

    Args:
        n_units: Sequence of LSTM widths, one entry per stacked layer.
        dropout_at: Positions (0-based, within the decoder stack) of the
            Dropout layers. May be empty.
        dropout_value: Dropout rates; ``dropout_value[k]`` belongs to
            ``dropout_at[k]``.

    Returns:
        The uncompiled Keras ``Model`` named "stacked_model".

    Raises:
        ValueError: If a position has no matching rate, or if positions are
            duplicated or outside the decoder stack.
    """
    num_layers = len(n_units)
    total_decoder_layers = num_layers + len(dropout_at)

    if len(dropout_value) < len(dropout_at):
        raise ValueError("dropout_value must provide a rate for every position in dropout_at")
    dropout_rates = dict(zip(dropout_at, dropout_value))
    # Duplicate or out-of-range positions would leave fewer Dropout slots than planned
    # and force an extra LSTM for which no encoder state exists.
    if len(dropout_rates) != len(dropout_at) or any(not 0 <= position < total_decoder_layers for position in dropout_rates):
        raise ValueError(
            f"dropout_at must hold unique positions in [0, {total_decoder_layers}), got {list(dropout_at)}"
        )

    K.clear_session()

    encoder_inputs = Input(shape=(None, num_encoder_tokens+1), name="encoder_input")
    encoder_outputs = encoder_inputs
    encoder_states = []
    for j in reversed(range(num_layers)):
        encoder_lstm = layers.LSTM(
            n_units[j],
            return_state=True,
            return_sequences=bool(j),  # only the last encoder layer (j == 0) drops the sequence axis
            name=f"encoder_lstm_{num_layers - j}",
        )
        encoder_outputs, h, c = encoder_lstm(encoder_outputs)
        encoder_states += [h, c]

    decoder_inputs = Input(shape=(None, num_decoder_tokens+1), name="decoder_input")
    # NOTE(review): Masking only skips timesteps whose features are all 0. If the
    # decoder input is the one-hot encoding built in this notebook, padding (index 0)
    # has a 1 in channel 0 and is therefore not masked - confirm.
    masked_decoder_inputs = layers.Masking(mask_value=0, name="decoder_masking")(decoder_inputs)
    decoder_outputs = masked_decoder_inputs

    lstm_idx = 0  # number of decoder LSTMs created so far
    for position in range(total_decoder_layers):
        if position in dropout_rates:
            decoder_outputs = layers.Dropout(dropout_rates[position])(decoder_outputs)
        else:
            decoder_lstm = layers.LSTM(
                n_units[num_layers - lstm_idx - 1],
                return_sequences=True,
                return_state=True,
                name=f"decoder_lstm_{lstm_idx+1}",
            )
            # encoder_states holds [h, c] pairs, so pair k sits at positions 2k and 2k+1
            decoder_outputs, _, _ = decoder_lstm(
                decoder_outputs, initial_state=encoder_states[2*lstm_idx:2*(lstm_idx+1)]
            )
            lstm_idx += 1

    decoder_dense = layers.Dense(num_decoder_tokens+1, activation='softmax', name="decoder_dense")
    decoder_outputs = decoder_dense(decoder_outputs)

    # define training model
    stacked_model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name="stacked_model")

    return stacked_model

In [None]:
# this model will pass context vector from only the last encoder layer to the first decoder layer
def get_stacked_model_last_state_only(n_units):
    """Build a stacked encoder-decoder where only one state pair is handed over.

    Adapted from https://stackoverflow.com/a/56693548/7737870.

    The encoder applies its LSTMs in reverse list order (``n_units[-1]``
    first, ``n_units[0]`` last). Only the final ``[h, c]`` state of the last
    encoder layer is used; it initialises the first decoder LSTM. All further
    decoder LSTMs start from their default initial state.

    The first decoder LSTM is created with ``n_units[0]`` units, the width of
    the encoder layer whose state it receives, because an initial state must
    match the layer's own state size. Previously it used ``n_units[-1]``, so
    the model could only be built when ``n_units[0] == n_units[-1]``; for such
    inputs the resulting architecture is unchanged. The remaining decoder
    layers keep the widths ``n_units[len(n_units) - j - 1]``.

    Feature sizes are read from the notebook-level ``num_encoder_tokens`` and
    ``num_decoder_tokens`` (each +1 for index 0).

    Args:
        n_units: Non-empty sequence of LSTM widths, one entry per stacked layer.

    Returns:
        The uncompiled Keras ``Model`` named "stacked_model".

    Raises:
        ValueError: If ``n_units`` is empty.
    """
    num_layers = len(n_units)
    if num_layers == 0:
        raise ValueError("n_units must contain at least one layer size")

    K.clear_session()

    # define encoder
    encoder_inputs = Input(shape=(None, num_encoder_tokens+1), name="encoder_input")
    encoder_outputs = encoder_inputs
    for j in reversed(range(num_layers)):
        encoder_lstm = layers.LSTM(
            n_units[j],
            return_state=True,
            return_sequences=bool(j),  # only the last encoder layer (j == 0) drops the sequence axis
            name=f"encoder_lstm_{num_layers - j}",
        )
        encoder_outputs, h, c = encoder_lstm(encoder_outputs)
    # h and c now belong to the last encoder layer, whose width is n_units[0]
    encoder_states = [h, c]

    # define decoder
    decoder_inputs = Input(shape=(None, num_decoder_tokens+1), name="decoder_input")
    # NOTE(review): Masking only skips timesteps whose features are all 0. If the
    # decoder input is the one-hot encoding built in this notebook, padding (index 0)
    # has a 1 in channel 0 and is therefore not masked - confirm.
    masked_decoder_inputs = layers.Masking(mask_value=0, name="decoder_masking")(decoder_inputs)
    decoder_outputs = masked_decoder_inputs

    # add context vector as the first decoder initial state;
    # the layer width has to equal the width of the state it is seeded with
    first_decoder_lstm = layers.LSTM(n_units[0], return_sequences=True, return_state=True, name="decoder_lstm_1")
    decoder_outputs, _, _ = first_decoder_lstm(decoder_outputs, initial_state=encoder_states)

    # add other decoder layers
    for j in range(1, num_layers):
        decoder_lstm = layers.LSTM(
            n_units[num_layers - j - 1],
            return_sequences=True,
            return_state=True,
            name=f"decoder_lstm_{j+1}",
        )
        decoder_outputs, _, _ = decoder_lstm(decoder_outputs)

    decoder_dense = layers.Dense(num_decoder_tokens+1, activation='softmax', name="decoder_dense")
    decoder_outputs = decoder_dense(decoder_outputs)

    # define training model
    stacked_model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name="stacked_model")

    return stacked_model

In [None]:
# LSTM widths handed to the model builder; one entry per stacked layer
n_units = [1024, 800, 128]
# dropout_at = [1]
# dropout_value = [0.2]
stacked_model = get_stacked_model(n_units)
# stacked_model = get_stacked_model_with_dropout(n_units, dropout_at, dropout_value)
stacked_model.summary()

# compile
# categorical_crossentropy matches the one-hot encoded targets and the softmax output layer
stacked_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Model: "stacked_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
decoder_input (InputLayer)      [(None, None, 53)]   0                                            
__________________________________________________________________________________________________
encoder_input (InputLayer)      [(None, None, 51)]   0                                            
__________________________________________________________________________________________________
decoder_masking (Masking)       (None, None, 53)     0           decoder_input[0][0]              
__________________________________________________________________________________________________
encoder_lstm_1 (LSTM)           [(None, None, 128),  92160       encoder_input[0][0]              
______________________________________________________________________________________

In [None]:
# from tensorflow.keras.utils import plot_model
# plot_model(stacked_model)

In [None]:
# # Subclass ModelCheckpoint
# class MyModelCheckpoint(ModelCheckpoint):

#     def __init__(self, *args, **kwargs):
#         super(MyModelCheckpoint, self).__init__(*args, **kwargs)


#     # redefine the save so it only activates after 200 epochs
#     def on_epoch_end(self, epoch, logs=None):
#         if epoch > 150: super(MyModelCheckpoint, self).on_epoch_end(epoch, logs)

# chkpoint_filepath = "(5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked"
# model_checkpoint_callback = MyModelCheckpoint(
#     filepath=chkpoint_filepath,
#     save_best_only=True,
#     monitor='val_accuracy',
#     mode='max',
#     verbose=0
# )

In [None]:
# K.clear_session()
# # fit
# history = stacked_model.fit( \
#             [encoder_train_input_oh, decoder_train_input_oh], decoder_train_target_oh, \
#             batch_size=64, \
#             epochs=500, \
#             validation_split=0.1,
#             callbacks=model_checkpoint_callback
#         )
# # stacked_model.save("(5_enc,1_layer,32_units)augmented_s2s_stacked")

In [None]:
# Subclass ModelCheckpoint
class MyModelCheckpoint(ModelCheckpoint):
    """``ModelCheckpoint`` that stays idle during the first epochs of a run.

    Until the threshold is passed, the parent's ``on_epoch_end`` is not
    called at all, so nothing is monitored or saved for those epochs.

    Args:
        *args: Forwarded unchanged to ``ModelCheckpoint``.
        save_after_epoch: Keyword-only. The parent logic runs only for epochs
            whose ``epoch`` argument is greater than this value. Defaults to
            150, the value that used to be hard-coded.
        **kwargs: Forwarded unchanged to ``ModelCheckpoint``.
    """

    def __init__(self, *args, save_after_epoch=150, **kwargs):
        super().__init__(*args, **kwargs)
        self.save_after_epoch = save_after_epoch

    # redefine the save so it only activates after a number of epochs
    def on_epoch_end(self, epoch, logs=None):
        # NOTE(review): `epoch` is the index Keras passes to callbacks; presumably
        # zero-based, i.e. one less than the number shown in the progress log - confirm.
        if epoch > self.save_after_epoch:
            super().on_epoch_end(epoch, logs)

# Destination of the best checkpoint; the name encodes the architecture
# that was trained in this run.
chkpoint_filepath = "(5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked"

# Keep only the best model, where "best" means the highest value of the
# 'accuracy' metric reported at the end of an epoch.
model_checkpoint_callback = MyModelCheckpoint(
    filepath=chkpoint_filepath,
    monitor='accuracy',
    mode='max',
    save_best_only=True,
    verbose=0,
)

In [None]:
# NOTE(review): clear_session() runs after stacked_model has already been
# built; confirm that resetting Keras' global state at this point is intended.
K.clear_session()
# fit
# No validation data is passed, so the history only contains training
# metrics (the checkpoint callback monitors 'accuracy', not 'val_accuracy').
history = stacked_model.fit(
    [encoder_train_input_oh, decoder_train_input_oh],
    decoder_train_target_oh,
    batch_size=64,
    epochs=500,
    # `callbacks` is documented as a list of Callback instances.
    callbacks=[model_checkpoint_callback],
)
# stacked_model.save("(5_enc,1_layer,32_units)augmented_s2s_stacked")

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78



INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 153/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 154/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 155/500
Epoch 156/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 157/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 187/500
Epoch 188/500
Epoch 189/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 255/500
Epoch 256/500
Epoch 257/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 284/500
Epoch 285/500
Epoch 286/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 



INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500




INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


INFO:tensorflow:Assets written to: (5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked/assets


Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [None]:
# Best training accuracy of the run and the 1-based epoch where it occurred
# (np.argmax yields a 0-based position, hence the +1 when printing).
accuracy_curve = history.history['accuracy']
max_acc = max(accuracy_curve)
max_acc_idx = np.argmax(accuracy_curve)
print("Max Accuracy", max_acc, " at epoch:", max_acc_idx+1)

# One separate figure per recorded metric: loss first, then accuracy.
for metric_name in ('loss', 'accuracy'):
    pyplot.plot(history.history[metric_name])
    pyplot.show()

NameError: ignored

In [None]:
# plot_history(history)

KeyError: ignored

# Inference mode

In [None]:
def get_inference_model(model):
    """Derive separate encoder and decoder inference models from a trained model.

    Layers are looked up by name (``encoder_lstm_<k>``, ``decoder_lstm_<k>``
    and ``decoder_dense``). The number of stacked recurrent layers and the
    size of each decoder state input are read from the notebook-level
    ``n_units`` list, so that list must describe the model being passed in.

    Args:
        model: Trained model whose ``input`` is indexable as
            ``[encoder_input, decoder_input]``.

    Returns:
        Tuple ``(infer_encoder_model, infer_decoder_model)``.
        The encoder model maps the encoder input to the flat list of states
        of ``encoder_lstm_1 .. encoder_lstm_N`` (everything after the first
        output of each layer). The decoder model takes
        ``[decoder_input, *state_inputs]`` and returns
        ``[dense_output, *new_states]``, with the states ordered
        ``decoder_lstm_1 .. decoder_lstm_N``.
    """
    depth = len(n_units)

    # ---- inference encoder: expose the states of every encoder layer ----
    encoder_states = [
        state
        for layer_no in range(1, depth + 1)
        for state in model.get_layer(f'encoder_lstm_{layer_no}').output[1:]
    ]
    infer_encoder_model = Model(model.input[0], encoder_states)

    # ---- inference decoder: re-wire the trained layers with explicit states ----
    # NOTE(review): decoder_input is fed straight into decoder_lstm_1, so any
    # layer that sat between them in the training graph is bypassed here.
    decoder_inputs = model.input[1]
    decoder_states_inputs, decoder_states = [], []
    decoder_outputs = decoder_inputs
    # decoder_lstm_<k> is paired with n_units read back to front, i.e.
    # decoder_lstm_1 uses n_units[-1] and decoder_lstm_N uses n_units[0].
    for layer_no, units in enumerate(reversed(n_units), start=1):
        # Two state placeholders per layer, created before the layer call.
        state_inputs = [Input(shape=(units,)), Input(shape=(units,))]
        lstm_result = model.get_layer(f'decoder_lstm_{layer_no}')(
            decoder_outputs, initial_state=state_inputs
        )
        decoder_outputs = lstm_result[0]
        decoder_states.extend(lstm_result[1:])
        decoder_states_inputs.extend(state_inputs)

    decoder_outputs = model.get_layer('decoder_dense')(decoder_outputs)
    infer_decoder_model = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states,
        name="augmented_s2s_model",
    )

    return infer_encoder_model, infer_decoder_model

In [None]:
def decode_sequence(infer_encoder_model, infer_decoder_model, input_seq, actual_target_sequence):
    """Predict the decoder tokens of one sample, step by step, with teacher forcing.

    At every step the decoder predicts one token (argmax over its output),
    and the *ground-truth* token of that step is then fed back as the next
    decoder input, whether or not the prediction was correct. Each
    prediction is therefore conditioned on the true history.

    Decoding stops after the step whose ground-truth token is ``<eos>``, or
    when ``actual_target_sequence`` is exhausted, whichever comes first.
    Without the second condition a target lacking ``<eos>`` would be indexed
    past its end.

    Relies on the notebook-level names ``num_encoder_tokens``,
    ``num_decoder_tokens`` and ``decoder_word2idx``.

    Args:
        infer_encoder_model: Model returning the list of initial decoder states.
        infer_decoder_model: Model taking ``[target_step, *states]`` and
            returning ``[token_scores, *new_states]``.
        input_seq: Integer-encoded encoder input for a batch of size 1
            (assumed shape ``(1, timesteps)``; TODO confirm against caller).
        actual_target_sequence: Ground-truth decoder token indices for the
            same sample (1-D sequence).

    Returns:
        List with one predicted token index per executed decoder step.
    """
    input_seq_oh = tf.one_hot(input_seq, num_encoder_tokens+1, dtype='int32').numpy()

    # Single-step decoder input holding only the start token <sos>.
    target_seq = np.zeros((1, 1, num_decoder_tokens+1))
    target_seq[0, 0, decoder_word2idx['<sos>']] = 1

    # Encode the input as state vectors.
    states_value = infer_encoder_model.predict(input_seq_oh)

    eos_idx = decoder_word2idx['<eos>']
    decoded_sequence = []

    # Iterating over the ground truth bounds the loop by its length.
    for actual_token in actual_target_sequence:
        to_split = infer_decoder_model.predict([target_seq] + states_value)
        output_tokens, states_value = to_split[0], to_split[1:]

        # Greedy choice for this step.
        decoded_sequence.append(np.argmax(output_tokens[0, 0]))

        # The prediction for the <eos> position is still recorded above.
        if actual_token == eos_idx:
            break

        # Teacher forcing: the next input is always the ground-truth token
        # (a correct prediction equals it, a wrong one is replaced by it).
        target_seq = np.zeros((1, 1, num_decoder_tokens+1))
        target_seq[0, 0, actual_token] = 1

    return decoded_sequence

In [None]:
from tqdm import tqdm

In [None]:
def predict(infer_encoder_model, infer_decoder_model, encoder_test_input, decoder_test_target):
    """Decode every test sample and report step-level match counts.

    Each sample is decoded with ``decode_sequence``. A decoded step counts
    as accurate when it equals the ground-truth token at the same position.
    The total number of decoded steps and the number of accurate ones are
    printed once all samples are processed.

    Args:
        infer_encoder_model: Encoder inference model passed to ``decode_sequence``.
        infer_decoder_model: Decoder inference model passed to ``decode_sequence``.
        encoder_test_input: Sliceable collection of encoder inputs, one per sample.
        decoder_test_target: Indexable collection of ground-truth decoder
            token sequences, aligned with ``encoder_test_input``.

    Returns:
        List of decoded sequences, one per test sample, in input order.
    """
    predicted_sequences = []
    total_step_count = 0
    accurate_prediction_count = 0

    for sample_idx in tqdm(range(len(encoder_test_input))):
        expected = decoder_test_target[sample_idx]
        decoded_sequence = decode_sequence(
            infer_encoder_model,
            infer_decoder_model,
            # Slicing (not indexing) keeps a length-1 leading axis.
            encoder_test_input[sample_idx:sample_idx+1],
            expected,
        )

        # zip stops at the shorter of the two sequences.
        accurate_prediction_count += sum(
            1 for actual, guessed in zip(expected, decoded_sequence) if actual == guessed
        )
        total_step_count += len(decoded_sequence)
        predicted_sequences.append(decoded_sequence)

    print("\nTotal Predicted Steps:", total_step_count)
    print("Total Accurate Prediction:", accurate_prediction_count)

    return predicted_sequences

In [None]:
def convert_to_seq(seq, idx2word):
    """Translate token indices into words, stopping at the first 0.

    Index 0 acts as the end marker: it and everything after it are ignored.

    Example:
        seq = [30, 30, 3, 8, 18] yields the five words mapped to those indices.

    Args:
        seq: Iterable of token indices.
        idx2word: Mapping from token index to word.

    Returns:
        List of words for the indices that precede the first 0.
    """
    tokens = list(seq)
    # Position of the first 0, or the full length when there is none.
    cutoff = next((pos for pos, token in enumerate(tokens) if token == 0), len(tokens))
    return [idx2word[token] for token in tokens[:cutoff]]

In [None]:
# Reload the saved model from disk. The path literal is the same string as
# `chkpoint_filepath` used by the training checkpoint callback.
from tensorflow.keras import models
model = models.load_model("(5_enc,3_layer,1024_800_128_units)augmented_s2s_stacked")

In [None]:
# Split the reloaded model into separate encoder / decoder inference models.
infer_encoder_model, infer_decoder_model = get_inference_model(model)
infer_decoder_model.summary()
# Decode every test sample; predict() prints the total and accurate step
# counts and returns the decoded index sequences.
predicted_sequences = predict(infer_encoder_model, infer_decoder_model, encoder_test_input, decoder_test_target)

  0%|          | 0/1367 [00:00<?, ?it/s]

Model: "augmented_s2s_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
decoder_input (InputLayer)      [(None, None, 53)]   0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 128)]        0                                            
__________________________________________________________________________________________________
decoder_lstm_1 (LSTM)           [(None, None, 128),  93184       decoder_input[0][0]              
                                                                 input_1[0][0]  

100%|██████████| 1367/1367 [10:14<00:00,  2.22it/s]


Total Predicted Steps: 11583
Total Accurate Prediction: 10088



