# TRANSFORMER ARCHITECTURE FOR INLINED FUNCTION INSTRUCTION IDENTIFICATION

##Step 0: mount Drive and import libraries

In [1]:
from google.colab import drive
import os

#Palmtree imports
from torch import nn
from scipy.ndimage.filters import gaussian_filter1d
from torch.autograd import Variable
import torch
import numpy as np

#Data preprocessing imports
from pickle import load

#Model training imports
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf
tfk = tf.keras
tfkl = tfk.layers

drive.mount('/content/gdrive')

  from scipy.ndimage.filters import gaussian_filter1d


Mounted at /content/gdrive


## Step 1: load Palmtree embedding

Install  bert-pytorch

In [2]:
!pip install bert-pytorch

Collecting bert-pytorch
  Downloading bert_pytorch-0.0.1a4-py3-none-any.whl (22 kB)
Installing collected packages: bert-pytorch
Successfully installed bert-pytorch-0.0.1a4


Import modules and load [pre-trained PalmTree model](https://github.com/palmtreemodel/PalmTree/tree/master/pre-trained_model)

In [3]:
os.chdir("/content/gdrive/MyDrive/Tesi/Palmtree")
from config import *
import eval_utils as utils

palmtree = utils.UsableTransformer(model_path="./palmtree/transformer.ep19", vocab_path="./palmtree/vocab")


Loading Vocab ./palmtree/vocab
Vocab Size:  6631


Copy the how2use code to test correct import and functioning embedding

**NOTE:** might require GPU environment


In [4]:
text = ["mov rbo rdi",
        "mov ebx 0x1",
        "mov rdx rbx",
        "call memcpy",
        "mov [ rcx + rbx ] 0x0",
        "mov rcx rax",
        "mov [ rax ] 0x2e"]

# it is better to make batches as large as possible.
embeddings = palmtree.encode(text)
print("usable embedding of this basicblock:", embeddings)
print("the shape of output tensor: ", embeddings.shape)

usable embedding of this basicblock: [[ 2.38421893e+00  1.58078265e+00  4.70259756e-01  2.52216011e-01
  -1.39860582e+00 -1.04384534e-01 -1.10426629e+00 -1.68176115e+00
  -1.17237246e+00 -1.37533319e+00  5.52811138e-02  3.41558576e+00
  -1.93328857e+00 -9.05740857e-01  1.92183721e+00  1.09623921e+00
   2.85070395e+00 -4.09158278e+00  1.91642272e+00 -2.82078171e+00
   2.21341968e+00 -1.94647503e+00  1.33180404e+00  4.04114914e+00
   1.64850616e+00 -4.89377737e+00 -8.58392656e-01  3.81815314e-01
   1.22160578e+00  8.96680772e-01  1.39022970e+00 -9.57940876e-01
   2.20012975e+00 -3.97315502e-01  1.30700815e+00  2.28564811e+00
  -1.20861506e+00  1.11897182e+00 -1.15110576e+00  3.31326675e+00
   3.54582453e+00  1.17317832e+00  2.26531267e+00 -2.55426836e+00
  -1.19632065e+00 -1.69074559e+00 -1.89549878e-01  8.66954505e-01
   3.60986185e+00 -8.24573815e-01  2.46334076e-01 -8.00894547e+00
   1.17532957e+00  1.64168831e-02  2.99125409e+00  2.44515300e+00
   1.55092001e+00 -2.10120177e+00 -1.78

## Step 2a: load the already preprocessed dataset

In [None]:
os.chdir("/content/gdrive/MyDrive/Tesi/deinliner")

with open("data/input_embedding.npy", "rb") as input_embedding_backup:
  input_embedding = np.load(input_embedding_backup)
with open("data/input_label.npy", "rb") as input_label_backup:
  input_label = np.load(input_label_backup)
with open("data/target.npy", "rb") as target_backup:
  target = np.load(target_backup)


print(input_embedding.shape, input_label.shape, target.shape)

(49272, 20, 128) (49272, 12) (49272, 20)


## Step 2b: Data preprocessing


### 2.1: Load pickled dataset

Unpickle the snippet list object - all original dependencies are necessary

In [5]:
os.chdir("/content/gdrive/MyDrive/Tesi/deinliner")

In [6]:
!pip install pyelftools angr
from extract_dataset import *

Collecting pyelftools
  Downloading pyelftools-0.30-py2.py3-none-any.whl (177 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting angr
  Downloading angr-9.2.76-py3-none-manylinux2014_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting CppHeaderParser (from angr)
  Downloading CppHeaderParser-2.7.4.tar.gz (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.4/54.4 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting GitPython (from angr)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ailment==9.2.76 (from angr)
  Downloading ailment-9.2.76-py3-none-any.whl (27 

In [7]:
with open("data/pickled_data.pickle", "rb") as pickled_dataset:
  snippet_list = load(pickled_dataset)

Quick test

In [8]:
snippet = snippet_list[49]
print(len(snippet.input_seq))
print(len(snippet.target_seq))
for i in range(len(snippet.input_seq)):
  print("{}, {}\n".format(str(snippet.input_seq[i]), snippet.target_seq[i]))



29
29
 mov rdx qword ptr  [ rbp  +  0x40 ] , True

 mov rcx qword ptr  [ rbp  +  0x38 ] , True

 mov rax rdx, True

 sub rax rcx, True

 sar rax 2, True

 mov qword ptr  [ rbp  +  0x50 ]  0, False

 lea r12  [ rbp  +  0x50 ] , False

 lea r13  [ rbp  +  0x68 ] , False

 lea rsi  [ rip  +  0x8ed5 ] , False

 mov qword ptr  [ rbp  +  0x58 ]  0, False

 mov qword ptr  [ rbp ]  rsi, False

 movsxd rsi dword ptr  [ rbp  +  0x10 ] , False

 mov qword ptr  [ rbp  +  0x60 ]  0, False

 mov qword ptr  [ rbp  +  0x68 ]  0, False

 mov qword ptr  [ rbp  +  0x70 ]  0, False

 mov qword ptr  [ rbp  +  0x78 ]  0, False

 mov qword ptr  [ rbp  +  0x80 ]  0, False

 mov qword ptr  [ rbp  +  0x88 ]  0, False

 mov qword ptr  [ rbp  +  0x90 ]  0, False

 cmp rsi rax, True

 ja [addr], True

 jae [addr], True

 lea rax  [ rcx  +  rsi * 4 ] , True

 cmp rax rdx, True

 je [addr], True

 mov qword ptr  [ rbp  +  0x40 ]  rax, True

 sub rsi rax, True

 lea rdi  [ rbp  +  0x38 ] , False

 call [addr], True



### 2.2: Split the dataset

Perform the train/validation/test split directly on snippets, in order to maintain class distribution and keep more information on test dataset for more precise testing

In [9]:
def percentage_split(data, percentage):
  split_size = round(len(data) * (1-percentage) - 0.5)
  return data[:split_size], data[split_size:]

def split_by_methods(snippet_list, test_perc, val_perc):
  methods_dict = {method : [] for method in METHODS}
  for snippet in snippet_list:
    methods_dict[snippet.method].append(snippet)

  train, val, test= [], [], []
  for key in methods_dict:
    method_train, method_test = percentage_split(methods_dict[key], test_perc)
    method_train, method_val = percentage_split(method_train, val_perc)
    train += method_train
    val += method_val
    test += method_test
  return train, val, test

In [10]:
test_perc = 0.2
val_perc = 0.2
train_snippets, val_snippets, test_snippets = split_by_methods(snippet_list, test_perc, val_perc)

print(len(train_snippets), len(val_snippets), len(test_snippets))

19137 4788 5989


### 2.3 Encode dataset

Create a dictionary for bidirectional encoding of methods - represented via one-hot encoding

In [11]:
method_list = list(METHODS)
num_label_list = [method_list.index(elem) for elem in method_list]
one_hot = tfk.utils.to_categorical(num_label_list)
label_encoding = {method : one_hot[method_list.index(method)] for method in method_list }


For each snippet in training data:
- Produce its embedding via Palmtree, and split sequences according to window length
- Encode its method label as one-hot
- Encode its target sequence as a binary sequence

All in the form of numpy arrays to feed into the model

Note: encoding is quite slow!

In [14]:
def segment2D(vector, window_len):
  if (len(vector) % window_len > 0):
    num_windows = (len(vector)//window_len)+1
    ceiling = num_windows * window_len
    print("Size {} will be extended to {}".format(len(vector), ceiling))
    vector = np.pad(vector, ((0, ceiling-len(vector)), (0,0)))
  else:
    num_windows = len(vector)//window_len
  segments = np.split(vector, num_windows)
  return segments

def segment1D(vector, window_len):
  if (len(vector) % window_len > 0):
    num_windows = (len(vector)//window_len)+1
    ceiling = num_windows * window_len
    print("Size {} will be extended to {}".format(len(vector), ceiling))
    vector = np.pad(vector, (0, ceiling-len(vector)))
  else:
    num_windows = len(vector)//window_len
  segments = np.split(vector, num_windows)
  return segments


def encode_snippet(snippet, window_len):
  embedding = palmtree.encode(snippet.input_seq)
  embedded_seq_list = segment2D(embedding, window_len)
  encoded_method_list = [label_encoding[snippet.method] for i in range(len(embedded_seq_list))]
  encoded_target = np.array([1 if inl else 0 for inl in snippet.target_seq])
  encoded_target_list = segment1D(encoded_target, window_len)
  return embedded_seq_list, encoded_method_list, encoded_target_list

def encode_snippet_list(snippet_list, window_len):
  input_embedding_list = []
  input_label_list = []
  target_list = []
  for snippet in snippet_list:
    input_seqs, input_labels, target_seqs = encode_snippet(snippet, window_len)
    input_embedding_list += (input_seqs)
    input_label_list += (input_labels)
    target_list +=(target_seqs)

  input_embedding = np.stack(input_embedding_list)
  input_label = np.stack(input_label_list)
  target = np.stack(target_list)
  return input_embedding, input_label, target



#### Option A: extract and encode data from scratch

In [15]:
window_len = 20
x1_train, x2_train, y_train  = encode_snippet_list(train_snippets, window_len)
x1_val, x2_val, y_val = encode_snippet_list(val_snippets, window_len)

print(x1_train.shape, x2_train.shape, y_train.shape)
print(x1_val.shape, x2_val.shape, y_val.shape)

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 5 will be extended to 20
Size 5 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 3 will be extended to 20
Size 48 will be extended to 60
Size 

In [18]:
with open("data/train.npz", "wb") as train_data_backup:
  np.savez(train_data_backup, x1_train=x1_train, x2_train=x2_train, y_train=y_train)
with open("data/val.npz", "wb") as val_data_backup:
  np.savez(val_data_backup, x1_val=x1_val, x2_val=x2_val, y_val=y_val)


#### Option B: load already encoded data

In [20]:
with open("data/train.npz", "rb") as train_data_backup:
  test_data = np.load(train_data_backup)
  x1_train = test_data['x1_train']
  x2_train = test_data['x2_train']
  y_train = test_data['y_train']

with open("data/val.npz", "rb") as val_data_backup:
  test_data = np.load(val_data_backup)
  x1_val = test_data['x1_val']
  x2_val = test_data['x2_val']
  y_val = test_data['y_val']

print(x1_train.shape, x2_train.shape, y_train.shape)
print(x1_val.shape, x2_val.shape, y_val.shape)

(31192, 20, 128) (31192, 12) (31192, 20)
(8221, 20, 128) (8221, 12) (8221, 20)


## Step 3: build the model and train it

Perform the train/test/validation split

In [79]:
def build_LSTM(input_shape):
  input_layer = tfkl.Input(shape=input_shape, name='Input')
  masking = tfkl.Masking(mask_value=0)(input_layer)
  first_lstm = tfkl.Bidirectional(tfkl.LSTM(32, return_sequences=True, input_shape=input_shape))(masking)
  second_lstm = tfkl.Bidirectional(tfkl.LSTM(32, return_sequences=True, input_shape=input_shape))(first_lstm)
  third_lstm = tfkl.Bidirectional(tfkl.LSTM(32, return_sequences=True, input_shape=input_shape))(second_lstm)
  dense_layer = tfkl.Dense(32, activation='leaky_relu')(third_lstm)
  output_dense = tfkl.Dense(1, activation='sigmoid')(dense_layer)
  output_layer = tfkl.Flatten()(output_dense)

  model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')
  model.compile(loss='binary_crossentropy', optimizer=tfk.optimizers.Adam(1e-2), metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.5)])
  return model

print(x1_train.shape[1:])
decoder = build_LSTM(x1_train.shape[1:])

(20, 128)


Print summary

In [22]:
decoder.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 20, 128)]         0         
                                                                 
 masking (Masking)           (None, 20, 128)           0         
                                                                 
 bidirectional (Bidirection  (None, 20, 64)            41216     
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 20, 64)            24832     
 onal)                                                           
                                                                 
 dense (Dense)               (None, 20, 32)            2080      
                                                                 
 dense_1 (Dense)             (None, 20, 1)             33    

In [80]:
batch_size = 256
epochs = 200

history = decoder.fit(
    x = x1_train,
    y = y_train,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = (x1_val, y_val),
    callbacks = [
        tfk.callbacks.EarlyStopping(monitor='val_binary_accuracy', mode='max', patience=10, restore_best_weights=True),
        tfk.callbacks.ReduceLROnPlateau(monitor='val_binary_accuracy', mode='max', patience=5, factor=0.5, min_lr=1e-5)
    ]
).history

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200


Optionally, save the model

In [64]:
!mkdir models
!ls
decoder.save("models/32BiLSTMx2+32CNN")

asm_extraction	dwarf_parsing	    models	 snippet_creation
data		extract_dataset.py  __pycache__  utils


## Step 4: evaluate on test data

### 4.1 Sort test data according to different criterias


Main criterias observed for each snippet include:
- Method
- Optimization
- Block length
- Instruction length
- Percentage of inlined instructions? Idk

In [71]:
max_blocksize = max([snippet.blocksize for  snippet in test_snippets])
criterias = {"Method": [METHODS, lambda x : x.method],
             "Optimization": [OPT_LEVELS, lambda x : x.opt],
             "Block size": [{snippet.blocksize for snippet in test_snippets}, lambda x : x.blocksize],
             #"Instruction size": [{len(snippet.instructions) for snippet in test_snippets}, lambda x : len(x.instructions)],
}


def initialize_sorted_dict(criterias):
  criterias_sorted_data = {}
  for key in criterias:
    criteria_dict = {label : [] for label in criterias[key][0]}
    criterias_sorted_data[key] = criteria_dict
  return criterias_sorted_data


def sort_by_criteria(snippet_list, criterias, window_len):
  sortby_criterias_test_input = initialize_sorted_dict(criterias)
  sortby_criterias_test_target = initialize_sorted_dict(criterias)
  for snippet in snippet_list:
    input_seqs, input_labels, target_seqs = encode_snippet(snippet, window_len)
    for key in criterias:
      func = criterias[key][1]
      sortby_criterias_test_input[key][func(snippet)] += input_seqs
      sortby_criterias_test_target[key][func(snippet)] += target_seqs

  for key in criterias:
    for label in sortby_criterias_test_input[key]:
      test_input = sortby_criterias_test_input[key][label]
      test_target = sortby_criterias_test_target[key][label]
      if len(test_input) > 0:
        sortby_criterias_test_input[key][label] = np.stack(test_input)
        sortby_criterias_test_target[key][label] = np.stack(test_target)

  return sortby_criterias_test_input, sortby_criterias_test_target

sortby_criterias_test_input, sortby_criterias_test_target = sort_by_criteria(test_snippets, criterias, window_len)


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
Size 7 will be extended to 20
Size 7 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 7 will be extended to 20
Size 7 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 7 will be extended to 20
Size 7 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 7 will be extended to 20
Size 7 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 7 will be extended to 20
Size 7 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4 will be extended to 20
Size 4

Custom function considering precision over whole sequences

In [81]:
def eval_seq2seq_prediction(y_true, y_pred, cap):
  total_preds = len(y_true)
  cap = 10
  accuracy_sorted_dict = {key: 0 for key in range(0, cap)}
  accuracy_sorted_dict[cap] = 0
  for i in range(total_preds):
    wrong_preds = np.count_nonzero(y_true[i] != y_pred[i])
    if wrong_preds < cap:
      accuracy_sorted_dict[wrong_preds] += 1
    else:
      accuracy_sorted_dict[cap] += 1
  return accuracy_sorted_dict

def print_seq2seq_prediction_report(accuracy_sorted_dict):
  total_preds = sum([value for value in accuracy_sorted_dict.values()])
  cap = max(accuracy_sorted_dict.keys())
  for key in accuracy_sorted_dict:
    pred_count = accuracy_sorted_dict[key]
    if pred_count != 0 and key != cap:
      pred_perc = pred_count/total_preds * 100
      print("{} sequences ({}%) have been predicted with {} mistakes".format(pred_count, pred_perc, key))
    elif key == cap:
      pred_perc = pred_count/total_preds * 100
      print("{} sequences ({}%) have been predicted with more than {} mistakes".format(pred_count, pred_perc, key))
  return

Evaluation considers data in two forms
1. All predictions flattened to a single set, evaluating global precision on individual instructions (biased by padding)
2. Prediction of sequences

In [82]:
method_input = sortby_criterias_test_input["Method"]
method_target = sortby_criterias_test_target["Method"]

for key in method_input:
  header = "### EVALUATION REPORT FOR DATA OF METHOD {}:\n".format(key)
  print(header)
  y_true = method_target[key]
  y_pred = np.round(decoder.predict(method_input[key]))
  print(metrics.confusion_matrix(y_true.flatten(), y_pred.flatten()))
  print(metrics.classification_report(y_true.flatten(),y_pred.flatten()))
  cap = 10
  sequence_accuracy_table = eval_seq2seq_prediction(y_true, y_pred, cap)
  print_seq2seq_prediction_report(sequence_accuracy_table)
  print('\n\n')

### EVALUATION REPORT FOR DATA OF METHOD std::map::operator[]:

[[ 4991   663]
 [  229 24237]]
              precision    recall  f1-score   support

           0       0.96      0.88      0.92      5654
           1       0.97      0.99      0.98     24466

    accuracy                           0.97     30120
   macro avg       0.96      0.94      0.95     30120
weighted avg       0.97      0.97      0.97     30120

1258 sequences (83.53253652058433%) have been predicted with 0 mistakes
93 sequences (6.175298804780876%) have been predicted with 1 mistakes
41 sequences (2.7224435590969454%) have been predicted with 2 mistakes
18 sequences (1.1952191235059761%) have been predicted with 3 mistakes
22 sequences (1.4608233731739706%) have been predicted with 4 mistakes
25 sequences (1.6600265604249667%) have been predicted with 5 mistakes
9 sequences (0.5976095617529881%) have been predicted with 6 mistakes
7 sequences (0.46480743691899074%) have been predicted with 7 mistakes
10 sequence

In [75]:
method_input = sortby_criterias_test_input["Optimization"]
method_target = sortby_criterias_test_target["Optimization"]

for key in method_input:
  header = "### EVALUATION REPORT FOR DATA OF METHOD {}:\n".format(key)
  print(header)
  y_true = method_target[key]
  y_pred = np.round(decoder.predict(method_input[key]))
  print(metrics.confusion_matrix(y_true.flatten(), y_pred.flatten()))
  print(metrics.classification_report(y_true.flatten(),y_pred.flatten()))
  cap = 10
  sequence_accuracy_table = eval_seq2seq_prediction(y_true, y_pred, cap)
  print_seq2seq_prediction_report(sequence_accuracy_table)
  print('\n\n')

### EVALUATION REPORT FOR DATA OF METHOD -O2:

[[30561  2277]
 [ 1137 39985]]
              precision    recall  f1-score   support

           0       0.96      0.93      0.95     32838
           1       0.95      0.97      0.96     41122

    accuracy                           0.95     73960
   macro avg       0.96      0.95      0.95     73960
weighted avg       0.95      0.95      0.95     73960

2282 sequences (61.70903190914008%) have been predicted with 0 mistakes
640 sequences (17.306652244456462%) have been predicted with 1 mistakes
331 sequences (8.950784207679826%) have been predicted with 2 mistakes
180 sequences (4.86749594375338%) have been predicted with 3 mistakes
81 sequences (2.190373174689021%) have been predicted with 4 mistakes
73 sequences (1.9740400216333154%) have been predicted with 5 mistakes
46 sequences (1.2439156300703083%) have been predicted with 6 mistakes
20 sequences (0.5408328826392644%) have been predicted with 7 mistakes
10 sequences (0.27041644131

In [83]:
opt_input = sortby_criterias_test_input["Block size"]
opt_target = sortby_criterias_test_target["Block size"]

for key in opt_input:
  header = "### EVALUATION REPORT FOR DATA OF BLOCK SIZE {}:\n".format(key)
  print(header)
  y_true = opt_target[key]
  y_pred = np.round(decoder.predict(opt_input[key]))
  print(metrics.confusion_matrix(y_true.flatten(), y_pred.flatten()))
  print(metrics.classification_report(y_true.flatten(),y_pred.flatten()))
  cap = 10
  sequence_accuracy_table = eval_seq2seq_prediction(y_true, y_pred, cap)
  print_seq2seq_prediction_report(sequence_accuracy_table)
  print('\n\n')

### EVALUATION REPORT FOR DATA OF BLOCK SIZE 1:

[[26886   121]
 [   81  4372]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     27007
           1       0.97      0.98      0.98      4453

    accuracy                           0.99     31460
   macro avg       0.99      0.99      0.99     31460
weighted avg       0.99      0.99      0.99     31460

1454 sequences (92.43483788938335%) have been predicted with 0 mistakes
70 sequences (4.4500953591862675%) have been predicted with 1 mistakes
28 sequences (1.7800381436745074%) have been predicted with 2 mistakes
14 sequences (0.8900190718372537%) have been predicted with 3 mistakes
4 sequences (0.25429116338207247%) have been predicted with 4 mistakes
1 sequences (0.06357279084551812%) have been predicted with 5 mistakes
1 sequences (0.06357279084551812%) have been predicted with 6 mistakes
1 sequences (0.06357279084551812%) have been predicted with 7 mistakes
0 sequences (0.0%) have 