In [1]:
# This is a tutorial on using this library
# first off we need a text_encoder so we would know our vocab_size (and later on use it to encode sentences)
from data.vocab import SentencePieceTextEncoder  # you could also import OpenAITextEncoder

sentence_piece_encoder = SentencePieceTextEncoder(text_corpus_address='openai/model/params_shapes.json',
                                                  model_name='tutorial', vocab_size=20)

if you want to use OpenAI's encoder and pretrained model, please install spacy, and ftfy
if you want to use Google's encoder and pretrained models, please clone the bert submodule


In [2]:
# now we need a sequence encoder
from transformer.model import create_transformer

sequence_encoder_config = {
    'embedding_dim': 6,
    'vocab_size': sentence_piece_encoder.vocab_size,
    'max_len': 8,
    'trainable_pos_embedding': False,
    'num_heads': 2,
    'num_layers': 3,
    'd_hid': 12,
    'use_attn_mask': True
}
sequence_encoder = create_transformer(**sequence_encoder_config)
import keras

assert type(sequence_encoder) == keras.Model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# now look at the inputs:
print(sequence_encoder.inputs)  # tokens, segment_ids, pos_ids, attn_mask

[<tf.Tensor 'token_input:0' shape=(?, 8) dtype=int32>, <tf.Tensor 'segment_input:0' shape=(?, 8) dtype=int32>, <tf.Tensor 'position_input:0' shape=(?, 8) dtype=int32>, <tf.Tensor 'attention_mask_input:0' shape=(?, 1, 8, 8) dtype=float32>]


In [4]:
# tokens is a batch_size * seq_len tensor containing token_ids
# segment_ids is a batch_size * seq_len tensor containing segment_ids (as in segment_{a, b} of BERT)
# pos_ids is a batch_size * seq_len tensor containing position ids (0..max_len)(you will see how can easily generate it)
# attn_mask is a batch_size * 1 * max_len * max_len tensor and can encode padding and causality constraints (ignore it for now)

In [5]:
# for outputs we have:
print(sequence_encoder.outputs)

[<tf.Tensor 'layer_2/ln_2/add_1:0' shape=(?, 8, 6) dtype=float32>]


In [6]:
# 'a long name' is a batch_size * max_len * embedding_dim tensor which is our encoded sequence (here with a transformer)

In [7]:
# now it's time to train it both on pre-training tasks and fine-tuning tasks
# first we need to define our tasks:

In [8]:
from data.dataset import TaskMetadata, TaskWeightScheduler

tasks = [TaskMetadata('lm', is_token_level=True,
                      num_classes=sentence_piece_encoder.vocab_size + sentence_piece_encoder.SPECIAL_COUNT,
                      dropout=0,
                      weight_scheduler=TaskWeightScheduler(active_in_pretrain=True, active_in_finetune=False,
                                                           pretrain_value=1.0))]

In [9]:
# well let's pause and see what this task is, 'lm' is the name of the task
# and 'lm' is also a special task, because it uses a tied decoder (if you don't know what it means, ignore it)
# then num_classes is set to vocab+special_count which is actually incorrect (we are never going to predict mask, pad, )
# but it's here for the tied decoder to work; dropout is for the decoder of this task
# and finally a weight_scheduler, in this example we are only training on 'lm' task during the pretraing but not after
# now let's add a more complex task, a sentence level one with a complex weight_scheduler

In [10]:
class ComplexTaskWeightScheduler(TaskWeightScheduler):  # note: this is an example, it is not a clean code
    def __init__(self, number_of_pretrain_steps, number_of_finetune_steps):
        super().__init__(active_in_pretrain=True, active_in_finetune=True)
        self.number_of_pretrain_steps = number_of_pretrain_steps
        self.number_of_finetune_steps = number_of_finetune_steps

    def get(self, is_pretrain: bool, step: int) -> float:
        return step / (self.number_of_pretrain_steps if is_pretrain else self.number_of_finetune_steps)


number_of_pretrain_steps = 100
number_of_finetune_steps = 100
# in this task we are going to count the number of tokens in a sentence and predict if it's odd or not
#tasks.append(TaskMetadata('odd', is_token_level=False, num_classes=2, dropout=0.3,
#                         weight_scheduler=ComplexTaskWeightScheduler(number_of_pretrain_steps,
#                                                                     number_of_finetune_steps)))

# and let's add a unsolvable task for fun

In [11]:
# now we need a data generator, for a good reference see data.lm_dataset._get_lm_generator_single or _double
# but for now we are going to write a simple one so you understand the Sentence class
# again this is a simple generator just showing you the core ideas
# so for 'lm' task we are just going to predict the token itself (identity function)
# first we are importing things, ignore them for now, I will explain them in a bit

In [12]:
from data.dataset import Sentence, TokenTaskData, SentenceTaskData, TextEncoder
from data.lm_dataset import _create_batch
import random


def tutorial_batch_generator(vocab_size: int, max_len: int, batch_size: int, steps: int):
    def sentence_generator():
        for _ in range(steps):
            # for each sentence we are going to generate up to max_len tokens
            seq_len = random.randint(1, max_len - 1)
            # and this is their ids (in reality we have to use our TextEncoder instance here)
            tokens = [random.randrange(vocab_size) for _ in range(seq_len)]
            # we manually set the last token to EOS (which we will see how it's calculated)
            tokens[-1] = eos_id
            yield Sentence(
                tokens=tokens,
                padding_mask=[True] * seq_len,  # it means that non of the original tokens are padding
                segments=[0] * seq_len,  # for this simple example we are going to use segment_a(0) for all of them
                token_classification={  # we put labels here (for token level tasks)
                    # name_of_the_task: TokenTaskData(target(aka label), label_mask)
                    # there might be situations that you are only interested in predictions for certain tokens,
                    # you can use mask in those situations (see the bert paper to understand this)
                    'lm': TokenTaskData(tokens, [True] * seq_len),
                    # this task is unsolvable so we will see the loss not decreasing
                },
                # similar to token_classification, it's also a dictionary of task to label
                # SentenceTaskData contains (label, where to extract that label_from)
                # in this case we are going to predict whether a sentence has
                # odd number of tokens or not whenever we see eos token
                sentence_classification={}
            )

    # we need eos_id and it's always at this place
    eos_id = vocab_size + TextEncoder.EOS_OFFSET
    # likewise for pad_id
    pad_id = vocab_size + TextEncoder.PAD_OFFSET
    generator = sentence_generator()
    batch = []
    for item in generator:
        batch.append(item)
        if len(batch) == batch_size:
            batch = _create_batch(batch, pad_id, max_len)  # magic to pad and batch sentences
            # at the end it will generate a SentenceBatch which is more than just a list of Sentence
            yield batch
            batch = []

In [13]:
# now we instantiate our generator
# we are going to set steps to a large number (it doesn't matter)
# we have to set batch_size too

In [14]:
batch_size = 5
generator = tutorial_batch_generator(sentence_piece_encoder.vocab_size, sequence_encoder_config['max_len'],
                                     batch_size, steps=10000)

In [15]:
# now let the fun begin :D

In [16]:
from transformer.train import train_model

In [17]:
# we are going to use the same generator for both pretrain and finetune

In [18]:
m = train_model(base_model=sequence_encoder, is_causal=False, tasks_meta_data=tasks, pretrain_generator=generator,
                finetune_generator=generator, pretrain_epochs=100, pretrain_steps=number_of_pretrain_steps // 100,
                finetune_epochs=100, finetune_steps=number_of_finetune_steps // 100, verbose=2)
# now m is ready to be used!
print(m.inputs)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


training:[array([[ 1,  0, 14, 24, 20, 20, 20, 20],
       [13,  8, 12,  2, 11,  7, 24, 20],
       [ 2,  9,  8, 24, 20, 20, 20, 20],
       [13,  6,  6, 10, 12, 24, 20, 20],
       [ 5,  5,  3,  0, 24, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1

 - 4s - loss: 3.2062
Epoch 2/100
training:[array([[12, 16,  3,  6,  4,  3, 24, 20],
       [15, 14, 24, 20, 20, 20, 20, 20],
       [18, 15, 11, 14, 11,  1, 24, 20],
       [ 7,  3, 13, 24, 20, 20, 20, 20],
       [10, 10,  2, 14, 24, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 0., 0.,

target: [[ 0 12 24  0  0  0  0  0]
 [ 3 12 24  0  0  0  0  0]
 [ 6  1 14  0 19 24  0  0]
 [19 19 10 16  2  6 24  0]
 [16 10  7 11 24  0  0  0]]
Epoch 6/100training:[array([[11, 24, 20, 20, 20, 20, 20, 20],
       [15, 17, 24, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [10, 14,  8, 19,  9, 24, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.

Epoch 10/100
training:[array([[15, 14, 24, 20, 20, 20, 20, 20],
       [11,  6,  5, 19,  9,  1, 24, 20],
       [ 6, 18, 10,  2, 24, 20, 20, 20],
       [ 5,  5, 17, 18,  9, 24, 20, 20],
       [17, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 1., 1., 1., 0.],
      

target: [[14 17 17 24  0  0  0  0]
 [ 1  3 24  0  0  0  0  0]
 [12 10 24  0  0  0  0  0]
 [17 15 14  7 24  0  0  0]
 [ 4  2 11 11 24  0  0  0]]
Epoch 14/100training:[array([[ 1, 14, 10, 15,  7, 24, 20, 20],
       [ 3, 24, 20, 20, 20, 20, 20, 20],
       [14, 11, 14, 24, 20, 20, 20, 20],
       [ 6, 24, 20, 20, 20, 20, 20, 20],
       [ 2,  1,  2, 16, 24, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0

Epoch 18/100
training:[array([[ 8,  3, 24, 20, 20, 20, 20, 20],
       [18, 24, 20, 20, 20, 20, 20, 20],
       [ 1, 10,  2,  9, 17,  8, 24, 20],
       [ 4,  9,  0, 17,  7, 24, 20, 20],
       [12, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 0., 0., 0., 0., 0., 0.],
      

         [0., 0., 0., 0., 0., 0., 0., 0.]]]], dtype=float32)]
Epoch 22/100target: [[19 24  0  0  0  0  0  0]
 [ 4  3 13 15 12 24  0  0]
 [ 0 13 15 15  5  5 24  0]
 [ 1  1 24  0  0  0  0  0]
 [17 24  0  0  0  0  0  0]]

training:[array([[ 6,  5,  6, 24, 20, 20, 20, 20],
       [19,  3, 10, 24, 20, 20, 20, 20],
       [ 9, 24, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [14,  5, 15, 11, 11,  0, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [0., 0., 0

         [0., 0., 0., 0., 0., 0., 0., 0.]]]], dtype=float32)]
target: [[16 17 10 11 10 16 24  0]
 [16 16  1 12  8 24  0  0]
 [19 10  3 24  0  0  0  0]
 [ 2 24  0  0  0  0  0  0]
 [12 24  0  0  0  0  0  0]]Epoch 26/100
training:[array([[ 1, 12, 24, 20, 20, 20, 20, 20],
       [ 8, 11, 14, 24, 20, 20, 20, 20],
       [ 1, 12, 10, 24, 20, 20, 20, 20],
       [13, 12, 24, 20, 20, 20, 20, 20],
       [ 8, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0.

target: [[ 5  7 24  0  0  0  0  0]
 [13 24  0  0  0  0  0  0]
 [ 1  9  2 24  0  0  0  0]
 [ 4  2 15 15 24  0  0  0]
 [13 19 13 11 24  0  0  0]]
training:[array([[ 5, 16, 12,  3, 24, 20, 20, 20],
       [ 1,  8, 13, 19, 24, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [ 6, 12, 24, 20, 20, 20, 20, 20],
       [ 4,  5,  2, 18, 24, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
        


target: [[18 15  4 10 24  0  0  0]
 [ 1  2  9  5 24  0  0  0]
 [ 4  1 11 24  0  0  0  0]
 [ 6  0 14 19 16 24  0  0]
 [ 2 18  5  8  9 24  0  0]]Epoch 34/100
training:[array([[11, 24, 20, 20, 20, 20, 20, 20],
       [ 4,  5,  4, 10, 13, 14, 24, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [11,  9, 10, 19, 17,  9, 24, 20],
       [ 3,  9,  3, 13,  6, 24, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 

 [ 3 24  0  0  0  0  0  0]]
Epoch 38/100
training:[array([[ 0,  1, 10,  4, 16, 24, 20, 20],
       [14, 15, 15, 18,  2, 24, 20, 20],
       [ 4, 18, 16, 24, 20, 20, 20, 20],
       [ 2,  3, 15, 17, 24, 20, 20, 20],
       [ 1, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1.,

Epoch 42/100training:[array([[10,  8,  8, 24, 20, 20, 20, 20],
       [ 4,  8,  7, 24, 20, 20, 20, 20],
       [ 2,  7, 19, 16,  7, 15, 24, 20],
       [ 5, 18, 15,  4, 14, 24, 20, 20],
       [14,  5,  4, 16,  9,  2, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [1., 1., 1., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 0., 0., 0., 0.],
       

Epoch 46/100
training:[array([[ 3, 24, 20, 20, 20, 20, 20, 20],
       [ 9, 11, 11, 24, 20, 20, 20, 20],
       [ 0, 24, 20, 20, 20, 20, 20, 20],
       [ 8,  4,  5,  9, 19, 24, 20, 20],
       [ 4,  8, 14, 24, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 0., 0., 0., 0.],
      


target: [[15 12  7 12 14 24  0  0]
 [10  7  3 24  0  0  0  0]
 [10 18 10  0 12 24  0  0]
 [ 2  1 24  0  0  0  0  0]
 [ 5 24  0  0  0  0  0  0]]Epoch 50/100

training:[array([[ 2, 13, 17,  4, 11, 24, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [ 4, 13,  7,  7, 18, 24, 20, 20],
       [10,  6,  8, 24, 20, 20, 20, 20],
       [ 5, 13, 18, 17, 18, 19, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0.,

         [0., 0., 0., 0., 0., 0., 0., 0.]]]], dtype=float32)]
target: [[ 6 24  0  0  0  0  0  0]
 [18 24  0  0  0  0  0  0]
 [ 4  2  4  1 24  0  0  0]
 [11 16 10  8 19 17 24  0]
 [ 9  1 12 14 17 18 24  0]]Epoch 54/100

training:[array([[16, 17, 17,  2, 24, 20, 20, 20],
       [13,  5, 11, 10, 13, 24, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [17, 11, 24, 20, 20, 20, 20, 20],
       [16, 17, 16,  3, 19,  5, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1

Epoch 58/100
 - 0s - loss: 3.1408
training:[array([[14, 17, 24, 20, 20, 20, 20, 20],
       [18, 16,  8,  7, 16,  4, 24, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 1.


 - 0s - loss: 3.1335
target: [[ 4 24  0  0  0  0  0  0]
 [13 10  7 11  5 24  0  0]
 [11  8 24  0  0  0  0  0]
 [ 7 24  0  0  0  0  0  0]
 [ 4 11  4 11  9 24  0  0]]
Epoch 62/100
training:[array([[10,  8, 24, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [14,  0,  4,  5, 24, 20, 20, 20],
       [11, 24, 20, 20, 20, 20, 20, 20],
       [17, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0.

target: [[24  0  0  0  0  0  0  0]
 [ 8 11 18 13 19 16 24  0]
 [ 5 17  9 24  0  0  0  0]
 [15 24  0  0  0  0  0  0]
 [ 9 15  0 24  0  0  0  0]]
Epoch 66/100
training:[array([[ 4, 24, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [ 5,  7, 24, 20, 20, 20, 20, 20],
       [15,  9, 12,  0, 18, 24, 20, 20],
       [ 6, 18, 19,  9, 18,  2, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 

 [17 17 10  7  9 12 24  0]] - 0s - loss: 3.0883

Epoch 70/100training:[array([[24, 20, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [ 1, 24, 20, 20, 20, 20, 20, 20],
       [ 9, 19,  3,  2, 24, 20, 20, 20],
       [12, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


 

target: [[ 7 24  0  0  0  0  0  0]
 [ 8  2  7 18  1  6 24  0]
 [12  9 12 16 16  7 24  0]
 [ 3 24  0  0  0  0  0  0]
 [13 24  0  0  0  0  0  0]]
Epoch 74/100training:[array([[19, 24, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [15, 10,  7, 14, 18, 24, 20, 20],
       [10, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0

         [0., 0., 0., 0., 0., 0., 0., 0.]]]], dtype=float32)]
Epoch 78/100target: [[13 10 24  0  0  0  0  0]
 [17  9 14 15 24  0  0  0]
 [14  2 11 15 24  0  0  0]
 [14 24  0  0  0  0  0  0]
 [19 19 12 24  0  0  0  0]]

training:[array([[ 4,  7,  0, 18, 24, 20, 20, 20],
       [ 6, 16, 24, 20, 20, 20, 20, 20],
       [ 7, 24, 20, 20, 20, 20, 20, 20],
       [ 1, 11, 16, 11, 16, 18, 24, 20],
       [15, 17, 15,  9, 10, 12, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1

 [18  0  7  1 18 24  0  0]]
Epoch 82/100
 - 0s - loss: 3.0225
training:[array([[17, 15, 18, 12, 15,  9, 24, 20],
       [ 5, 19, 19,  0, 24, 20, 20, 20],
       [18,  9, 18, 16,  4,  1, 24, 20],
       [ 1, 24, 20, 20, 20, 20, 20, 20],
       [10, 18, 18, 17,  5,  7, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],




Epoch 86/100
training:[array([[ 6,  9, 11, 10, 24, 20, 20, 20],
       [11,  9,  8, 24, 20, 20, 20, 20],
       [ 7, 24, 20, 20, 20, 20, 20, 20],
       [18,  7, 19, 24, 20, 20, 20, 20],
       [ 8,  1, 14,  0,  6, 19, 24, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [1., 1., 1., 1., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 0., 0., 0., 0.],
      

target: [[ 6  9 18 17 10  5 24  0]
 [14  0 12  7 24  0  0  0]
 [12 12 14 24  0  0  0  0]
 [ 9  1  8 13 15 24  0  0]
 [ 6 19  7  7 24  0  0  0]]
 - 0s - loss: 3.0842
Epoch 90/100
training:[array([[24, 20, 20, 20, 20, 20, 20, 20],
       [10, 14,  5,  3, 24, 20, 20, 20],
       [10,  5, 24, 20, 20, 20, 20, 20],
       [ 0, 24, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0.,

target: [[ 3 13 15  7 24  0  0  0]
 [ 4 24  0  0  0  0  0  0]
 [11 13 14  7  6  1 24  0]
 [10 14  0 18  3 15 24  0]
 [ 5 17 12  3  5 24  0  0]]
Epoch 94/100
 - 0s - loss: 3.0209
training:[array([[ 7, 18, 10,  2,  2, 15, 24, 20],
       [ 8, 18, 10, 15, 24, 20, 20, 20],
       [ 5, 24, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [ 9,  6,  8, 11, 12, 24, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1.,

Epoch 98/100
training:[array([[17, 24, 20, 20, 20, 20, 20, 20],
       [24, 20, 20, 20, 20, 20, 20, 20],
       [18,  3,  6, 24, 20, 20, 20, 20],
       [ 7,  6, 19,  9, 24, 20, 20, 20],
       [13, 24, 20, 20, 20, 20, 20, 20]], dtype=int32), array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]], dtype=int32), array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]], dtype=int32), array([[[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 0., 0., 0., 0., 0., 0., 0.],
      

ValueError: The model cannot be compiled because it has no loss to optimize.

In [24]:
# token, segment, pos, att_mask, odd_mask (where to extract the class from)

In [41]:
import numpy as np

bs = 6
vs = sentence_piece_encoder.vocab_size
sl = sequence_encoder_config['max_len']
# generate random tokens
token = np.random.randint(0, vs, (bs, sl))
# generate random seg_id
segment = np.random.randint(0, 2, (bs, sl))
# generate pos_id
from transformer.train import generate_pos_ids

pos = generate_pos_ids(bs, sl)
# generate attn_mask
from data.dataset import create_attention_mask

# first generate a padding_mask(1 means it is not padded)
pad_mask = np.random.randint(0, 2, (bs, sl)).astype(np.int8)
# create the mask
mask = create_attention_mask(pad_mask=pad_mask, is_causal=False)
# generate target index
target_index = np.random.randint(0, sl, (bs, 1))
res = m.predict([token, segment, pos, mask, target_index], verbose=2)
assert res[0].shape == (bs, sl, vs + TextEncoder.SPECIAL_COUNT)  # lm
#assert res[1].shape == (bs, 1, 2)  # odd

NameError: name 'm' is not defined

In [40]:
token

NameError: name 'token' is not defined

In [39]:
import numpy as np
a = np.array(
    [[24, 20, 20, 20, 20, 20, 20, 20],
       [17, 15, 18,  3,  8, 24, 20, 20],
       [12, 24, 20, 20, 20, 20, 20, 20],
       [ 4,  2, 13,  5, 18, 17, 24, 20],
       [ 4, 17,  1,  1, 19, 15, 24, 20]]), 
array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]]), 

array([[0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7],
       [0, 1, 2, 3, 4, 5, 6, 7]]), 
array([[[[1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [1., 1., 1., 1., 1., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 0., 0., 0., 0., 0., 0.],
         [1., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]],


       [[[1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [1., 1., 1., 1., 1., 1., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0.]]]], dtype=float32), array([[24,  0,  0,  0,  0,  0,  0,  0],
       [17, 15, 18,  3,  8, 24,  0,  0],
       [12, 24,  0,  0,  0,  0,  0,  0],
       [ 4,  2, 13,  5, 18, 17, 24,  0],
       [ 4, 17,  1,  1, 19, 15, 24,  0]], dtype=int32), array([[1, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 0, 0],
       [1, 1, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 1, 1, 0]], dtype=int32), array([1., 1., 1., 1., 1.])

NameError: name 'int32' is not defined

In [51]:
res[0][0]

array([[ 0.12985384, -0.05662343, -0.11909319,  0.00189542,  0.04536017,
        -0.06319071, -0.09594958, -0.07244516, -0.10270457, -0.01372335,
        -0.17147815,  0.0445722 , -0.0996851 , -0.07377958,  0.00318583,
        -0.06331493, -0.06666111, -0.10051291, -0.1571152 , -0.23768124,
        -0.36141938, -0.5130678 , -0.46846354, -0.48073658,  0.30757928],
       [ 0.12510611,  0.00474207, -0.04687506, -0.04700482, -0.00259208,
        -0.09735098, -0.07169082, -0.09542734, -0.08752679, -0.03643619,
        -0.15307671,  0.01231386, -0.104004  , -0.06497027, -0.12382452,
        -0.07617463, -0.07195999, -0.18000549, -0.12019446, -0.16364951,
        -0.41342252, -0.55366623, -0.4821875 , -0.5616549 ,  0.28721803],
       [ 0.12469967,  0.01398506, -0.02732444, -0.05710077, -0.01429167,
        -0.09974872, -0.06526627, -0.09693623, -0.08446896, -0.03519835,
        -0.14196305,  0.00201554, -0.11138438, -0.05702129, -0.14450859,
        -0.07705998, -0.08142145, -0.18872176, -0