## I referred to the following webpages for the implementation.
- Implementation of Transformer<br>
https://qiita.com/halhorn/items/c91497522be27bde17ce<br>
https://github.com/kpot/keras-transformer/tree/master/keras_transformer<br>
https://github.com/Lsdefine/attention-is-all-you-need-keras<br>
- Usage of "\_\_call\_\_" method<br>
https://qiita.com/kyo-bad/items/439d8cc3a0424c45214a

In [1]:
import numpy as np
import math

import tensorflow as tf

from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Layer, Embedding, Input, Reshape, Lambda, Add
from keras import backend as K
from keras.initializers import RandomNormal
from keras.utils import plot_model
from keras.optimizers import Adam
from keras.callbacks import Callback

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [32]:
vocab_size = 8000
d_model = 512
MAX_LEN = 716
class_num = 9
PAD_ID = 0
warmup_steps = 4000
NUM_TRAIN = 5893
NUM_TEST = 1474
batch_size = 16
epochs = 1500

In [4]:
def parse(example):
    features = tf.parse_single_example(
        example,
        features={
            "X": tf.FixedLenFeature([MAX_LEN], dtype=tf.float32),
            "Y": tf.FixedLenFeature((class_num,), dtype=tf.float32)
        }
    )
    
    X = features["X"]
    Y = features["Y"]
    
    return X, Y

In [5]:
def iterator(tfrecord_path, data_len):
    dataset = tf.data.TFRecordDataset([tfrecord_path]).map(parse)
    dataset = dataset.repeat(-1).batch(data_len)
    iterator = dataset.make_one_shot_iterator()
    
    X, Y = iterator.get_next()
    X = tf.reshape(X, [-1, MAX_LEN])
    
    return X, Y

In [6]:
%%time

x_train, y_train = iterator("train_transformer_with_sp.tfrecord", NUM_TRAIN)
x_test, y_test = iterator("test_transformer_with_sp.tfrecord", NUM_TEST)

x_train = tf.Session().run(x_train)
y_train = tf.Session().run(y_train)
x_test = tf.Session().run(x_test)
y_test = tf.Session().run(y_test)

CPU times: user 2.11 s, sys: 1.38 s, total: 3.49 s
Wall time: 2.49 s


In [7]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((5893, 716), (5893, 9), (1474, 716), (1474, 9))

## Transformer Architecture

In [8]:
class MultiheadAttention():
    ## hidden_dim has to be multiples of head_num
    def __init__(self, max_len, hidden_dim=512, head_num=8, dropout_rate=0.1, *args, **kwargs):
        self.max_len = max_len
        self.hidden_dim = hidden_dim
        self.head_num = head_num
        self.dropout_rate = dropout_rate
        
        self.q_dense_layer = Dense(hidden_dim, use_bias=False)
        self.k_dense_layer = Dense(hidden_dim, use_bias=False)
        self.v_dense_layer = Dense(hidden_dim, use_bias=False)
        self.output_dense_layer = Dense(hidden_dim, use_bias=False)
        self.attention_dropout_layer = Dropout(dropout_rate)
        
    def split_heads(self, x):
        def reshape(x):
            x = tf.reshape(x, [-1, self.max_len, self.head_num, self.hidden_dim // self.head_num])
            return tf.transpose(x, [0, 2, 1, 3])
        
        out = Lambda(reshape)(x)
        return out
    
    def combine_heads(self, heads):
        def reshape(x):
            heads = tf.transpose(x, [0, 2, 1, 3])
            return tf.reshape(x, [-1, self.max_len, self.hidden_dim])
        
        out = Lambda(reshape)(heads)
        return out
        
    def __call__(self, query, memory):
        #two arguments of query and memory are already encoded as embedded vectors for all words
        q = self.q_dense_layer(query)
        k = self.k_dense_layer(memory)
        v = self.v_dense_layer(memory)
        
        q = self.split_heads(q)
        k = self.split_heads(k)
        v = self.split_heads(v)
        
        #for scaled dot-product
        depth_inside_each_head = self.hidden_dim // self.head_num
        q = Lambda(lambda x: x * (depth_inside_each_head ** -0.5))(q)
        
        #q.shape = (batch_size, head_num, query_len, emb_dim)
        #k.shape = (batch_size, head_num, memory_len, emb_dim)
        #batch_dot(q, k).shape = (batch_size, head_num, query_len, memory_len)
        score = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[3, 3]))([q, k])
        normalized_score = Activation("softmax")(score)
        normalized_score = self.attention_dropout_layer(normalized_score)
        
        #normalized_score.shape = (batch_size, head_num, query_length, memory_length)
        #v.shape = (batch_size, head_num, memory_length, depth)
        #attention_weighted_output.shape = (batch_size, head_num, query_length, depth)
        attention_weighted_output = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[3, 2]))([normalized_score, v])
        attention_weighted_output = self.combine_heads(attention_weighted_output)
        return self.output_dense_layer(attention_weighted_output)

In [9]:
# SlefAttention class inherits MultiheadAttention class so that it can make query and memory come from the same source.
class SelfAttention(MultiheadAttention):
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        
    def __call__(self, query):
        return super().__call__(query, query)

In [10]:
class PositionwiseFeedForwardNetwork():
    
    def __init__(self, hidden_dim, dropout_rate, *args, **kwargs):
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate
        
        self.first_dense_layer = Dense(hidden_dim*4, use_bias=True, activation="relu")
        self.second_dense_layer = Dense(hidden_dim, use_bias=True, activation="linear")
        self.dropout_layer = Dropout(dropout_rate)
        
    def __call__(self, inputs):
        # make the network more flexible to learn for the first dense layer(non-linear transformation is used),
        # and put the network back into the same hidden dim as original(linear transformation is used)
        x = self.first_dense_layer(inputs)
        x = self.dropout_layer(x)
        return self.second_dense_layer(x)

In [11]:
class LayerNormalization(Layer):
    def __init__(self, axis=-1, **kwargs):
        self.axis = axis
        super(LayerNormalization, self).__init__(**kwargs)
        
    def get_config(self):
        config = super().get_config()
        config["axis"] = self.axis
        return config
    
    def build(self, input_shape):
        hidden_dim = input_shape[-1]
        self.scale = self.add_weight("layer_norm_scale", shape=[hidden_dim],
                                    initializer="ones")
        self.shift = self.add_weight("layer_norm_shift", shape=[hidden_dim],
                                    initializer="zeros")
        super(LayerNormalization, self).build(input_shape)
        
    def call(self, inputs, epsilon=1e-6):
        mean = K.mean(inputs, axis=[-1], keepdims=True)
        variance = K.var(inputs, axis=[-1], keepdims=True)
        normalized_inputs = (inputs - mean) / (K.sqrt(variance) + epsilon)
        return normalized_inputs * self.scale + self.shift
    
    def compute_output_shape(self, input_shape):
        return input_shape

In [12]:
class PreLayerNormPostResidualConnectionWrapper():
    def __init__(self, layer, dropout_rate, *args, **kwargs):
        self.layer = layer
        self.layer_norm = LayerNormalization()
        self.dropout_layer = Dropout(dropout_rate)
        
    def __call__(self, inputs, *args, **kwargs):
        x = self.layer_norm(inputs)
        x = self.layer(x)
        outputs = self.dropout_layer(x)
        results = Add()([inputs, outputs])
        return results

In [13]:
class AddPositionalEncoding(Layer): 
    def call(self, inputs):
        data_type = inputs.dtype
        batch_size, max_len, emb_dim = tf.unstack(tf.shape(inputs))
        # i is from 0 to 255 when emb_dim is 512
        #so the doubled_i is from 0 to 510
        doubled_i = K.arange(emb_dim) // 2 * 2
        exponent = K.tile(K.expand_dims(doubled_i, 0), [max_len, 1])
        denominator_matrix = K.pow(10000.0, K.cast(exponent / emb_dim, data_type))
        
        # since cos(x) = sin(x + π/2), we convert the series of [sin, cos, sin, cos, ...]
        # into [sin, sin, sin, sin, ...]
        to_convert = K.cast(K.arange(emb_dim) % 2, data_type) * math.pi / 2
        convert_matrix = K.tile(tf.expand_dims(to_convert, 0), [max_len, 1])
        
        seq_pos = K.arange(max_len)
        numerator_matrix = K.cast(K.tile(K.expand_dims(seq_pos, 1), [1, emb_dim]), data_type)
        
        positinal_encoding = K.sin(numerator_matrix / denominator_matrix + convert_matrix)
        batched_positional_encoding = K.tile(K.expand_dims(positinal_encoding, 0), [batch_size, 1, 1])
        return inputs + batched_positional_encoding
    
    def compute_output_shape(self, input_shape):
        return input_shape

In [14]:
class MakeZeroPads(Layer):
    def __init__(self, seq_len, vocab_size, emb_dim, data_type="float32", *args, **kwargs):
        self.emb_dim = emb_dim
        super(MakeZeroPads, self).__init__(*args, **kwargs)
        
    def call(self, inputs):
        mask_for_pads = tf.to_float(tf.not_equal(inputs, PAD_ID))
        pads_masked_embedding = inputs * mask_for_pads
        return pads_masked_embedding * (self.emb_dim ** 0.5)
    
    def compute_output_shape(self, input_shape):
        return input_shape

In [15]:
class Encoder():
    def __init__(self, vocab_size, max_len, stack_num, head_num, emb_dim, dropout_rate, *args, **kwargs):
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.stack_num = stack_num
        self.head_num = head_num
        self.emb_dim = emb_dim
        self.dropout_rate = dropout_rate
        
        self.embedding_layer = Embedding(self.vocab_size,
                           self.emb_dim,
                           embeddings_initializer=RandomNormal(mean=0.0, stddev=self.emb_dim**-0.5)
                          )
        self.make_zero_pads_layer = MakeZeroPads(self.max_len, vocab_size, emb_dim)
        self.add_pos_enc_layer = AddPositionalEncoding()
        self.input_dropout_layer = Dropout(dropout_rate)
        
        self.attention_block_list = []
        for _ in range(stack_num):
            self_attention_layer = SelfAttention(self.max_len, self.emb_dim, self.head_num, self.dropout_rate)
            pffn_layer = PositionwiseFeedForwardNetwork(self.emb_dim, self.dropout_rate)
            self.attention_block_list.append([
                PreLayerNormPostResidualConnectionWrapper(self_attention_layer, dropout_rate),
                PreLayerNormPostResidualConnectionWrapper(pffn_layer, dropout_rate)
            ])
        self.output_layer_norm = LayerNormalization()
        
    def __call__(self, inputs):
        x = self.embedding_layer(inputs)
        x = self.make_zero_pads_layer(x)
        x = self.add_pos_enc_layer(x)
        x = self.input_dropout_layer(x)
        
        for i, set_of_layers_list in enumerate(self.attention_block_list):
            self_attention_layer, pffn_layer = tuple(set_of_layers_list)
            x = self_attention_layer(x)
            x = pffn_layer(x)
            
        return self.output_layer_norm(x)

In [16]:
# Transformer classification model
inputs = Input((MAX_LEN,))
transformer_encoder = Encoder(vocab_size=vocab_size, stack_num=6, head_num=8, emb_dim=512, dropout_rate=0.1, max_len=MAX_LEN)
encoder_output = transformer_encoder(inputs)
#Since the task is text classification, we just need the first token for each sample
summarized_vecs = Lambda(lambda x: x[:, 0, :])(encoder_output)
outputs = Dense(class_num)(summarized_vecs)
model = Model(inputs, outputs)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 716)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 716, 512)     4096000     input_1[0][0]                    
__________________________________________________________________________________________________
make_zero_pads_1 (MakeZeroPads) (None, 716, 512)     0           embedding_1[0][0]                
__________________________________________________________________________________________________
add_positional_encoding_1 (AddP (None, 716, 512)     0           make_zero_pads_1[0][0]           
__________________________________________________________________________________________________
dropout_1 

In [16]:
plot_model(model, to_file="transformer_encoder.png", show_shapes=True, show_layer_names=True)

In [17]:
#learning rate for Adam
class LRSchedulerPerStep(Callback):
    def __init__(self, d_model, warmup_steps=4000):
        self.d_model = d_model
        self.warmup_step = warmup_steps
        self.step_num = 0
        
    def on_batch_begin(self, batch, logs=None):
        self.step_num += 1
        updated_lr = self.d_model * min(self.step_num ** (-0.5), self.step_num * (self.warmup_step ** (-1.5)))
        K.set_value(self.model.optimizer.lr, updated_lr)

In [18]:
lr_scheduler = LRSchedulerPerStep(d_model, warmup_steps)

In [19]:
opt = Adam(beta_1=0.9, beta_2=0.98, epsilon=1e-9)

## About customized loss function
https://stackoverflow.com/questions/50063613/add-loss-function-in-keras<br>
https://github.com/kpot/keras-transformer/blob/b9d4e76c535c0c62cadc73e37416e4dc18b635ca/keras_transformer/bert.py#L212<br>
https://github.com/tensorflow/models/blob/b9ef963d1e84da0bb9c0a6039457c39a699ea149/official/transformer/v2/metrics.py#L47<br>

In [20]:
# smoothed loss function
class SmoothedLossSparseCategoricalXEntropy:
    def __init__(self, smoothing, class_num):
        self.smoothing = smoothing
        self.class_num = class_num
        
    def __call__(self, y_true, y_pred):
        confidence = 1.0 - self.smoothing
        low_confidence = (1.0 - confidence) / tf.cast(self.class_num - 1, tf.float32)
        smoothed_labels = tf.one_hot(
            tf.cast(y_true, tf.int32),
            depth=self.class_num,
            on_value=confidence,
            off_value=low_confidence
        )
        xentropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=y_pred,
            labels=smoothed_labels
        )
        
        lowest_loss = -(confidence * tf.log(confidence) + 
                       tf.cast(self.class_num -1, tf.float32) * low_confidence * tf.log(low_confidence + 1e-20))
        final_loss = xentropy - lowest_loss
        return final_loss

In [21]:
y_train = np.argmax(y_train, axis=-1)
y_test = np.argmax(y_test, axis=-1)
y_train.shape, y_test.shape

((5893,), (1474,))

In [24]:
model.compile(optimizer=opt, loss=SmoothedLossSparseCategoricalXEntropy(smoothing=0.1, class_num=class_num))

In [None]:
model.compile(optimizer=opt, loss=categorical_crossentropy)

In [33]:
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test), callbacks=[lr_scheduler])

Train on 5893 samples, validate on 1474 samples
Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500
Epoch 31/1500
Epoch 32/1500
Epoch 33/1500
Epoch 34/1500
Epoch 35/1500
Epoch 36/1500
Epoch 37/1500
Epoch 38/1500
Epoch 39/1500
Epoch 40/1500
Epoch 41/1500
Epoch 42/1500
Epoch 43/1500
Epoch 44/1500
Epoch 45/1500
Epoch 46/1500
Epoch 47/1500
Epoch 48/1500
Epoch 49/1500
Epoch 50/1500
Epoch 51/1500
Epoch 52/1500
Epoch 53/1500
Epoch 54/1500
Epoch 55/1500
Epoch 56/1500
Epoch 57/1500
Epoch 58/1500
Epoch 59/1500
Epoch 60/1500
Epoch 61/1500
Epoch 62/1500
Epoch 63/1500
Epoch 64/1500
Epoch 65/1500
Epoch 66/1500
Epoch 67/1500
Epoch 68/1500
Epoch 69/

Epoch 76/1500
Epoch 77/1500
Epoch 78/1500
Epoch 79/1500
Epoch 80/1500
Epoch 81/1500
Epoch 82/1500
Epoch 83/1500
Epoch 84/1500
Epoch 85/1500
Epoch 86/1500
Epoch 87/1500
Epoch 88/1500
Epoch 89/1500
Epoch 90/1500
Epoch 91/1500
Epoch 92/1500
Epoch 93/1500
Epoch 94/1500
Epoch 95/1500
Epoch 96/1500
Epoch 97/1500
Epoch 98/1500
Epoch 99/1500
Epoch 100/1500
Epoch 101/1500
Epoch 102/1500
Epoch 103/1500
Epoch 104/1500
Epoch 105/1500
Epoch 106/1500
Epoch 107/1500
Epoch 108/1500
Epoch 109/1500
Epoch 110/1500
Epoch 111/1500
Epoch 112/1500
Epoch 113/1500
Epoch 114/1500
Epoch 115/1500
Epoch 116/1500
Epoch 117/1500
Epoch 118/1500
Epoch 119/1500
Epoch 120/1500
Epoch 121/1500
Epoch 122/1500
Epoch 123/1500
Epoch 124/1500
Epoch 125/1500
Epoch 126/1500
Epoch 127/1500
Epoch 128/1500
Epoch 129/1500
Epoch 130/1500
Epoch 131/1500
Epoch 132/1500
Epoch 133/1500
Epoch 134/1500
Epoch 135/1500
Epoch 136/1500
Epoch 137/1500
Epoch 138/1500
Epoch 139/1500
Epoch 140/1500
Epoch 141/1500
Epoch 142/1500
Epoch 143/1500
Epoc

Epoch 150/1500
Epoch 151/1500
Epoch 152/1500
Epoch 153/1500
Epoch 154/1500
Epoch 155/1500
Epoch 156/1500
Epoch 157/1500
Epoch 158/1500
Epoch 159/1500
Epoch 160/1500
Epoch 161/1500
Epoch 162/1500
Epoch 163/1500
Epoch 164/1500
Epoch 165/1500
Epoch 166/1500
Epoch 167/1500
Epoch 168/1500
Epoch 169/1500
Epoch 170/1500
Epoch 171/1500
Epoch 172/1500
Epoch 173/1500
Epoch 174/1500
Epoch 175/1500
Epoch 176/1500
Epoch 177/1500
Epoch 178/1500
Epoch 179/1500
Epoch 180/1500
Epoch 181/1500
Epoch 182/1500
Epoch 183/1500
Epoch 184/1500
Epoch 185/1500
Epoch 186/1500
Epoch 187/1500
Epoch 188/1500
Epoch 189/1500
Epoch 190/1500
Epoch 191/1500
Epoch 192/1500
Epoch 193/1500
Epoch 194/1500
Epoch 195/1500
Epoch 196/1500
Epoch 197/1500
Epoch 198/1500
Epoch 199/1500
Epoch 200/1500
Epoch 201/1500
Epoch 202/1500
Epoch 203/1500
Epoch 204/1500
Epoch 205/1500
Epoch 206/1500
Epoch 207/1500
Epoch 208/1500
Epoch 209/1500
Epoch 210/1500
Epoch 211/1500
Epoch 212/1500
Epoch 213/1500
Epoch 214/1500
Epoch 215/1500
Epoch 216/

Epoch 223/1500
Epoch 224/1500
Epoch 225/1500
Epoch 226/1500
Epoch 227/1500
Epoch 228/1500
Epoch 229/1500
Epoch 230/1500
Epoch 231/1500
Epoch 232/1500
Epoch 233/1500
Epoch 234/1500
Epoch 235/1500
Epoch 236/1500
Epoch 237/1500
Epoch 238/1500
Epoch 239/1500
Epoch 240/1500
Epoch 241/1500
Epoch 242/1500
Epoch 243/1500
Epoch 244/1500
Epoch 245/1500
Epoch 246/1500
Epoch 247/1500
Epoch 248/1500
Epoch 249/1500
Epoch 250/1500
Epoch 251/1500
Epoch 252/1500
Epoch 253/1500
Epoch 254/1500
Epoch 255/1500
Epoch 256/1500
Epoch 257/1500
Epoch 258/1500
Epoch 259/1500
Epoch 260/1500
Epoch 261/1500
Epoch 262/1500
Epoch 263/1500
Epoch 264/1500
Epoch 265/1500
Epoch 266/1500
Epoch 267/1500
Epoch 268/1500
Epoch 269/1500
Epoch 270/1500
Epoch 271/1500
Epoch 272/1500
Epoch 273/1500
Epoch 274/1500
Epoch 275/1500
Epoch 276/1500
Epoch 277/1500
Epoch 278/1500
Epoch 279/1500
Epoch 280/1500
Epoch 281/1500
Epoch 282/1500
Epoch 283/1500
Epoch 284/1500
Epoch 285/1500
Epoch 286/1500
Epoch 287/1500
Epoch 288/1500
Epoch 289/

Epoch 297/1500
Epoch 298/1500
Epoch 299/1500
Epoch 300/1500
Epoch 301/1500
Epoch 302/1500
Epoch 303/1500
Epoch 304/1500
Epoch 305/1500
Epoch 306/1500
Epoch 307/1500
Epoch 308/1500
Epoch 309/1500
Epoch 310/1500
Epoch 311/1500
Epoch 312/1500
Epoch 313/1500
Epoch 314/1500
Epoch 315/1500
Epoch 316/1500
Epoch 317/1500
Epoch 318/1500
Epoch 319/1500
Epoch 320/1500
Epoch 321/1500
Epoch 322/1500
Epoch 323/1500
Epoch 324/1500
Epoch 325/1500
Epoch 326/1500
Epoch 327/1500
Epoch 328/1500
Epoch 329/1500
Epoch 330/1500
Epoch 331/1500
Epoch 332/1500
Epoch 333/1500
Epoch 334/1500
Epoch 335/1500
Epoch 336/1500
Epoch 337/1500
Epoch 338/1500
Epoch 339/1500
Epoch 340/1500
Epoch 341/1500
Epoch 342/1500
Epoch 343/1500
Epoch 344/1500
Epoch 345/1500
Epoch 346/1500
Epoch 347/1500
Epoch 348/1500
Epoch 349/1500
Epoch 350/1500
Epoch 351/1500
Epoch 352/1500
Epoch 353/1500
Epoch 354/1500
Epoch 355/1500
Epoch 356/1500
Epoch 357/1500
Epoch 358/1500
Epoch 359/1500
Epoch 360/1500
Epoch 361/1500
Epoch 362/1500
Epoch 363/

Epoch 371/1500
Epoch 372/1500
Epoch 373/1500
Epoch 374/1500
Epoch 375/1500
Epoch 376/1500
Epoch 377/1500
Epoch 378/1500
Epoch 379/1500
Epoch 380/1500
Epoch 381/1500
Epoch 382/1500
Epoch 383/1500
Epoch 384/1500
Epoch 385/1500
Epoch 386/1500
Epoch 387/1500
Epoch 388/1500
Epoch 389/1500
Epoch 390/1500
Epoch 391/1500
Epoch 392/1500
Epoch 393/1500
Epoch 394/1500
Epoch 395/1500
Epoch 396/1500
Epoch 397/1500
Epoch 398/1500
Epoch 399/1500
Epoch 400/1500
Epoch 401/1500
Epoch 402/1500
Epoch 403/1500
Epoch 404/1500
Epoch 405/1500
Epoch 406/1500
Epoch 407/1500
Epoch 408/1500
Epoch 409/1500
Epoch 410/1500
Epoch 411/1500
Epoch 412/1500
Epoch 413/1500
Epoch 414/1500
Epoch 415/1500
Epoch 416/1500
Epoch 417/1500
Epoch 418/1500
Epoch 419/1500
Epoch 420/1500
Epoch 421/1500
Epoch 422/1500
Epoch 423/1500
Epoch 424/1500
Epoch 425/1500
Epoch 426/1500
Epoch 427/1500
Epoch 428/1500
Epoch 429/1500
Epoch 430/1500
Epoch 431/1500
Epoch 432/1500
Epoch 433/1500
Epoch 434/1500
Epoch 435/1500
Epoch 436/1500
Epoch 437/

Epoch 445/1500
Epoch 446/1500
Epoch 447/1500
Epoch 448/1500
Epoch 449/1500
Epoch 450/1500
Epoch 451/1500
Epoch 452/1500
Epoch 453/1500
Epoch 454/1500
Epoch 455/1500
Epoch 456/1500
Epoch 457/1500
Epoch 458/1500
Epoch 459/1500
Epoch 460/1500
Epoch 461/1500
Epoch 462/1500
Epoch 463/1500
Epoch 464/1500
Epoch 465/1500
Epoch 466/1500
Epoch 467/1500
Epoch 468/1500
Epoch 469/1500
Epoch 470/1500
Epoch 471/1500
Epoch 472/1500
Epoch 473/1500
Epoch 474/1500
Epoch 475/1500
Epoch 476/1500
Epoch 477/1500
Epoch 478/1500
Epoch 479/1500
Epoch 480/1500
Epoch 481/1500
Epoch 482/1500
Epoch 483/1500
Epoch 484/1500
Epoch 485/1500
Epoch 486/1500
Epoch 487/1500
Epoch 488/1500
Epoch 489/1500
Epoch 490/1500
Epoch 491/1500
Epoch 492/1500
Epoch 493/1500
Epoch 494/1500
Epoch 495/1500
Epoch 496/1500
Epoch 497/1500
Epoch 498/1500
Epoch 499/1500
Epoch 500/1500
Epoch 501/1500
Epoch 502/1500
Epoch 503/1500
Epoch 504/1500
Epoch 505/1500
Epoch 506/1500
Epoch 507/1500
Epoch 508/1500
Epoch 509/1500
Epoch 510/1500
Epoch 511/

Epoch 519/1500
Epoch 520/1500
Epoch 521/1500
Epoch 522/1500
Epoch 523/1500
Epoch 524/1500
Epoch 525/1500
Epoch 526/1500
Epoch 527/1500
Epoch 528/1500
Epoch 529/1500
Epoch 530/1500
Epoch 531/1500
Epoch 532/1500
Epoch 533/1500
Epoch 534/1500
Epoch 535/1500
Epoch 536/1500
Epoch 537/1500
Epoch 538/1500
Epoch 539/1500
Epoch 540/1500
Epoch 541/1500
Epoch 542/1500
Epoch 543/1500
Epoch 544/1500
Epoch 545/1500
Epoch 546/1500
Epoch 547/1500
Epoch 548/1500
Epoch 549/1500
Epoch 550/1500
Epoch 551/1500
Epoch 552/1500
Epoch 553/1500
Epoch 554/1500
Epoch 555/1500
Epoch 556/1500
Epoch 557/1500
Epoch 558/1500
Epoch 559/1500
Epoch 560/1500
Epoch 561/1500
Epoch 562/1500
Epoch 563/1500
Epoch 564/1500
Epoch 565/1500
Epoch 566/1500
Epoch 567/1500
Epoch 568/1500
Epoch 569/1500
Epoch 570/1500
Epoch 571/1500
Epoch 572/1500
Epoch 573/1500
Epoch 574/1500
Epoch 575/1500
Epoch 576/1500
Epoch 577/1500
Epoch 578/1500
Epoch 579/1500
Epoch 580/1500
Epoch 581/1500
Epoch 582/1500
Epoch 583/1500
Epoch 584/1500
Epoch 585/

Epoch 593/1500
Epoch 594/1500
Epoch 595/1500
Epoch 596/1500
Epoch 597/1500
Epoch 598/1500
Epoch 599/1500
Epoch 600/1500
Epoch 601/1500
Epoch 602/1500
Epoch 603/1500
Epoch 604/1500
Epoch 605/1500
Epoch 606/1500
Epoch 607/1500
Epoch 608/1500
Epoch 609/1500
Epoch 610/1500
Epoch 611/1500
Epoch 612/1500
Epoch 613/1500
Epoch 614/1500
Epoch 615/1500
Epoch 616/1500
Epoch 617/1500
Epoch 618/1500
Epoch 619/1500
Epoch 620/1500
Epoch 621/1500
Epoch 622/1500
Epoch 623/1500
Epoch 624/1500
Epoch 625/1500
Epoch 626/1500
Epoch 627/1500
Epoch 628/1500
Epoch 629/1500
Epoch 630/1500
Epoch 631/1500
Epoch 632/1500
Epoch 633/1500
Epoch 634/1500
Epoch 635/1500
Epoch 636/1500
Epoch 637/1500
Epoch 638/1500
Epoch 639/1500
Epoch 640/1500
Epoch 641/1500
Epoch 642/1500
Epoch 643/1500
Epoch 644/1500
Epoch 645/1500
Epoch 646/1500
Epoch 647/1500
Epoch 648/1500
Epoch 649/1500
Epoch 650/1500
Epoch 651/1500
Epoch 652/1500
Epoch 653/1500
Epoch 654/1500
Epoch 655/1500
Epoch 656/1500
Epoch 657/1500
Epoch 658/1500
Epoch 659/

Epoch 667/1500
Epoch 668/1500
Epoch 669/1500
Epoch 670/1500
Epoch 671/1500
Epoch 672/1500
Epoch 673/1500
Epoch 674/1500
Epoch 675/1500
Epoch 676/1500
Epoch 677/1500
Epoch 678/1500
Epoch 679/1500
Epoch 680/1500
Epoch 681/1500
Epoch 682/1500
Epoch 683/1500
Epoch 684/1500
Epoch 685/1500
Epoch 686/1500
Epoch 687/1500
Epoch 688/1500
Epoch 689/1500
Epoch 690/1500
Epoch 691/1500
Epoch 692/1500
Epoch 693/1500
Epoch 694/1500
Epoch 695/1500
Epoch 696/1500
Epoch 697/1500
Epoch 698/1500
Epoch 699/1500
Epoch 700/1500
Epoch 701/1500
Epoch 702/1500
Epoch 703/1500
Epoch 704/1500
Epoch 705/1500
Epoch 706/1500
Epoch 707/1500
Epoch 708/1500
Epoch 709/1500
Epoch 710/1500
Epoch 711/1500
Epoch 712/1500
Epoch 713/1500
Epoch 714/1500
Epoch 715/1500
Epoch 716/1500
Epoch 717/1500
Epoch 718/1500
Epoch 719/1500
Epoch 720/1500
Epoch 721/1500
Epoch 722/1500
Epoch 723/1500
Epoch 724/1500
Epoch 725/1500
Epoch 726/1500
Epoch 727/1500
Epoch 728/1500
Epoch 729/1500
Epoch 730/1500
Epoch 731/1500
Epoch 732/1500
Epoch 733/

Epoch 741/1500
Epoch 742/1500
Epoch 743/1500
Epoch 744/1500
Epoch 745/1500
Epoch 746/1500
Epoch 747/1500
Epoch 748/1500
Epoch 749/1500
Epoch 750/1500
Epoch 751/1500
Epoch 752/1500
Epoch 753/1500
Epoch 754/1500
Epoch 755/1500
Epoch 756/1500
Epoch 757/1500
Epoch 758/1500
Epoch 759/1500
Epoch 760/1500
Epoch 761/1500
Epoch 762/1500
Epoch 763/1500
Epoch 764/1500
Epoch 765/1500
Epoch 766/1500
Epoch 767/1500
Epoch 768/1500
Epoch 769/1500
Epoch 770/1500
Epoch 771/1500
Epoch 772/1500
Epoch 773/1500
Epoch 774/1500
Epoch 775/1500
Epoch 776/1500
Epoch 777/1500
Epoch 778/1500
Epoch 779/1500
Epoch 780/1500
Epoch 781/1500
Epoch 782/1500
Epoch 783/1500
Epoch 784/1500
Epoch 785/1500
Epoch 786/1500
Epoch 787/1500
Epoch 788/1500
Epoch 789/1500
Epoch 790/1500
Epoch 791/1500
Epoch 792/1500
Epoch 793/1500
Epoch 794/1500
Epoch 795/1500
Epoch 796/1500
Epoch 797/1500
Epoch 798/1500
Epoch 799/1500
Epoch 800/1500
Epoch 801/1500
Epoch 802/1500
Epoch 803/1500
Epoch 804/1500
Epoch 805/1500
Epoch 806/1500
Epoch 807/

Epoch 815/1500
Epoch 816/1500
Epoch 817/1500
Epoch 818/1500
Epoch 819/1500
Epoch 820/1500
Epoch 821/1500

KeyboardInterrupt: 