# Transformer (tylko decoder), inny encoding. 5 featerow - mag, x, y, depth, diff_days

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
SEED = 1337
tf.random.set_seed(SEED)
np.random.seed(SEED)

2023-11-13 18:35:53.318579: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-13 18:35:53.318674: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-13 18:35:53.321605: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-13 18:35:53.599336: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
ds_train = tf.data.Dataset.load("../data/ds_final_train/")
ds_val = tf.data.Dataset.load("../data/ds_final_val/")
ds_test = tf.data.Dataset.load("../data/ds_final_test/")

2023-11-13 18:35:59.914107: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-13 18:35:59.993262: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-13 18:35:59.993354: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-13 18:35:59.997032: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-13 18:35:59.997163: I tensorflow/compile

In [3]:
ds_train.cardinality().numpy(), ds_val.cardinality().numpy(), ds_test.cardinality().numpy()

(3435216, 490313, 106479)

In [4]:
BATCH_SIZE = 1024
BLOCK_SIZE = 64

In [5]:
def transform_element(features, context, labels):
    # transform lebels from (1,) to (2,)
    labels = tf.cast(labels, tf.int32)
    labels = tf.one_hot(labels, depth=2)
    return (features, context), labels[0]

In [6]:
ds_train = ds_train.map(transform_element)
ds_val = ds_val.map(transform_element)
ds_test = ds_test.map(transform_element)

In [7]:
for x, y in ds_train.take(1):
    print(x[0].shape)
    print(x[1].shape)
    print(y.shape)

(64, 7)
(4,)
(2,)


In [8]:
ds_train = ds_train.batch(BATCH_SIZE).shuffle(2000, seed=SEED).prefetch(tf.data.experimental.AUTOTUNE)
ds_val = ds_val.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [9]:
class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super().__init__()
        self.d_model = d_model
        self.lstm = tf.keras.layers.LSTM(d_model // 7 * 5, return_sequences=True)
        self.embed_dd = tf.keras.layers.Embedding(100, d_model // 7)
        self.embed_plate = tf.keras.layers.Embedding(100, d_model // 7)
        self.conc = tf.keras.layers.Concatenate()
        self.pos_encoding = tf.keras.layers.Embedding(100, d_model)

    def call(self, x):
        cont, plate, dd = x[:,:,:-2], x[:,:,-2], x[:,:,-1] 
        x1 = self.lstm(cont)
        x2 = self.embed_dd(dd)
        x3 = self.embed_plate(plate)
        x = self.conc([x1, x2, x3])
        x_pos = self.pos_encoding(tf.range(x1.shape[1]))
        x = x + x_pos
        return x

In [10]:
class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

In [11]:
class GlobalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

In [12]:
class CrossAttention(BaseAttention):
	def call(self, x, context):
		attn_output = self.mha(
			query=x,
			key=context,
			value=context)
		x = self.add([x, attn_output])
		x = self.layernorm(x)
		return x

In [13]:
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x) 
        return x

In [14]:
class EncoderBlock(tf.keras.layers.Layer):
	def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
		super().__init__()

		self.self_attention = GlobalSelfAttention(
			num_heads=num_heads,
			key_dim=d_model,
			dropout=dropout_rate)

		self.ffn = FeedForward(d_model, dff)

	def call(self, x):
		x = self.self_attention(x)
		x = self.ffn(x)
		return x

In [15]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads,
               dff, dropout_rate=0.1):
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.dense = tf.keras.layers.Dense(d_model // 4 * 3)
        self.embed_plate = tf.keras.layers.Embedding(100, d_model // 4)
        self.conc = tf.keras.layers.Concatenate()
        self.enc_blocks = [
            EncoderBlock(d_model=d_model,
                        num_heads=num_heads,
                        dff=dff,
                        dropout_rate=dropout_rate)
            for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x):
        x = self.dropout(x)
        x = tf.reshape(x, (-1, 4, 1))
        cont, plate = x[:,:,:-1], x[:,:,-1]
        x1 = self.dense(cont)
        x2 = self.embed_plate(plate)
        x = self.conc([x1, x2])
        for block in self.enc_blocks:
            x = block(x)

        return x  # Shape `(batch_size, seq_len, d_model)`.

In [16]:
class DecoderBlock(tf.keras.layers.Layer):
    def __init__(self,
                d_model,
                num_heads,
                dff,
                dropout_rate=0.1):
        super(DecoderBlock, self).__init__()

        self.causal_self_attention = GlobalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.cross_attention = CrossAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x, context):
        x = self.causal_self_attention(x=x)
        x = self.cross_attention(x=x, context=context)
        x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
        return x

In [17]:
class Decoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, dropout_rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.pos_embedding = PositionalEmbedding(d_model=d_model)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.dec_blocks = [
            DecoderBlock(d_model=d_model, num_heads=num_heads,
                        dff=dff, dropout_rate=dropout_rate)
            for _ in range(num_layers)]

    def call(self, x, context):
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)
        x = self.dropout(x)
        for block in self.dec_blocks:
            x = block(x, context)
        # The shape of x is (batch_size, target_seq_len, d_model).
        return x

In [18]:
class Transformer(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads,
                dropout_rate=0.1):
        super().__init__()
        self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                            num_heads=num_heads, dff=4*d_model,
                            dropout_rate=dropout_rate)

        self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
                           num_heads=num_heads, dff=4*d_model,
                           dropout_rate=dropout_rate)

        self.final_layer = tf.keras.layers.Dense(2)

    def call(self, inputs):
        x, context = inputs
        context = self.encoder(context)
        x = self.decoder(x, context) # (batch_size, target_len, d_model)
        x = x[:, -1, :]
        logits = self.final_layer(x)  # (batch_size, 1, target_vocab_size)
        return logits

In [19]:
model = Transformer(num_layers=2, d_model=84, num_heads=4)

In [20]:
model.build(input_shape=[(None, 64, 7), (None, 4)])
model.count_params()

942053

In [21]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0004),
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy', tf.keras.metrics.F1Score()])

In [22]:
history = model.fit(ds_train, epochs=20, validation_data=ds_val)

Epoch 1/20


2023-11-13 18:36:38.791956: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 154 of 2000
2023-11-13 18:36:48.798875: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 311 of 2000
2023-11-13 18:37:08.758732: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 656 of 2000
2023-11-13 18:37:18.764893: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 826 of 2000
2023-11-13 18:37:42.493384: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 1150 of 2000
2023-11-13 18:37:58.803791: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a

Epoch 2/20


2023-11-13 19:51:28.524158: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 162 of 2000
2023-11-13 19:51:38.545572: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 329 of 2000
2023-11-13 19:51:58.551984: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 681 of 2000
2023-11-13 19:52:18.539674: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 1023 of 2000
2023-11-13 19:52:38.530519: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 1328 of 2000
2023-11-13 19:52:58.541930: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take 

Epoch 3/20


2023-11-13 21:05:52.731352: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 160 of 2000
2023-11-13 21:06:12.749732: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 497 of 2000
2023-11-13 21:06:22.764595: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 655 of 2000
2023-11-13 21:06:32.883716: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 815 of 2000
2023-11-13 21:06:52.721806: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 1115 of 2000
2023-11-13 21:07:12.715518: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a

Epoch 4/20


2023-11-13 22:20:25.821371: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 141 of 2000
2023-11-13 22:20:35.858188: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 269 of 2000
2023-11-13 22:20:55.811898: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 551 of 2000
2023-11-13 22:21:15.835396: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 826 of 2000
2023-11-13 22:21:35.771281: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 1090 of 2000
2023-11-13 22:21:45.824421: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a

Epoch 5/20


2023-11-13 23:35:46.599868: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 131 of 2000
2023-11-13 23:36:06.563387: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 422 of 2000
2023-11-13 23:36:26.558369: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 722 of 2000
2023-11-13 23:36:36.591161: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 855 of 2000
2023-11-13 23:36:46.639798: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 996 of 2000
2023-11-13 23:37:06.553626: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a 

Epoch 6/20


2023-11-14 00:50:33.573834: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 130 of 2000
2023-11-14 00:50:43.602726: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 258 of 2000
2023-11-14 00:51:03.616964: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 570 of 2000
2023-11-14 00:51:23.576066: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 851 of 2000
2023-11-14 00:51:33.592130: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 989 of 2000
2023-11-14 00:51:43.603346: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a 

Epoch 7/20


2023-11-14 02:05:17.447429: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 134 of 2000
2023-11-14 02:05:37.407285: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 425 of 2000
2023-11-14 02:05:47.427212: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 554 of 2000
2023-11-14 02:06:07.427341: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 834 of 2000
2023-11-14 02:06:17.453329: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 969 of 2000
2023-11-14 02:06:37.399692: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a 

Epoch 8/20


2023-11-14 03:10:15.520358: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 122 of 2000
2023-11-14 03:10:35.501537: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 294 of 2000
2023-11-14 03:10:55.397092: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 510 of 2000
2023-11-14 03:11:05.410264: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 632 of 2000
2023-11-14 03:11:15.489541: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 743 of 2000
2023-11-14 03:11:35.405085: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a 

Epoch 9/20


2023-11-14 03:48:00.416546: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 78 of 2000
2023-11-14 03:48:20.370586: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 221 of 2000
2023-11-14 03:48:40.361193: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 373 of 2000
2023-11-14 03:48:50.411472: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 451 of 2000
2023-11-14 03:49:10.400459: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a while): 612 of 2000
2023-11-14 03:49:30.381495: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:422] ShuffleDatasetV3:13: Filling up shuffle buffer (this may take a w



In [None]:
model.evaluate(ds_test)



[0.12993596494197845,
 0.9573177099227905,
 array([0.97757834, 0.55715895], dtype=float32)]

In [None]:
model.save("../models/model_v6_new_ds.keras")

In [None]:
# save history
history = pd.DataFrame.from_dict(history.history)
history.to_csv("../models/history_model_v6_new_ds.csv", index=False)