In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

2024-06-11 09:50:48.817833: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-11 09:50:48.848910: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Applying the temporal fusion transformers gatting mechanismis using tensorflow 

The teoric text is in my final course work "", in 1.6.1 cap

In [2]:
class GLU(tf.keras.layers.Layer):
    def __init__(self,units):
        """Define a Gate Linear Unit

        Args:
            units (int): the units quantities in the dense layer on GLU
        """
        super(GLU,self).__init__()

        self.linear = tf.keras.layers.Dense(units)                      #creating a layer to calculate W_{5}x+b_{5}
        self.dense_sigmoid = tf.keras.layers.Dense(units,activation='sigmoid') #creating a layer to calculate sigmoid(W_{4}x+b_{4})

    def call(self,inputs):
        """Calculate the element wise multiplication between the layers

        Args:
            inputs (tf.Tensor,tf.Dataset or np.array): the input to calculate the GLU

        Returns:
            tf.Tensor,tf.Dataset or np.array: element_wise_product
        """
        element_wise_product = self.dense_sigmoid(inputs) * self.linear(inputs)
        return element_wise_product

In [38]:
class Drop_GLU_Add_Norm(tf.keras.layers.Layer):
    def __init__(self, units, drop_rate):
        """Define the process of apply a dropout, a GLU add the residuals to GLU output and normalize them

        Args:
            units (int): the units quantities in the dense layer on GLU
            drop_rate (float): a float number 0 <= drop_rate <= 1, where define te dropout rate
        """
        super(Drop_GLU_Add_Norm,self).__init__()
        self.units= units
        self.drop_rate = drop_rate

        self.dropout_layer = tf.keras.layers.Dropout(self.drop_rate)
        self.layer_GLU = GLU(self.units)
        self.norm_layer = tf.keras.layers.LayerNormalization()

    def call(self,inputs, residual):
        """Compute all process 

        Args:
            inputs (tf.Tensor,tf.Dataset or np.array): real input
            residual (tf.Tensor,tf.Dataset or np.array): the residuals input to add to normalize
        Returns:
            tf.Tensor,tf.Dataset or np.array : normalized_values
        """
        print("\nINPUT\n")
        print(inputs)
        print("\nRESIDUAL\n")
        print(residual)
        input_droped = self.dropout_layer(inputs)
        print("\nDROPOUT\n")
        print(input_droped)
        glu_output = self.layer_GLU(input_droped)
        print("\GLU\n")
        print(glu_output)
        normalized_values = self.norm_layer(glu_output + residual)
        return normalized_values

In [44]:
class GRN(tf.keras.layers.Layer):
    def __init__(self, units,d_model,drop_rate, optional_context=False):
        super(GRN,self).__init__()

        self.units = units
        self.d_model = d_model
        self.drop_rate = drop_rate
        self.optional_context = optional_context

        self.layer_ELU = tf.keras.layers.ELU()
        self.first_linear = tf.keras.layers.Dense(self.units)
        self.second_linear = tf.keras.layers.Dense(self.d_model)

        if self.optional_context:
            self.linear_optioal = tf.keras.layers.Dense(self.units,use_bias=False)

        
        self.add_norm = Drop_GLU_Add_Norm(units=self.d_model,
                                          drop_rate=self.drop_rate)
        
    def call(self,inputs):
        if self.optional_context:
            X, context = inputs
            dense_out = self.first_linear(X)
            context_out = self.linear_optioal(context)
            first_output = self.layer_ELU(dense_out + context_out)
        else:
            X = inputs
            dense_out = self.first_linear(X)
            first_output = self.layer_ELU(dense_out)

        second_output = self.second_linear(first_output)

        final_output = self.add_norm(second_output,X)

        return final_output


In [61]:
import tensorflow as tf
from tensorflow.keras import layers



class VariableSelectionNetwork(tf.keras.layers.Layer):
    def __init__(self,units, d_model, dropout_rate, additional_context=None, **kwargs):
        super(VariableSelectionNetwork, self).__init__(**kwargs)
        self.units = units
        self.d_model = d_model
        self.dropout_rate = dropout_rate
        self.additional_context = additional_context

        self.flattened_grn = GRN(self.units,d_model= self.d_model,drop_rate=dropout_rate, optional_context=additional_context)
        self.per_feature_grn = [GRN(self.units,d_model= self.d_model,drop_rate=dropout_rate) for _ in range(d_model)]


    def call(self, inputs):
        # Non Static Inputs
        if self.additional_context:
            embedding, static_context = inputs

            time_steps = embedding.shape[1]
            flatten = tf.reshape(embedding, (-1, time_steps, self.d_model))

            static_context = tf.expand_dims(static_context, axis=1)

            # Nonlinear transformation with gated residual network.
            mlp_outputs = self.flattened_grn((flatten, static_context))

            sparse_weights = tf.nn.softmax(mlp_outputs, axis=-1)
            sparse_weights = tf.expand_dims(sparse_weights, axis=2)

            trans_emb_list = []
            for i in range(self.output_size):
                e = self.per_feature_grn(embedding[..., i])
                trans_emb_list.append(e)
            transformed_embedding = tf.concat(trans_emb_list, axis=-1)

            combined = sparse_weights * transformed_embedding

            temporal_ctx = tf.reduce_sum(combined, axis=-1)

        # Static Inputs
        else:
            embedding = inputs

            print("\n my input:\n")
            print(embedding)
            time_steps = embedding.shape[1]
            flatten = tf.reshape(embedding, (-1, time_steps, self.d_model))
            print("\n my flatten vector:\n")
            print(flatten)
            
            mlp_outputs = self.flattened_grn(flatten)
            print("\n apply GRN:\n")
            print(mlp_outputs)
            sparse_weights = tf.nn.softmax(mlp_outputs, axis=-1)
            print("\n apply the softmax:\n")
            print(sparse_weights)
            sparse_weights = tf.expand_dims(sparse_weights, axis=-1)
            print("\n expanding the dimensions:\n")
            print(sparse_weights)

            trans_emb_list = []
            for i in range(time_steps):
                #e = self.per_feature_grn(tf.expand_dims(embedding[: , :, i], axis=1))
                e = self.per_feature_grn[i](tf.expand_dims(embedding[:,:,i],axis=1))
                trans_emb_list.append(e)
            transformed_embedding = tf.concat(trans_emb_list, axis=1)
            print(f"\n concat the elements:\n")
            print(transformed_embedding)

            combined = sparse_weights * transformed_embedding
            print(f"\n multiplying the weights and transformed elements :\n")
            print(transformed_embedding)
            temporal_ctx = tf.reduce_sum(combined, axis=1)
            print(f"\n sum all:\n")
            print(temporal_ctx)
        return temporal_ctx, sparse_weights



In [62]:
y = tf.random.uniform(shape=(1, 8, 6))

In [63]:
y

<tf.Tensor: shape=(1, 8, 6), dtype=float32, numpy=
array([[[0.5616244 , 0.28085256, 0.1318121 , 0.6902391 , 0.20553458,
         0.5646901 ],
        [0.7794831 , 0.98759806, 0.14046597, 0.8220762 , 0.24682856,
         0.23075736],
        [0.6895392 , 0.43073773, 0.91565204, 0.8308842 , 0.47240186,
         0.25788152],
        [0.09519708, 0.411788  , 0.07336771, 0.7346499 , 0.33646667,
         0.29076874],
        [0.17374647, 0.42647767, 0.36155653, 0.19456589, 0.79776347,
         0.42057192],
        [0.38651323, 0.12302053, 0.44405568, 0.5620618 , 0.93665445,
         0.93753755],
        [0.5958245 , 0.8004904 , 0.94326925, 0.5396719 , 0.9968792 ,
         0.18966377],
        [0.47886646, 0.5347481 , 0.90754366, 0.7479423 , 0.33037877,
         0.27249277]]], dtype=float32)>

In [64]:
model = VariableSelectionNetwork(units=32,d_model=6,dropout_rate=0.0)
model(y)


 my input:

tf.Tensor(
[[[0.5616244  0.28085256 0.1318121  0.6902391  0.20553458 0.5646901 ]
  [0.7794831  0.98759806 0.14046597 0.8220762  0.24682856 0.23075736]
  [0.6895392  0.43073773 0.91565204 0.8308842  0.47240186 0.25788152]
  [0.09519708 0.411788   0.07336771 0.7346499  0.33646667 0.29076874]
  [0.17374647 0.42647767 0.36155653 0.19456589 0.79776347 0.42057192]
  [0.38651323 0.12302053 0.44405568 0.5620618  0.93665445 0.93753755]
  [0.5958245  0.8004904  0.94326925 0.5396719  0.9968792  0.18966377]
  [0.47886646 0.5347481  0.90754366 0.7479423  0.33037877 0.27249277]]], shape=(1, 8, 6), dtype=float32)

 my flatten vector:

Tensor("Reshape:0", shape=(1, 8, 6), dtype=float32)

INPUT

Tensor("dense_343_1/Add:0", shape=(1, 8, 6), dtype=float32)

RESIDUAL

Tensor("Reshape:0", shape=(1, 8, 6), dtype=float32)

DROPOUT

Tensor("dense_343_1/Add:0", shape=(1, 8, 6), dtype=float32)
\GLU

Tensor("glu_86_1/mul:0", shape=(1, 8, 6), dtype=float32)

INPUT

Tensor("dense_343_1/Add:0", shape=(

2024-06-11 10:17:38.181428: W tensorflow/core/framework/op_kernel.cc:1827] INVALID_ARGUMENT: required broadcastable shapes
2024-06-11 10:17:38.181451: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: Exception encountered when calling Drop_GLU_Add_Norm.call().

[1m{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:AddV2] name: [0m

Arguments received by Drop_GLU_Add_Norm.call():
  • inputs=tf.Tensor(shape=(1, 1, 6), dtype=float32)
  • residual=tf.Tensor(shape=(1, 1, 8), dtype=float32)

In [12]:
per_feature_grn = [GRN(32,d_model= 6,drop_rate=0.0) for _ in range(6)]

In [13]:
per_feature_grn = [GRN(32,d_model= 6,drop_rate=0.0) for _ in range(6)]

In [30]:
testando = tf.random.uniform(shape=(1, 8, 6, 1))

teste_GRN = GRN(units=32,d_model=6,drop_rate=0.0)
for i in list(range(6)):
    e = per_feature_grn[i](tf.expand_dims(y[: , :, i], axis=1))
    print(f"\n element {i}:")
    print(e)


2024-06-11 10:01:44.648334: W tensorflow/core/framework/op_kernel.cc:1827] INVALID_ARGUMENT: required broadcastable shapes
2024-06-11 10:01:44.648360: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: Exception encountered when calling Drop_GLU_Add_Norm.call().

[1m{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:AddV2] name: [0m

Arguments received by Drop_GLU_Add_Norm.call():
  • inputs=tf.Tensor(shape=(1, 1, 32), dtype=float32)
  • residual=tf.Tensor(shape=(1, 1, 8), dtype=float32)

TypeError: 'GRN' object is not subscriptable

In [31]:
opa = Drop_GLU_Add_Norm(units=32,drop_rate=0.0)
opa(testando[:,:,1],testando[:,:,1])

<tf.Tensor: shape=(1, 8, 32), dtype=float32, numpy=
array([[[ 0.6773038 , -0.38210535, -0.4436226 ,  0.46208906,
         -0.32801628, -0.597528  ,  1.055737  , -0.9961195 ,
         -0.3975935 ,  0.93579245,  1.456058  , -0.06971025,
          0.76543474, -0.5420809 ,  1.6719556 ,  0.45393038,
         -0.84841824,  0.09780025,  1.0665584 ,  1.0427299 ,
          0.19485426, -0.6762171 ,  0.736012  ,  1.4454112 ,
         -0.7830076 , -1.4795971 , -0.29632902, -1.7841425 ,
         -1.6429558 , -1.752029  ,  0.50096846,  0.45682096],
        [ 0.66930056, -0.37932014, -0.44489098,  0.45336866,
         -0.3245635 , -0.5956259 ,  1.0321364 , -0.98268366,
         -0.39986134,  0.9176259 ,  1.4498649 , -0.07090378,
          0.769279  , -0.538311  ,  1.6430497 ,  0.45401335,
         -0.83765316,  0.09722519,  1.0759196 ,  1.0496573 ,
          0.19520426, -0.67557526,  0.71922636,  1.4293017 ,
         -0.78772306, -1.4676156 , -0.2966051 , -1.7494726 ,
         -1.6333203 , -1.7166891

In [None]:

##########################################################################
class VariableSelectionNetwork(tf.keras.layers.Layer):
  def __init__(self, d_model, dropout_rate, additional_context=None, **kwargs):
    super(VariableSelectionNetwork, self).__init__(**kwargs)
    self.d_model = d_model
    self.dropout_rate = dropout_rate
    self.additional_context = additional_context

    self.flattened_grn = GRN(d_model, drop_rate=dropout_rate, optional_context=additional_context)
    self.per_feature_grn = [GRN(d_model, drop_rate=dropout_rate) for _ in range(d_model)]

  def call(self, inputs):
    # Non-static inputs
    if self.additional_context is not None:
      embedding, static_context = inputs
      time_steps = tf.shape(embedding)[1]

      flatten = tf.reshape(embedding, (-1, time_steps, self.d_model * self.output_size))
      static_context = tf.expand_dims(static_context, axis=1)

      # Nonlinear transformation with gated residual network
      mlp_outputs = self.flattened_grn((flatten, static_context))
      sparse_weights = tf.nn.softmax(mlp_outputs, axis=-1)
      sparse_weights = tf.expand_dims(sparse_weights, axis=2)

      trans_emb_list = []
      for i in range(self.output_size):
        e = self.per_feature_grn[i](embedding[..., i])  # Select i-th feature
        trans_emb_list.append(e)
      transformed_embedding = tf.concat(trans_emb_list, axis=-1)

      combined = sparse_weights * transformed_embedding
      temporal_ctx = tf.reduce_sum(combined, axis=-1)

    # Static inputs
    else:
      embedding = inputs
      print("\n my input:\n")
      print(embedding)
      flatten = tf.reshape(embedding, [-1, self.d_model])  # Assuming batch_size is the first dimension
      print("\n my flatten vector:\n")
      print(flatten)
      # Nonlinear transformation with gated residual network
      mlp_outputs = self.flattened_grn(flatten)
      print("\n apllying GRN on flatten vector:\n")
      print(mlp_outputs)
      sparse_weights = tf.nn.softmax(mlp_outputs, axis=-1)
      print("\n apllying softmax:\n")
      print(sparse_weights)
      sparse_weights = tf.expand_dims(sparse_weights, axis=-1)
      print("\n expanding my dimensions:\n")
      print(sparse_weights)
      trans_emb_list = []
      for i in range(self.output_size):
        e = self.per_feature_grn[i](embedding[:, i:i + 1, :])  # Select i-th feature channel
        trans_emb_list.append(e)
      transformed_embedding = tf.concat(trans_emb_list, axis=1)

      combined = sparse_weights * transformed_embedding
      temporal_ctx = tf.reduce_sum(combined, axis=1)

    return temporal_ctx, sparse_weights