In [1]:
import tensorflow as tf
import keras
from keras.layers import Dense,LSTM,Masking

In [2]:
import numpy as np 

In [3]:
class BahDanau_attention(tf.keras.layers.Layer) :
    def __init__ (self,units,verbose=0):
        super(BahDanau_attention, self).__init__()
        self.__W1 = keras.layers.Dense(units)
        self.__W2 = keras.layers.Dense(units)
        self.__V = keras.layers.Dense(1)
        self.verbose = verbose

    def call (self,query,values) :
        if self.verbose :
            print("\n********** Bahdanau Start ***********")
            print("query (decoder hidden state) : (batch_size,hidden size)", query.shape)
            print("values (encoder all hidden state ) : (batch_size,max_len,hidden_size)",values.shape)
        query_with_time_axis = tf.expand_dims(query,1)
        if self.verbose :
            print("query_with_time_axis : (batch_size,1,hidden size)",query_with_time_axis.shape)
            score = self.V(tf.nn.tanh(self.__W1(query_with_time_axis) + self.__W2(values)))

        if self.verbose :
            print("score : (batch_size,1,1)", score.shape)
            attention_weight = tf.nn.softmax(score,axis=1)
        
        if self.verbose :
            print("attention_weight : (batch_size,1,max_len)", attention_weight.shape)
            context_vector = attention_weight * values
        if self.verbose :
            print("context_vector : (batch_size,max_len,hidden_size)", context_vector.shape)
        context_vector = tf.reduce_sum(context_vector,axis=1)

        if self.verbose:
            print("context_verctor after reduce sum : (batch_size,max_length,hidden_size) ",context_vector.shape)
            print("\n*******Bahdanau Is End*******")
        return context_vector,attention_weight



In [4]:
def make_random () :
    return [np.random.randint(0,9) for i in range(4)]

In [5]:
X = [make_random() for i in range(3000)]
Y = [make_random() for i in range(3000)]


In [6]:
Xs = keras.utils.to_categorical(X,num_classes=10)
Ys = keras.utils.to_categorical(Y,num_classes=10)

In [8]:
verbose = 0
batch_size = 1
encoder_inputs = keras.Input(shape=(4,10))
encoder_lstm = keras.layers.LSTM(32,return_state=True)
encoder_outputs,encoder_state_h,encoder_state_c = encoder_lstm(encoder_inputs)
encoder_states = [encoder_state_h,encoder_state_c]
attention = BahDanau_attention(32,verbose=verbose)
decoder_inputs = keras.Input(shape=(1,(4+32)))
decoder_lstm = keras.layers.LSTM(32,return_state=True,return_sequences=True)
decoder_dense = keras.layers.Dense(10,activation='softmax')

all_input = list()
inputs = np.zeros((batch_size,1,10))
inputs[:,0,0] = 1
decoder_outputs = encoder_state_h
states = encoder_states
for _ in range(4) :
    context_vector,attention_weight = attention.call(decoder_outputs,encoder_outputs)
    inputs = tf.concat([context_vector,inputs],axis=-1)
    decoder_outputs,state_h,state_c = decoder_lstm(inputs,initial_state=states)
    outputs = decoder_dense(decoder_dense)
    outputs = tf.expand_dims(outputs,1)
    all_input.append(outputs)
    inputs = outputs
    states = [state_h,state_c]

decoder_outputs = keras.layers.Lambda(lambda x : keras.layers.concatenate(x,axis=1))(all_input)


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [7]:
import numpy as np
import tensorflow as tf

def scaled_dot_product_attention(Q, K, V):
    d_k = tf.cast(tf.shape(K)[-1], tf.float32)  # Dimensi key
    scores = tf.matmul(Q, K, transpose_b=True) / tf.math.sqrt(d_k)
    attention_weights = tf.nn.softmax(scores, axis=-1)
    output = tf.matmul(attention_weights, V)
    return output

# Contoh input
Q = tf.constant([[1.0, 0.5], [0.8, 0.3]], dtype=tf.float32)
K = tf.constant([[0.9, 0.7], [0.4, 0.6]], dtype=tf.float32)
V = tf.constant([[1.5, 2.0], [1.2, 1.8]], dtype=tf.float32)

output = scaled_dot_product_attention(Q, K, V)
print(output)


tf.Tensor(
[[1.378806  1.919204 ]
 [1.37263   1.9150866]], shape=(2, 2), dtype=float32)


In [13]:
class LuongAttention (keras.layers.Layer):
    def __init__ (self,units) :
        super(LuongAttention,self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)
    def call(self, query, values):
        print('\n******* Luong Attention  STARTS******')
        print('query (decoder hidden state): (batch_size, hidden size) ', query.shape)
        print('values (encoder all hidden state): (batch_size, max_len, hidden size) ', values.shape)
        query_with_time_axis = tf.expand_dims(query, 1)

        print('query_with_time_axis:(batch_size, 1, hidden size) ', query_with_time_axis.shape)


        values_transposed = tf.transpose(values, perm=[0, 2,1])
        print('values_transposed:(batch_size, hidden size, max_len) ', values_transposed.shape)

        score = tf.transpose(tf.matmul(query_with_time_axis, values_transposed) , perm=[0, 2, 1])

        print('score: (batch_size, max_length, 1) ',score.shape)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        print('context_vector before reduce_sum: (batch_size, max_length, hidden_size) ',context_vector.shape)
        context_vector = tf.reduce_sum(context_vector, axis=1)
        print('context_vector after reduce_sum: (batch_size, hidden_size) ',context_vector.shape)


        print('\n******* Luong Attention ENDS******')
        return context_vector, attention_weights


In [14]:
verbose = 0
batch_size = 1
encoder_inputs = keras.Input(shape=(4,10))
encoder_lstm = keras.layers.LSTM(32,return_state=True)
encoder_outputs,encoder_state_h,encoder_state_c = encoder_lstm(encoder_inputs)
encoder_states = [encoder_state_h,encoder_state_c]
attention = LuongAttention(32)
decoder_inputs = keras.Input(shape=(1,(4+32)))
decoder_lstm = keras.layers.LSTM(32,return_state=True,return_sequences=True)
decoder_dense = keras.layers.Dense(10,activation='softmax')

all_input = list()
inputs = np.zeros((batch_size,1,10))
inputs[:,0,0] = 1
decoder_outputs = encoder_state_h
states = encoder_states
for _ in range(4) :
    context_vector,attention_weight = attention(decoder_outputs,encoder_outputs)
    inputs = tf.concat([context_vector,inputs],axis=-1)
    decoder_outputs,state_h,state_c = decoder_lstm(inputs,initial_state=states)
    outputs = decoder_dense(decoder_dense)
    outputs = tf.expand_dims(outputs,1)
    all_input.append(outputs)
    inputs = outputs
    states = [state_h,state_c]

decoder_outputs = keras.layers.Lambda(lambda x : keras.layers.concatenate(x,axis=1))(all_input)



******* Luong Attention  STARTS******
query (decoder hidden state): (batch_size, hidden size)  (None, 32)
values (encoder all hidden state): (batch_size, max_len, hidden size)  (None, 32)
query_with_time_axis:(batch_size, 1, hidden size)  (None, 1, 32)

******* Luong Attention  STARTS******
query (decoder hidden state): (batch_size, hidden size)  (None, 32)
values (encoder all hidden state): (batch_size, max_len, hidden size)  (None, 32)
query_with_time_axis:(batch_size, 1, hidden size)  (None, 1, 32)


1. The `call()` method of your layer may be crashing. Try to `__call__()` the layer eagerly on some test input first to see if it works. E.g. `x = np.random.random((3, 4)); y = layer(x)`
2. If the `call()` method is correct, then you may need to implement the `def build(self, input_shape)` method on your layer. It should create all variables used by the layer (e.g. by calling `layer.build()` on all its children layers).
Exception encountered: ''Dimension must be 2 but is 3 for '{{node transpose}} = Transpose[T=DT_FLOAT, Tperm=DT_INT32](Placeholder_1, transpose/perm)' with input shapes: [?,32], [3].''


ValueError: Exception encountered when calling LuongAttention.call().

[1mCould not automatically infer the output shape / dtype of 'luong_attention_2' (of type LuongAttention). Either the `LuongAttention.call()` method is incorrect, or you need to implement the `LuongAttention.compute_output_spec() / compute_output_shape()` method. Error encountered:

Dimension must be 2 but is 3 for '{{node transpose}} = Transpose[T=DT_FLOAT, Tperm=DT_INT32](Placeholder_1, transpose/perm)' with input shapes: [?,32], [3].[0m

Arguments received by LuongAttention.call():
  • args=('<KerasTensor shape=(None, 32), dtype=float32, sparse=False, name=keras_tensor_17>', '<KerasTensor shape=(None, 32), dtype=float32, sparse=False, name=keras_tensor_16>')
  • kwargs=<class 'inspect._empty'>