In [13]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed
#define the TransformerBlock class
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn=tf.keras.Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim),
        ])
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
    
    def call(self, inputs, training, mask=None):
        attn_output = self.attn(inputs, inputs, attention_mask=mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)

        return self.layernorm2(out1 + ffn_output)

In [14]:
# Define the DecisionTransformer model
class DecisionTransformer(tf.keras.Model):
    def __init__(self, state_dim, action_dim, embedding_dim, num_heads,
        ff_dim, num_layers):
        super(DecisionTransformer, self).__init__()
        self.state_embed = Dense(embedding_dim, activation='relu')
        self.action_embed = Dense(embedding_dim, activation='relu')
        self.transformer_layers = [TransformerBlock(embedding_dim,
        num_heads, ff_dim) for _ in range(num_layers)]
        self.dense = TimeDistributed(Dense(action_dim))
    def call(self, states, actions, training=False):
        state_embeddings = self.state_embed(states)
        action_embeddings = self.action_embed(actions)
        x = state_embeddings + action_embeddings
        for transformer_layer in self.transformer_layers:
            x = transformer_layer(x, training=training)
        return self.dense(x)



In [16]:
# Example usage

state_dim = 20
action_dim = 5
embedding_dim = 128
num_heads = 4
ff_dim = 512
num_layers = 6

# Initialize DecisionTransformer model

dt = DecisionTransformer(state_dim, action_dim, embedding_dim, num_heads,
ff_dim, num_layers)

# Generate example states and actions
states = tf.random.uniform((32, 100, state_dim))
# Batch of 32 sequences of 100 states
actions = tf.random.uniform((32,100, action_dim))

# Get the model predictions
output = dt(states, actions, training=True)
print(output.shape)
#should print (32, 100, 5) for batch size of 32 sequencel length 100, and action dimention 5


AttributeError: Exception encountered when calling TransformerBlock.call().

[1m'TransformerBlock' object has no attribute 'attn'[0m

Arguments received by TransformerBlock.call():
  • inputs=tf.Tensor(shape=(32, 100, 128), dtype=float32)
  • training=True
  • mask=None