In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, ffnn_dim, n_heads):
        super(TransformerBlock, self).__init__()
        self.attention = layers.MultiHeadAttention(n_heads,key_dim=embed_dim)
        self.ffnn = keras.Sequential(
            [layers.Dense(ffnn_dim, activation = "relu"),
             layers.Dense(128, activation = "relu"),
             layers.Dense(embed_dim)]
        )
        
        self.norm1 = layers.LayerNormalization(epsilon = 1e-6)
        self.norm2 = layers.LayerNormalization(epsilon = 1e-6)
        self.drop1 = layers.Dropout(0.1)
        self.drop2 = layers.Dropout(0.1)
        
    def call(self, inputs, training):
        attention_out = self.attention(inputs, inputs)
        attention_out = self.drop1(attention_out, training=training)
        out1 = self.norm1(inputs + attention_out)
        ffnn_out = self.ffnn(out1)
        ffnn_out = self.drop2(ffnn_out, training=training)
        return self.norm2(out1)

In [3]:
class TokenAndPosEmbed(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPosEmbed, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim = embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
        
    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [4]:
vocab_size = 20000
maxlen = 200

# test train split

# pad_sequence adds padding to each array suppose [1],[1,2],[1,2,3] are three
# arrays then pad_sequence will make it such that the output is [0,0,1],
# [0,1,2],[1,2,3]. Assuming array 3 is maxlen, i.e maxlen = 2.

x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_train = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

NameError: name 'x_train' is not defined

## Classifier call

In [5]:
embed_dim = 64 #embed size 
ffnn_dim = 64 #Neural network input dimension. Same as embed_dim
num_heads = 4

input_strm = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPosEmbed(maxlen, vocab_size, embed_dim)
nn = embedding_layer(input_strm)
transformer = TransformerBlock(embed_dim = embed_dim,ffnn_dim = ffnn_dim,n_heads = num_heads)(nn)
nn = layers.GlobalAveragePooling1D()(nn)
nn = layers.Dropout(0.1)(nn)
nn = layers.Dense(128, activation='relu')(nn)
nn = layers.Dropout(0.1)(nn)
outputs = layers.Dense(2, activation = "softmax")(nn)

cybertron = keras.Model(inputs = input_strm, outputs = outputs)

## Train and evaluate

In [7]:
cybertron.compile(optimizer = "adam", loss="categorical_crossentropy",metrics="accuracy")

history = cybertron.fit(
    x_train, y_train, batch_size = 32, epochs = 5, validation_data=(x_val, y_val)
)

NameError: name 'x_train' is not defined