In [None]:
import keras
from keras import ops
from keras import layers



In [None]:
class TransformerBlock(layers.Layer):
  """Self-attention sub-layer followed by a feed-forward sub-layer.

  Each sub-layer's output goes through dropout, is added back to the
  sub-layer's input (residual connection) and is then layer-normalized.

  Args:
    embed_dim: Used as `key_dim` of the attention layer and as the width of
      the last feed-forward Dense layer, so the residual additions line up.
    num_heads: Number of attention heads.
    ff_dim: Width of the hidden (ReLU) feed-forward Dense layer.
    rate: Dropout rate applied after each sub-layer.
  """

  def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
    super().__init__()
    self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
    # Position-wise feed-forward network: expand to ff_dim, project back to embed_dim.
    feed_forward_layers = [
        layers.Dense(ff_dim, activation='relu'),
        layers.Dense(embed_dim),
    ]
    self.ffn = keras.Sequential(feed_forward_layers)
    self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
    self.dropout1 = layers.Dropout(rate)
    self.dropout2 = layers.Dropout(rate)

  def call(self, input):
    """Run the block on `input` and return the normalized result."""
    # Self-attention: the same tensor is passed as query and value.
    attended = self.dropout1(self.att(input, input))
    out1 = self.layernorm1(input + attended)
    # Feed-forward sub-layer with its own residual connection.
    transformed = self.dropout2(self.ffn(out1))
    return self.layernorm2(out1 + transformed)





In [None]:
class TokenAndPositionEmbedding(layers.Layer):
  """Adds a learned position embedding to a learned token embedding.

  Args:
    maxlen: Number of rows in the position-embedding table, i.e. the largest
      sequence length this layer can index.
    vocal_size: Number of rows in the token-embedding table (vocabulary
      size). The misspelled name is kept so existing keyword callers do not
      break.
    embed_dim: Output size of both embeddings.
  """

  def __init__(self, maxlen, vocal_size, embed_dim):
    super().__init__()
    # Size the token table from the constructor argument. The previous code
    # read a notebook-level `vocab_size` global instead, which ignored the
    # argument and raised NameError when that global was not yet defined.
    self.token_emb = layers.Embedding(input_dim=vocal_size, output_dim=embed_dim)
    self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

  def call(self, x):
    """Return token embeddings of `x` plus embeddings of positions 0..len-1.

    The last axis of `x` is treated as the sequence axis.
    """
    seq_len = ops.shape(x)[-1]
    positions = ops.arange(start=0, stop=seq_len, step=1)
    # The position embeddings have no batch axis; the addition broadcasts them.
    return self.token_emb(x) + self.pos_emb(positions)


In [None]:
# Dataset settings shared by the cells below.
vocab_size = 30000  # forwarded to load_data as `num_words`
maxlen = 200  # sequence length used later for padding and for the model input

# load_data returns two (sequences, labels) pairs; the second pair is used
# as the validation set throughout this notebook.
imdb_train, imdb_val = keras.datasets.imdb.load_data(num_words=vocab_size)
x_train, y_train = imdb_train
x_val, y_val = imdb_val

In [None]:
# Inspect the first training example: a list of integer token ids (see output).
x_train[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 22665,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 21631,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 19193,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 10311,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 2,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 12118,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5,
 1

In [None]:
# Bring every review to exactly `maxlen` tokens so the batches are rectangular.
# With pad_sequences' default arguments, padding and truncation both happen at
# the start of a sequence.
x_train, x_val = (
    keras.utils.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_val)
)

In [None]:
# Model hyperparameters.
embed_dim = 32  # embedding size for each token
num_heads = 2  # number of attention heads
ff_dim = 32  # hidden size of the feed-forward network inside the block

# Encoder: token + position embedding followed by a single transformer block.
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(embedding_layer(inputs))

# Classification head: average over the sequence axis, then a small MLP with
# dropout before and after the hidden layer.
for head_layer in (
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.1),
    layers.Dense(20, activation='relu'),
    layers.Dropout(0.1),
):
  x = head_layer(x)
outputs = layers.Dense(2, activation='softmax')(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# The sparse categorical loss takes integer class labels and pairs with the
# 2-unit softmax output of the model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Keep the returned History object so the per-epoch metrics can be inspected.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=2,
    validation_data=(x_val, y_val),
)


Epoch 1/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 150ms/step - accuracy: 0.7226 - loss: 0.5107 - val_accuracy: 0.8748 - val_loss: 0.2914
Epoch 2/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 146ms/step - accuracy: 0.9367 - loss: 0.1729 - val_accuracy: 0.8707 - val_loss: 0.3118
