In [1]:
import numpy as np
import tensorflow as tf

# **Loaded IMDB dataset**

---



---



In [2]:
# Load the IMDB reviews as integer sequences, keeping only the 10000 most frequent words.
train_split,test_split=tf.keras.datasets.imdb.load_data(num_words=10000)
X_train,Y_train=train_split
X_test,Y_test=test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


# **Padded (or truncated) all sequences to the same length**

---



---



In [3]:
# Bring every review to exactly 200 tokens; zeros are appended after shorter reviews.
pad_options=dict(maxlen=200,padding='post')
X_train_padding=tf.keras.preprocessing.sequence.pad_sequences(X_train,**pad_options)
X_test_padding=tf.keras.preprocessing.sequence.pad_sequences(X_test,**pad_options)

# **Defined Positional encoding function**

---



---



In [5]:
def positional_encoding(seq_len,d_model):
  """Build the sinusoidal positional-encoding table.

  Even column j holds sin(pos / 10000**(j/d_model)) and the following odd
  column j+1 holds the cosine of the *same* angle, so each (sin, cos) pair
  shares one frequency.

  Args:
    seq_len: Number of positions (rows) to encode.
    d_model: Number of encoding dimensions (columns). May be odd, in which
      case the last column is a sine with no cosine partner.

  Returns:
    A float32 tensor of shape (seq_len, d_model).
  """
  positions=np.arange(seq_len)[:,np.newaxis]
  # Even column index that starts each sin/cos pair: 0, 2, 4, ...
  pair_start=np.arange(0,d_model,2)
  # angles[pos, k] is the argument shared by columns 2k and 2k+1.
  angles=positions/np.power(10000.0,pair_start/d_model)
  pos_encoding=np.zeros((seq_len,d_model))
  pos_encoding[:,0::2]=np.sin(angles)
  # With an odd d_model there is one fewer odd column than even columns.
  pos_encoding[:,1::2]=np.cos(angles[:,:d_model//2])
  return tf.cast(pos_encoding,dtype=tf.float32)

# **Built Custom Transformer model**

---



---




In [6]:
class transformer(tf.keras.layers.Layer):
  """Transformer encoder block: self-attention followed by a feed-forward network.

  Each of the two sub-layers is followed by dropout, a residual connection
  and layer normalization. Because of the residual additions, the last
  dimension of the input is expected to equal `d_model`.

  Args:
    d_model: Output width of the feed-forward network; also passed as
      `key_dim` to the attention layer.
    num_heads: Number of attention heads.
    dff: Number of units in the hidden (ReLU) layer of the feed-forward network.
    rate: Dropout rate applied to the output of each sub-layer.
    **kwargs: Forwarded to `tf.keras.layers.Layer` (e.g. `name`).
  """
  def __init__(self,d_model,num_heads,dff,rate=0.1,**kwargs):
    super(transformer,self).__init__(**kwargs)
    # Constructor arguments are kept so get_config() can report them.
    self.d_model=d_model
    self.num_heads=num_heads
    self.dff=dff
    self.rate=rate
    self.mha=tf.keras.layers.MultiHeadAttention(num_heads=num_heads,key_dim=d_model)
    self.ffn=tf.keras.Sequential([
        tf.keras.layers.Dense(dff,activation='relu'),
        tf.keras.layers.Dense(d_model)
    ])
    self.norm1=tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.norm2=tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.dropout=tf.keras.layers.Dropout(rate=rate)
    self.dropout2=tf.keras.layers.Dropout(rate=rate)

  def call(self,X,training=None):
    """Apply the encoder block to `X`.

    Args:
      X: Input tensor; used as both query and value of the attention layer.
      training: Forwarded to the dropout layers. Defaults to None so the
        layer can be called without the framework supplying the argument.

    Returns:
      The layer-normalized output of the feed-forward sub-layer.
    """
    # Self-attention sub-layer with residual connection.
    attn_output=self.mha(X,X)
    attn_output=self.dropout(attn_output,training=training)
    out1=self.norm1(X+attn_output)
    # Position-wise feed-forward sub-layer with residual connection.
    fnn_output=self.ffn(out1)
    fnn_output=self.dropout2(fnn_output,training=training)
    out2=self.norm2(out1+fnn_output)
    return out2

  def get_config(self):
    """Return the constructor arguments so the layer can be re-created from its config."""
    config=super(transformer,self).get_config()
    config.update({
        'd_model':self.d_model,
        'num_heads':self.num_heads,
        'dff':self.dff,
        'rate':self.rate,
    })
    return config

# **Defined the model by stacking layers**

---



---



In [7]:
# Hyperparameters of the classifier.
seq_len,d_model=200,128
num_heads,dff=4,512
vocab_size=10000

# Token ids -> embeddings, with the positional table added to every sequence.
inputs=tf.keras.Input(shape=(seq_len,))
embedding_layer=tf.keras.layers.Embedding(vocab_size,d_model)
pos_encoding=positional_encoding(seq_len,d_model)
x=embedding_layer(inputs)+pos_encoding

# One encoder block, then average over positions and classify.
encoder_block=transformer(d_model,num_heads,dff)
x=encoder_block(x)
x=tf.keras.layers.GlobalAveragePooling1D()(x)
x=tf.keras.layers.Dropout(0.1)(x)
outputs=tf.keras.layers.Dense(1,activation='sigmoid')(x)

model=tf.keras.Model(inputs=inputs,outputs=outputs)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# **Trained model on IMDB dataset**

---



---



In [8]:
# Train for 5 epochs; the test split is passed as validation data and evaluated after each epoch.
model.fit(
    X_train_padding,
    Y_train,
    epochs=5,
    validation_data=(X_test_padding,Y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7992d174da80>