In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
import numpy as np

# Fetch the IMDB review dataset as (sequences, labels) pairs for the train and
# test splits; num_words=10000 limits the vocabulary to the most frequent words.
(train_data, train_labels), (test_data, test_labels) = (
    tf.keras.datasets.imdb.load_data(num_words=10000)
)



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [2]:
def preprocess_data(sequences, maxlen, padding='post'):
    """Bring every sequence to exactly `maxlen` entries.

    Thin wrapper around Keras' `pad_sequences`.

    Args:
        sequences: Iterable of integer sequences of varying length.
        maxlen: Target length of every returned sequence.
        padding: 'post' to append the padding value after each sequence,
            'pre' to insert it before. Defaults to 'post'.

    Returns:
        Whatever `pad_sequences` returns for these arguments (per the Keras
        documentation, a 2D array of shape (len(sequences), maxlen)).
    """
    pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
    return pad_sequences(sequences, maxlen=maxlen, padding=padding)

# Fixed sequence length fed to the model.
maxlen = 100

# Pad/truncate both splits to `maxlen` with the same settings.
train_data, test_data = (
    preprocess_data(split, maxlen) for split in (train_data, test_data)
)


In [3]:
class _EncoderBlock(layers.Layer):
    """One encoder block: self-attention then a feed-forward net, each with a residual add."""

    def __init__(self, num_heads, d_model, dff, dropout):
        super().__init__()
        # Self-attention sub-layer followed by a linear projection.
        self.attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.dense1 = layers.Dense(units=d_model)
        self.norm1 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout)
        # Position-wise feed-forward sub-layer: d_model -> dff -> d_model.
        self.ffn = layers.Dense(units=dff, activation='relu')
        self.dense2 = layers.Dense(units=d_model)
        self.norm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, x, training=None):
        # query == value == x, i.e. self-attention.
        attn_output = self.attention(x, x)
        x1 = self.dense1(attn_output)
        x = self.norm1(x + x1)
        x = self.dropout1(x, training=training)
        ffn_output = self.ffn(x)
        x2 = self.dense2(ffn_output)
        x = self.norm2(x + x2)
        return self.dropout2(x, training=training)


class TransformerModel(tf.keras.Model):
    """Transformer-encoder binary classifier over token-id sequences.

    Pipeline: token embedding -> `num_layers` encoder blocks -> average over
    the sequence axis -> single sigmoid unit.

    Args:
        num_heads: Number of attention heads in every encoder block.
        num_layers: Number of stacked encoder blocks (must be >= 1).
        d_model: Embedding width; also the per-head key size and the output
            width of every block.
        vocab_size: Size of the embedding table (`input_dim` of the embedding).
        dff: Hidden width of the feed-forward sub-layer.
        dropout: Dropout rate applied after each normalisation.

    Raises:
        ValueError: If `num_layers` is smaller than 1.

    NOTE(review): no positional signal is added to the embeddings, and
    attention/pooling are not masked, so token order is not visible to the
    model and any padding positions in the input take part in both attention
    and the final average.
    """

    def __init__(self, num_heads, num_layers, d_model, vocab_size, dff, dropout):
        super(TransformerModel, self).__init__()
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")
        self.embedding = layers.Embedding(input_dim=vocab_size, output_dim=d_model)
        # Honour `num_layers`: one independent encoder block per requested layer.
        self.encoder_blocks = [
            _EncoderBlock(num_heads, d_model, dff, dropout)
            for _ in range(num_layers)
        ]
        self.global_pool = layers.GlobalAveragePooling1D()
        self.fc_out = layers.Dense(units=1, activation='sigmoid')

    def call(self, x, training=None):
        """Run the forward pass.

        Args:
            x: Batch of token ids fed to the embedding layer
                (presumably shaped (batch, seq_len) -- confirm with caller).
            training: Forwarded to the dropout layers; `None` lets Keras
                decide from the calling context.

        Returns:
            The sigmoid output of the final one-unit dense layer.
        """
        x = self.embedding(x)
        for block in self.encoder_blocks:
            x = block(x, training=training)
        x = self.global_pool(x)
        return self.fc_out(x)

# Hyper-parameters of the Transformer classifier
num_heads = 2        # attention heads
num_layers = 2       # encoder layers requested
d_model = 64         # embedding width
vocab_size = 10000   # same value as num_words used when loading the dataset
dff = 512            # feed-forward hidden width
dropout = 0.1        # dropout rate

# Build the (still un-compiled) model from the settings above
model = TransformerModel(
    num_heads=num_heads,
    num_layers=num_layers,
    d_model=d_model,
    vocab_size=vocab_size,
    dff=dff,
    dropout=dropout,
)



In [4]:
# Seed NumPy and TensorFlow before training so that random operations run from
# here on (e.g. creation of weights that do not exist yet, batch shuffling,
# dropout) are repeatable from a fresh kernel, as far as the backend allows.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Compile the model: Adam optimiser, binary cross-entropy loss, accuracy metric
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss = tf.keras.losses.BinaryCrossentropy()

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# Train the model
# NOTE(review): the test split is passed as validation data, so the reported
# val_* metrics are computed on the test set; picking epochs or
# hyper-parameters from them would leave no untouched data for a final
# evaluation.
epochs = 10
history = model.fit(
    train_data,
    train_labels,
    epochs=epochs,
    validation_data=(test_data, test_labels),
    batch_size=64,
)




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
