In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models

In [None]:
# Notebook-wide configuration.
SEED = 42           # fixed seed for the random number generators seeded below
vocab_size = 10000  # passed as num_words to imdb.load_data and as input_dim of the Embedding
max_len = 200       # every review is padded/truncated to this many tokens

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, dropout and batch shuffling start from the same state on every
# Restart & Run All. (Some GPU kernels may still be nondeterministic.)
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Load the IMDB reviews as sequences of token ids, with the vocabulary limited
# via num_words=vocab_size. load_data returns a (train, test) pair of (reviews, labels).
train_split, test_split = imdb.load_data(num_words=vocab_size)
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Quick sanity check of what was loaded: split sizes plus the first example.
n_train = len(X_train)
n_test = len(X_test)
first_review_head = X_train[0][:10]  # only the first ten token ids, to keep the output short
first_label = y_train[0]

print("Train samples:", n_train)
print("Test samples :", n_test)
print("First review (token ids):", first_review_head, "...")
print("First label:", first_label)

Train samples: 25000
Test samples : 25000
First review (token ids): [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65] ...
First label: 1


In [None]:
# Bring every review to exactly max_len tokens. Both splits share one set of
# options: 'pre' padding prepends the pad value to short reviews, and 'pre'
# truncating drops tokens from the start of long ones.
pad_options = dict(maxlen=max_len, padding='pre', truncating='pre')

X_train_padded = pad_sequences(X_train, **pad_options)
X_test_padded = pad_sequences(X_test, **pad_options)

train_shape = X_train_padded.shape
test_shape = X_test_padded.shape
print("Padded shape (train):", train_shape)
print("Padded shape (test) :", test_shape)

Padded shape (train): (25000, 200)
Padded shape (test) : (25000, 200)


In [None]:
import math

class ScaledDotProductSelfAttention(layers.Layer):
    """Single-head scaled dot-product self-attention.

    The input is projected to queries, keys and values with three Dense layers,
    and the output is ``softmax(Q @ K^T / sqrt(d_k)) @ V``.

    Args:
        d_model: Feature size of the input. Stored and serialized for reference;
            it is not used in the computation (the Dense projections infer
            their input size on first call).
        d_k: Output size of the query and key projections; also sets the
            ``1 / sqrt(d_k)`` scaling of the scores.
        d_v: Output size of the value projection, i.e. the feature size of the
            returned output.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, d_model, d_k, d_v, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v

        # Learned linear projections for queries, keys and values.
        self.W_q = layers.Dense(d_k)
        self.W_k = layers.Dense(d_k)
        self.W_v = layers.Dense(d_v)

    def call(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Input sequence tensor; in this notebook the embedding output of
                shape (batch, seq_len, features).
            mask: Optional (batch, seq_len) boolean-like tensor where a true
                value marks a real token. Key positions whose mask is false
                get effectively zero attention weight. With ``mask=None``
                (the default) every position is attended to.

        Returns:
            A tuple ``(output, attn_weights)``: the attended values, and the
            attention weights, which are normalised over the last axis.
        """
        Q = self.W_q(x)
        K = self.W_k(x)
        V = self.W_v(x)

        # Pairwise query/key similarity, scaled by sqrt(d_k).
        scores = tf.matmul(Q, K, transpose_b=True) / math.sqrt(self.d_k)

        if mask is not None:
            # Broadcast the per-key mask over the query axis and push masked
            # keys to the most negative representable score so softmax gives
            # them ~0 weight. dtype.min keeps this valid under float16 too.
            key_mask = tf.cast(mask, tf.bool)[:, tf.newaxis, :]
            masked_score = tf.constant(scores.dtype.min, dtype=scores.dtype)
            scores = tf.where(key_mask, scores, masked_score)

        attn_weights = tf.nn.softmax(scores, axis=-1)

        output = tf.matmul(attn_weights, V)
        return output, attn_weights

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Without this, ``d_model``/``d_k``/``d_v`` are missing from the saved
        config of any model that contains the layer.
        """
        config = super().get_config()
        config.update(
            {
                "d_model": self.d_model,
                "d_k": self.d_k,
                "d_v": self.d_v,
            }
        )
        return config

In [None]:
# Sizes of the embedding and of the attention projections.
embedding_dim = 64
d_model = embedding_dim  # the attention layer consumes the embedding output directly
d_k = 64
d_v = 64

# One row of max_len integer token ids per review.
inputs = layers.Input(shape=(max_len,))

# `input_length` is deprecated in Keras 3 and was redundant here anyway: the
# sequence length is already fixed by the Input layer above.
# NOTE(review): the Embedding is created without mask_zero, so padding
# positions are embedded, attended to and averaged like real tokens.
x = layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)(inputs)

attn_layer = ScaledDotProductSelfAttention(d_model=d_model, d_k=d_k, d_v=d_v)
# attn_weights is not wired into the model outputs; it is only kept as a handle.
attn_output, attn_weights = attn_layer(x)

# Collapse the sequence axis into a single vector per review.
x = layers.GlobalAveragePooling1D()(attn_output)

# Small classification head.
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.3)(x)

# Single sigmoid unit: binary label, matching the binary_crossentropy loss below.
outputs = layers.Dense(1, activation='sigmoid')(x)

model = models.Model(inputs=inputs, outputs=outputs)

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()



In [None]:
# Training settings gathered in one place; validation_split holds out a
# fraction of the training data for the per-epoch val_* metrics.
fit_options = dict(
    epochs=3,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)

history = model.fit(X_train_padded, y_train, **fit_options)

Epoch 1/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 269ms/step - accuracy: 0.6392 - loss: 0.6293 - val_accuracy: 0.8676 - val_loss: 0.3144
Epoch 2/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 253ms/step - accuracy: 0.8944 - loss: 0.2687 - val_accuracy: 0.8804 - val_loss: 0.2870
Epoch 3/3
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 254ms/step - accuracy: 0.9372 - loss: 0.1760 - val_accuracy: 0.8810 - val_loss: 0.3035


In [None]:
# Score the trained model on the held-out test split. evaluate returns the
# loss followed by the compiled metrics (here: accuracy).
eval_results = model.evaluate(X_test_padded, y_test, verbose=0)
test_loss, test_acc = eval_results
print(f"Accuracy: {test_acc:.4f}")

Accuracy: 0.8684
