In [3]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, Embedding, GlobalAveragePooling1D
    , LayerNormalization, MultiHeadAttention, Conv1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb
import numpy as np
import pandas as pd
# Constants
MAXLEN = 200  # Maximum sequence length
NUM_HEADS = 2  # Number of attention heads (reduced for simplicity)
FF_DIM = 128  # Feed-forward dimension in each Transformer block (reduced for simplicity)
NUM_TRANSFORMER_BLOCKS = 2  # Number of Transformer blocks (reduced for simplicity)
VOCAB_SIZE = 5000  # Vocabulary size
EMBED_DIM = 128  # Embedding dimension (reduced for simplicity)
# Load IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=VOCAB_SIZE)
df_train = pd.DataFrame({'review': x_train, 'sentiment': y_train})
df_test = pd.DataFrame({'review': x_test, 'sentiment': y_test})
print(df_train.head)
# print(x_train[0],y_train[0])
# Get the word index
word_index = imdb.get_word_index()
# Reserve the first indices for special tokens
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3
# Reverse the word index to map integers to words
reverse_word_index = {value: key for key, value in word_index.items()}
# Function to decode reviews back to words
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i, '?') for i in encoded_review])
# Print the first 5 reviews and their labels
for i in range(5):
    print(f"Review {i+1}: {decode_review(x_train[i])}")
    print(f"Label {i+1}: {y_train[i]}")
    print()
y_train_df = pd.DataFrame({'label': y_train})
x_test_df = pd.DataFrame({'review': x_test})
y_test_df = pd.DataFrame({'label': y_test})
# Select half of the dataset using iloc
train_size = len(x_train_df) // 4
test_size = len(x_test_df) // 4
x_train_half = x_train_df.iloc[:train_size]
y_train_half = y_train_df.iloc[:train_size]
x_test_half = x_test_df.iloc[:test_size]
y_test_half = y_test_df.iloc[:test_size]
# Ensure sequences are padded to the same length
x_train_half_padded = pad_sequences(x_train_half['review'].tolist(), maxlen=MAXLEN)
x_test_half_padded = pad_sequences(x_test_half['review'].tolist(), maxlen=MAXLEN)
# Build Transformer model
def build_transformer_model(maxlen, vocab_size, embed_dim, num_heads, ff_dim,
num_transformer_blocks):
inputs = Input(shape=(maxlen,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)
# Positional encoding
positions = np.arange(maxlen).reshape(-1, 1)
positional_encoding = np.zeros((maxlen, embed_dim))
positional_encoding[:, 0::2] = np.sin(positions / 10000**(2 * np.arange(embed_dim)[0::2]
 / embed_dim))
positional_encoding[:, 1::2] = np.cos(positions / 10000**(2 * np.arange(embed_dim)[1::2]
 / embed_dim))
x = embedding_layer + positional_encoding
# Transformer blocks
for _ in range(num_transformer_blocks):
# Multi-head self-attention
x1 = LayerNormalization()(x)
x2 = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim // num_heads)(x1, x1)
x = x1 + x2
# Feed-forward network
x1 = LayerNormalization()(x)
x2 = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x1)
x = x1 + x2
# Global average pooling and classification
x = GlobalAveragePooling1D()(x)
outputs = Dense(1, activation="sigmoid")(x)
model = Model(inputs=inputs, outputs=outputs)
return model
# Build and compile the model
transformer_model = build_transformer_model(MAXLEN, VOCAB_SIZE, EMBED_DIM, NUM_HEADS, FF_DIM
    , NUM_TRANSFORMER_BLOCKS)
transformer_model.compile(optimizer=Adam(learning_rate=1e-4), loss="binary_crossentropy",
    metrics=["accuracy"])
# Print model summary
transformer_model.summary()
# Train the model
transformer_model.fit(np.array(x_train_half_padded), np.array(y_train_half['label']), epochs
=2, batch_size=32, validation_data=(np.array(x_test_half_padded), np.array(y_test_half['
label'])))
# Evaluate the model
loss, accuracy = transformer_model.evaluate(np.array(x_test_half_padded), np.array(
    y_test_half['label']))
print(f"Test Accuracy: {accuracy * 100:.2f}%")
def predict_sentiment(review, model, maxlen):
# Tokenize and pad the review
review_seq = imdb.get_word_index()
review_seq = {k:(v+3) for k,v in review_seq.items()}
tokenized_review = [review_seq[word] if word in review_seq and review_seq[word] <
VOCAB_SIZE else 2 for word in review.split()]
padded_review = pad_sequences([tokenized_review], maxlen=maxlen)
# Predict sentiment
prediction = model.predict(padded_review)[0, 0]
sentiment = "positive" if prediction >= 0.5 else "negative"
confidence = prediction if prediction >= 0.5 else 1 - prediction
return sentiment, confidence
# Example usage of sentiment analysis function
new_review = "This movie good! The acting was bad and the plot was not engaging."
sentiment, confidence = predict_sentiment(new_review, transformer_model, MAXLEN)
print(f"Review: '{new_review}'")
print(f"Predicted Sentiment: {sentiment} (Confidence: {confidence * 100:.2f}%)")                                                                         

IndentationError: unexpected indent (896748184.py, line 3)