In [44]:
!pip install datasets 'tensorflow==2.15'



In [45]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.layers import GRU, Dense, Embedding, Input
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout
from datasets import load_dataset

# Dataset creation and preprocessing
max_vocab_size = 25000  # Limit vocabulary size
max_seq_len = 450       # Maximum sequence length
embedding_dim = 150
latent_dim = 512  # Increased latent dimension for GRU
output_dim = 2

# Load the IMDb dataset from Hugging Face
dataset = load_dataset("imdb")

In [46]:

# Train Word2Vec embeddings using gensim
sentences = [text.split() for text in dataset['train']['text']]
word2vec_model = Word2Vec(sentences, vector_size=embedding_dim, window=5, min_count=1, workers=4)

# Create embedding matrix
def create_embedding_matrix(word_index, word2vec_model, embedding_dim):
    embedding_matrix = np.zeros((len(word_index), embedding_dim))
    for word, i in word_index.items():
        if word in word2vec_model.wv:
            embedding_matrix[i] = word2vec_model.wv[word]
    return embedding_matrix

# TextVectorization layer for preprocessing
vectorizer = TextVectorization(
    max_tokens=max_vocab_size,
    output_mode='int',
    output_sequence_length=max_seq_len
)

# Adapt the TextVectorization layer to the training data
vectorizer.adapt(dataset['train']['text'])

# Get the vocabulary and create the embedding matrix
vocab = vectorizer.get_vocabulary()
word_index = {word: idx for idx, word in enumerate(vocab)}
embedding_matrix = create_embedding_matrix(word_index, word2vec_model, embedding_dim)


In [47]:

# Create TensorFlow datasets
train_texts = tf.convert_to_tensor(dataset['train']['text'])
train_labels = tf.convert_to_tensor(dataset['train']['label'])
test_texts = tf.convert_to_tensor(dataset['test']['text'])
test_labels = tf.convert_to_tensor(dataset['test']['label'])

# Vectorize and preprocess text data
def preprocess_texts(text, label):
    text = vectorizer(text)
    label = tf.one_hot(label, depth=2)
    return text, label

train_ds = tf.data.Dataset.from_tensor_slices((train_texts, train_labels))
train_ds = train_ds.map(preprocess_texts).shuffle(10000).batch(256).prefetch(tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_texts, test_labels))
test_ds = test_ds.map(preprocess_texts).batch(256).prefetch(tf.data.AUTOTUNE)


In [48]:

# Define the CrossAttention Layer
class CrossAttention(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(key_dim=units, num_heads=1, **kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

    def call(self, x, context):
        attn_output, attn_scores = self.mha(query=x, value=context, return_attention_scores=True)
        attn_scores = tf.reduce_mean(attn_scores, axis=1)
        self.last_attention_weights = attn_scores
        x = self.add([x, attn_output])  # Residual Connection
        x = self.layernorm(x)
        return x

# Define the RNN with Attention model as a class
class RNNWithAttention(tf.keras.Model):
    def __init__(self, embedding_matrix, max_seq_len, embedding_dim, latent_dim, output_dim):
        super().__init__()
        self.embedding_layer = Embedding(
            input_dim=embedding_matrix.shape[0],
            output_dim=embedding_dim,
            weights=[embedding_matrix],
            trainable=False
        )
        self.encoder_gru = GRU(latent_dim, return_sequences=True, return_state=True,
                               kernel_regularizer=tf.keras.regularizers.l2(1e-4))
        self.dropout = tf.keras.layers.Dropout(0.3)
        self.attention_layer = CrossAttention(units=latent_dim)
        self.output_layer = Dense(output_dim, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(1e-4))

    def call(self, inputs):
        embeddings = self.embedding_layer(inputs)
        encoder_outputs, encoder_state = self.encoder_gru(embeddings)
        encoder_outputs = self.dropout(encoder_outputs)
        attention_outputs = self.attention_layer(encoder_state[:, tf.newaxis, :], encoder_outputs)
        outputs = self.output_layer(attention_outputs[:, 0, :])
        return outputs

# Build the model
sentiment_model = RNNWithAttention(
    embedding_matrix=embedding_matrix,
    max_seq_len=max_seq_len,
    embedding_dim=embedding_dim,
    latent_dim=latent_dim,
    output_dim=output_dim
)


# Compile the model
sentiment_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=["accuracy"]
)

# Build the model to show summary
sentiment_model.build(input_shape=(None, max_seq_len))
sentiment_model.summary()


Model: "rnn_with_attention_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_9 (Embedding)     multiple                  3750000   
                                                                 
 gru_8 (GRU)                 multiple                  1019904   
                                                                 
 dropout_9 (Dropout)         multiple                  0         
                                                                 
 cross_attention_8 (CrossAt  multiple                  1051648   
 tention)                                                        
                                                                 
 dense_8 (Dense)             multiple                  1026      
                                                                 
Total params: 5822578 (22.21 MB)
Trainable params: 2072578 (7.91 MB)
Non-trainable params: 3750000 (14.31 MB)
__

In [49]:

# Train the model with early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = sentiment_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=20,
    callbacks=[early_stopping]
)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [50]:

# Evaluate the model
results = sentiment_model.evaluate(test_ds)
print("Test Accuracy:", results[1])

# Manual testing
sample_positive = "This was the best movie I have ever seen."
sample_negative = "This was the worst movie I have ever watched."
sample_neutral = "The movie was okay, not great but not terrible."
sample_sarcasm = "Wow, this was such a masterpiece... the actors, the screenplay, I could stay for hours if it wasn't for how bad it was."
sample_irony = "The plot was so riveting, I couldn’t stop yawning."

# Preprocess the samples
sample_positive_vectorized = vectorizer(tf.convert_to_tensor([sample_positive]))
sample_negative_vectorized = vectorizer(tf.convert_to_tensor([sample_negative]))
sample_neutral_vectorized = vectorizer(tf.convert_to_tensor([sample_neutral]))
sample_sarcasm_vectorized = vectorizer(tf.convert_to_tensor([sample_sarcasm]))
sample_irony_vectorized = vectorizer(tf.convert_to_tensor([sample_irony]))

# Predict sentiment
positive_prediction = sentiment_model.predict(sample_positive_vectorized)
negative_prediction = sentiment_model.predict(sample_negative_vectorized)
neutral_prediction = sentiment_model.predict(sample_neutral_vectorized)
sarcasm_prediction = sentiment_model.predict(sample_sarcasm_vectorized)
irony_prediction = sentiment_model.predict(sample_irony_vectorized)

print("Positive Prediction:", positive_prediction)
print("Negative Prediction:", negative_prediction)
print("Neutral Prediction:", neutral_prediction)
print("Sarcasm Prediction:", sarcasm_prediction)
print("Irony Prediction:", irony_prediction)


Test Accuracy: 0.8244799971580505
Positive Prediction: [[0.04597345 0.9540266 ]]
Negative Prediction: [[0.9733128 0.0266872]]
Neutral Prediction: [[0.50900173 0.49099824]]
Sarcasm Prediction: [[0.64521104 0.35478896]]
Irony Prediction: [[0.47136495 0.528635  ]]
