<a href="https://colab.research.google.com/github/Rakitin11/SentimentAnalsisTask/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb
import numpy as np


In [2]:
# Load IMDB dataset
num_words = 20000  # Consider the top 20,000 most frequent words
maxlen = 200       # Capture more context by increasing max sequence length

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# Pad sequences to ensure uniform input length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

print(f"Training samples: {len(x_train)}, Testing samples: {len(x_test)}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 25000, Testing samples: 25000


In [3]:
from tensorflow.keras.layers import LSTM, Embedding, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define the model
model = keras.Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=maxlen),

    Bidirectional(LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)),
    Bidirectional(LSTM(64, return_sequences=False, dropout=0.3, recurrent_dropout=0.3)),

    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()




In [4]:
# Callbacks for better training
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)

# Train the model
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=64,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping, reduce_lr]
)


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1136s[0m 3s/step - accuracy: 0.6652 - loss: 0.5907 - val_accuracy: 0.8347 - val_loss: 0.3889 - learning_rate: 0.0010
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1170s[0m 3s/step - accuracy: 0.8723 - loss: 0.3172 - val_accuracy: 0.8282 - val_loss: 0.4308 - learning_rate: 0.0010
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1207s[0m 3s/step - accuracy: 0.9000 - loss: 0.2521 - val_accuracy: 0.6012 - val_loss: 1.3158 - learning_rate: 0.0010
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1168s[0m 3s/step - accuracy: 0.8480 - loss: 0.3513 - val_accuracy: 0.8466 - val_loss: 0.4063 - learning_rate: 2.0000e-04


In [5]:
# Evaluate on test data
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {accuracy:.4f}')


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m346s[0m 443ms/step - accuracy: 0.8358 - loss: 0.3880
Test Accuracy: 0.8347
