In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import re

2024-11-18 03:35:11.454059: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-18 03:35:11.479243: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-18 03:35:11.486613: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-18 03:35:11.505055: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
print(tf.__version__)
!nvidia-smi

2.17.0
Mon Nov 18 03:35:15 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   57C    P0             28W /   70W |   13051MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                         

In [3]:
# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Parameters
max_features = 20000  # Increased vocabulary size
maxlen = 250  # Increased sequence length
embedding_dims = 256  # Increased embedding dimensions
batch_size = 64

In [4]:
# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

In [5]:
x_train = pad_sequences(x_train, maxlen=maxlen, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=maxlen, padding='post', truncating='post')

In [6]:
total_samples = len(y_train)
n_positive = sum(y_train)
n_negative = total_samples - n_positive
class_weight = {
    0: total_samples / (2 * n_negative),
    1: total_samples / (2 * n_positive)
}

In [12]:
model = Sequential([
    Embedding(max_features, embedding_dims, input_length=maxlen),
    SpatialDropout1D(0.2),
    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [14]:
class LossThresholdCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if logs.get('loss') < 0.2:
            print(f"\nLoss is below < 0.2. Stopping training.")
            self.model.stop_training = True

loss_threshold_callback = LossThresholdCallback()

In [13]:
history = model.fit(x_train, y_train,
                   batch_size=batch_size,
                   epochs=15,
                   validation_split=0.2,
                   callbacks=[loss_threshold_callback],
                   class_weight=class_weight,
                   verbose=1)

Epoch 1/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.5313 - loss: 0.6823

AttributeError: 'LossThresholdCallback' object has no attribute 'threshold'

In [None]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"\nTest accuracy: {accuracy:.4f}")
print(f"Test loss: {loss:.4f}")

In [None]:
def clean_text(text):
    # Convert to lowercase
    text = text.lower()
    # Remove special characters and digits
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Remove extra whitespace
    text = ' '.join(text.split())
    return text

def predict_sentiment(text, word_index=imdb.get_word_index()):
    # Clean the input text
    text = clean_text(text)
    
    # Reverse word index to get words from indices
    reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
    
    # Convert text to sequence
    words = text.split()
    sequence = []
    for word in words:
        if word in word_index and word_index[word] < max_features:
            sequence.append(word_index[word])
    
    # Pad sequence
    sequence = pad_sequences([sequence], maxlen=maxlen, padding='post', truncating='post')
    
    # Predict
    prediction = model.predict(sequence)[0][0]
    # confidence = prediction if prediction > 0.5 else 1 - prediction
    # return "Positive" if prediction > 0.64 else "Positive", confidence
    return "Positive" if prediction > 0.64 else "Negative", prediction

In [None]:
positive_text = "This movie was really great! I enjoyed every moment of it."
negative_text = "This movie was terrible. Complete waste of time and money. The acting was horrible."

print("\nTesting with sample texts:")
sentiment, confidence = predict_sentiment(positive_text)
print(f"\nPositive sample: {positive_text}")
print(f"Predicted sentiment: {sentiment} (confidence: {confidence:.4f})")

sentiment, confidence = predict_sentiment(negative_text)
print(f"\nNegative sample: {negative_text}")
print(f"Predicted sentiment: {sentiment} (confidence: {confidence:.4f})")

In [None]:

# Additional test cases
test_cases = [
    "The worst movie I've ever seen!",
    "I absolutely hated everything about this.",
    "Don't waste your money on this garbage.",
    "Brilliant performance and amazing story!",
    "Such a disappointment, terrible plot."
]

print("\nAdditional test cases:")
for text in test_cases:
    sentiment, confidence = predict_sentiment(text)
    print(f"\nText: {text}")
    print(f"Predicted sentiment: {sentiment} (confidence: {confidence:.4f})")