In [49]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import joblib
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout

# Load the IMDB reviews dataset; as_supervised=True yields (text, label) pairs.
imdb_data = tfds.load("imdb_reviews", as_supervised=True)
train_data, test_data = imdb_data['train'], imdb_data['test']


def _split_to_lists(dataset):
    """Return (texts, labels) as parallel lists built in one pass over *dataset*.

    Each element of *dataset* is unpacked as a (text, label) pair; the text is
    decoded from UTF-8 bytes to ``str`` and the label is converted with
    ``.numpy()``.
    """
    texts, labels = [], []
    # A single pass guarantees every label stays paired with its own text,
    # even if the iteration order is not reproducible between passes, and
    # avoids reading and decoding the whole split twice.
    for text, label in dataset:
        texts.append(text.numpy().decode("utf-8"))
        labels.append(label.numpy())
    return texts, labels


train_texts, train_labels = _split_to_lists(train_data)
test_texts, test_labels = _split_to_lists(test_data)

# Vocabulary cap and fixed sequence length used by the tokenizer, the padding
# step and the embedding layer.
num_words, maxlen = 30000, 300

# Build the vocabulary from the training texts only; the test texts are
# encoded with that same vocabulary, and unknown words use the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=num_words)
tokenizer.fit_on_texts(train_texts)

# Encode every review as a list of word indices, then pad/truncate each
# sequence to exactly `maxlen` entries.
X_train = pad_sequences(tokenizer.texts_to_sequences(train_texts), maxlen=maxlen)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_texts), maxlen=maxlen)

# Labels as NumPy arrays, aligned index-for-index with the padded inputs.
y_train, y_test = np.array(train_labels), np.array(test_labels)

# Persist the fitted tokenizer so the same word index can be reused later.
joblib.dump(tokenizer, "tokenizer.pkl")
print(" Tokenizer saved as tokenizer.pkl")

# Binary classifier assembled layer by layer:
# embedding -> GRU -> dropout -> single sigmoid unit.
model = Sequential()
model.add(Embedding(num_words, 128, input_length=maxlen))
# return_sequences=False: only the GRU's final output feeds the next layer.
model.add(GRU(64, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(1, activation="sigmoid"))

# Sigmoid output paired with binary cross-entropy; accuracy is tracked as a metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train for 5 epochs in batches of 64; validation_split=0.2 holds out a fifth
# of the training arrays for per-epoch validation.
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Score the trained model on the test arrays; evaluate() returns the loss
# followed by the compiled metrics (here: accuracy).
test_metrics = model.evaluate(X_test, y_test, verbose=1)
loss, acc = test_metrics
print(f"Test Accuracy: {acc:.4f}, Test Loss: {loss:.4f}")

# Write the trained model to disk as an HDF5 file.
model.save("imdb_gru_model.h5")
print("Model saved as imdb_gru_model.h5")


 Tokenizer saved as tokenizer.pkl
Epoch 1/5


2025-08-31 19:47:35.425765: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-08-31 19:47:35.602331: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


  1/313 [..............................] - ETA: 5:51 - loss: 0.6894 - accuracy: 0.6250

2025-08-31 19:47:35.787355: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-08-31 19:47:49.886233: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-08-31 19:47:49.939644: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.8519, Test Loss: 0.4936
Model saved as imdb_gru_model.h5
