In [None]:
pip install tensorflow



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [None]:
# Step 2: Load the IMDB reviews and bring every review to a fixed length
from tensorflow.keras.datasets import imdb

# Text-processing hyperparameters
vocab_size = 10000  # passed to imdb.load_data as num_words
max_length = 200    # number of token ids per review after padding

# Reviews arrive as lists of integer word ids, labels as 0/1 arrays
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad at the end ('post') so both splits become rectangular arrays of
# width max_length
X_train, X_test = (
    pad_sequences(reviews, maxlen=max_length, padding='post')
    for reviews in (X_train, X_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
# Step 3: Build the GRU model
def build_gru_model(vocab_size, max_length):
    """Build a two-layer GRU binary classifier over integer-encoded text.

    Args:
        vocab_size: Number of distinct token ids; used as the size of the
            embedding table.
        max_length: Length the input sequences are padded to. Kept so that
            existing callers keep working; it is no longer forwarded to the
            ``Embedding`` layer because ``input_length`` is deprecated (and
            ignored) in Keras 3, and the recurrent layers accept any
            sequence length.

    Returns:
        An uncompiled ``Sequential`` model: Embedding(128) -> GRU(128,
        returning the full sequence) -> GRU(128) -> Dense(1, sigmoid).
    """
    model = Sequential()
    # mask_zero=True: id 0 is the value pad_sequences fills with, so the
    # recurrent layers skip the padded timesteps instead of treating them as
    # real words (which matters because the data is padded at the end).
    model.add(Embedding(vocab_size, 128, mask_zero=True))
    model.add(GRU(128, return_sequences=True))  # full sequence for the next GRU
    model.add(GRU(128))  # Second GRU layer: only the final state is kept
    model.add(Dense(1, activation='sigmoid'))  # probability of the positive class
    return model

# Initialize GRU model
gru_model = build_gru_model(vocab_size, max_length)



In [None]:
# Step 4: Compile and fit the GRU model
gru_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The test split doubles as the validation set here, so the per-epoch
# val_* numbers are computed on the same data as the final evaluation.
gru_history = gru_model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(X_test, y_test),
)

Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m586s[0m 1s/step - accuracy: 0.5445 - loss: 0.6778 - val_accuracy: 0.7719 - val_loss: 0.5269
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m572s[0m 1s/step - accuracy: 0.8167 - loss: 0.4257 - val_accuracy: 0.8762 - val_loss: 0.2899
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m595s[0m 1s/step - accuracy: 0.9194 - loss: 0.2097 - val_accuracy: 0.8828 - val_loss: 0.2810


In [None]:
# Tokenizer that maps English words to the integer ids the IMDB data uses.
#
# Fitting a fresh Tokenizer on the stringified id sequences only teaches it
# digit strings ("1", "14", ...) as its vocabulary, so the words of an English
# seed text would never be found. Instead, the dataset's own word index is
# loaded and shifted by the offset imdb.load_data() applies by default
# (index_from=3: id 0 = padding, 1 = start of review, 2 = unknown word).
IMDB_INDEX_FROM = 3
IMDB_OOV_ID = 2

# num_words=vocab_size: words whose id falls outside the vocabulary the model
# was trained on are replaced by the unknown-word id, as load_data() does.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size, oov_token='<unk>')
tokenizer.word_index = {'<unk>': IMDB_OOV_ID}
tokenizer.word_index.update(
    (word, rank + IMDB_INDEX_FROM) for word, rank in imdb.get_word_index().items()
)
# Keep the reverse mapping consistent with the forward one.
tokenizer.index_word = {index: word for word, index in tokenizer.word_index.items()}

In [None]:
# Step 5: Text Generation using the trained GRU model
def generate_text(model, tokenizer, seed_text, max_sequence_len, n_words=50):
    """Greedily extend ``seed_text`` one predicted word at a time.

    On every step the current text is encoded with ``tokenizer``, pre-padded
    to ``max_sequence_len - 1`` ids, passed to ``model.predict`` and the
    position of the largest output value is looked up in the tokenizer's
    vocabulary.

    NOTE: this only produces words when ``model`` outputs one score per
    vocabulary entry. For a model with a single output unit the arg-max is
    always 0, which normally has no vocabulary entry, so nothing is appended.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``.
        tokenizer: Object exposing ``texts_to_sequences`` and ``word_index``
            (word -> integer id).
        seed_text: Text to start from.
        max_sequence_len: Inputs are padded/truncated to this value minus one.
        n_words: Maximum number of words to append (default 50).

    Returns:
        ``seed_text`` followed by the generated words, separated by spaces.
        Generation stops early once a predicted index has no matching word.
    """
    # Invert the vocabulary once instead of scanning it for every generated
    # word; setdefault keeps the first word seen for an index, matching a
    # first-match linear scan.
    index_to_word = {}
    for word, index in tokenizer.word_index.items():
        index_to_word.setdefault(index, word)

    for _ in range(n_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        output_word = index_to_word.get(int(np.argmax(predicted)))
        if output_word is None:
            # Appending an empty word would only add trailing spaces, and the
            # encoded input (hence the prediction) would not change on the
            # next step, so further iterations cannot produce anything.
            break
        seed_text += " " + output_word
    return seed_text

# Generate text using the trained GRU model.
# The `tokenizer` created in the previous cell is reused; building it again
# here would only repeat the same work and overwrite the same variable.
#
# NOTE(review): gru_model ends in a single sigmoid unit (a sentiment
# classifier), so it does not predict a next word and generate_text cannot
# append anything meaningful to the seed. A model with one output per
# vocabulary entry is needed for real text generation.
seed_text = "The movie was"
generated_text = generate_text(gru_model, tokenizer, seed_text, max_length)
print(f"Generated Text (GRU): {generated_text}")

Generated Text (GRU): The movie was                                                  


In [None]:
# Step 6: Measure the GRU model on the test split
# evaluate() yields the loss followed by the metrics given to compile(),
# i.e. accuracy.
gru_test_loss, gru_test_accuracy = gru_model.evaluate(
    x=X_test,
    y=y_test,
)
print(f"GRU Model Accuracy: {gru_test_accuracy}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 121ms/step - accuracy: 0.8817 - loss: 0.2833
GRU Model Accuracy: 0.8827599883079529


In [None]:
# Step 7: Build the LSTM Model for comparison
def build_lstm_model(vocab_size, max_length):
    """Build a two-layer LSTM binary classifier over integer-encoded text.

    Mirrors the GRU architecture (same embedding size, layer widths and
    output head) so that the two recurrent cells can be compared directly.

    Args:
        vocab_size: Number of distinct token ids; used as the size of the
            embedding table.
        max_length: Length the input sequences are padded to. Kept so that
            existing callers keep working; it is no longer forwarded to the
            ``Embedding`` layer because ``input_length`` is deprecated (and
            ignored) in Keras 3, and the recurrent layers accept any
            sequence length.

    Returns:
        An uncompiled ``Sequential`` model: Embedding(128) -> LSTM(128,
        returning the full sequence) -> LSTM(128) -> Dense(1, sigmoid).
    """
    model = Sequential()
    # mask_zero=True: id 0 is the value pad_sequences fills with, so the
    # recurrent layers skip the padded timesteps instead of treating them as
    # real words (which matters because the data is padded at the end).
    model.add(Embedding(vocab_size, 128, mask_zero=True))
    model.add(LSTM(128, return_sequences=True))  # First LSTM layer with return_sequences
    model.add(LSTM(128))  # Second LSTM layer: only the final state is kept
    model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification
    return model

# Initialize LSTM model
lstm_model = build_lstm_model(vocab_size, max_length)

# Compile and fit the LSTM model with the same settings as the GRU run
lstm_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The test split doubles as the validation set here, so the per-epoch
# val_* numbers are computed on the same data as the final evaluation.
lstm_history = lstm_model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(X_test, y_test),
)

Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m516s[0m 1s/step - accuracy: 0.5382 - loss: 0.6870 - val_accuracy: 0.6392 - val_loss: 0.6412
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m557s[0m 1s/step - accuracy: 0.6888 - loss: 0.5876 - val_accuracy: 0.5830 - val_loss: 0.6533
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 1s/step - accuracy: 0.6941 - loss: 0.5589 - val_accuracy: 0.8032 - val_loss: 0.4477


In [None]:
# Step 8: Measure the LSTM model on the test split
# evaluate() yields the loss followed by the metrics given to compile(),
# i.e. accuracy.
lstm_test_loss, lstm_test_accuracy = lstm_model.evaluate(
    x=X_test,
    y=y_test,
)
print(f"LSTM Model Accuracy: {lstm_test_accuracy}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 239ms/step - accuracy: 0.8003 - loss: 0.4552
LSTM Model Accuracy: 0.8032400012016296


In [None]:
# Step 9: Report the test accuracy of both models side by side
print(
    f"GRU Accuracy: {gru_test_accuracy}, LSTM Accuracy: {lstm_test_accuracy}"
)

GRU Accuracy: 0.8827599883079529, LSTM Accuracy: 0.8032400012016296
