# Design RNN or its variant including LSTM or GRU
a) Select a suitable time series dataset. Example – predict sentiments based on product reviews

b) Apply for prediction

In [None]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fetch the IMDB reviews, keeping only the 10,000 most frequent words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Bring every review to a fixed length of 200 tokens (both splits, same settings)
x_train, x_test = (
    pad_sequences(reviews, maxlen=200) for reviews in (x_train, x_test)
)

#### from tensorflow.keras.datasets import imdb
#### from tensorflow.keras.preprocessing.sequence import pad_sequences
    imdb: Built-in movie reviews dataset (sentiment labeled).

    pad_sequences: Helps make all input sequences the same length (needed for LSTM).

#### Loads the dataset:

    x_train: list of reviews (as sequences of integers)

    y_train: corresponding labels (1 = positive, 0 = negative)

    num_words=10000: only use the top 10,000 most common words to limit vocabulary size.

#### x_train = pad_sequences(x_train, maxlen=200)
#### x_test = pad_sequences(x_test, maxlen=200)

    Ensures every review has exactly 200 tokens.

    Shorter sequences are padded with zeros at the beginning.

    Longer ones are truncated from the start, so the last 200 tokens of each review are kept.

    This creates a uniform input shape of (samples, 200) needed for neural networks.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

# Sentiment classifier: token ids -> word vectors -> LSTM summary -> probability.
model = Sequential()
# Declare the (padded) review length up front. This replaces the
# `input_length=200` argument of Embedding, which Keras 3 deprecates
# (it only triggers a warning there), and lets the model be built immediately.
model.add(Input(shape=(200,)))
model.add(Embedding(input_dim=10000, output_dim=128))
model.add(LSTM(units=128))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

### Import Keras model architecture tools.

    Embedding: Turns word indexes into vectors.

    LSTM: Long Short-Term Memory layer (remembers sequences).

    Dense: Standard neural net layer.
### Embedding Layer: model.add(Embedding(input_dim=10000, output_dim=128, input_length=200))

    input_dim=10000: Total words in vocab.

    output_dim=128: Each word will be converted to a vector of 128 values.

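    input_length=200: Input reviews are 200 tokens long (after padding). Note: Keras 3 deprecates this argument; the sequence length can instead be declared with an Input(shape=(200,)) layer placed before the Embedding.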

    Result: Converts shape from (batch_size, 200) → (batch_size, 200, 128)
### model.add(LSTM(units=128))
LSTM layer processes the sequence of word vectors:

    units=128: Size of the LSTM's hidden state (and therefore of its output vector).

Output shape becomes (batch_size, 128): by default the LSTM returns only its final hidden state, which summarizes the whole sequence.
### model.add(Dense(1, activation='sigmoid'))

    Outputs 1 neuron → predicts probability between 0 and 1.

    sigmoid: squashes output into range (0 to 1).
### Compile model:

    adam: Adaptive optimizer, adjusts learning rate.

    binary_crossentropy: Used for binary classification (0 or 1).

    metrics=['accuracy']: Tracks the fraction of predictions that were correct.

In [None]:
# Train for 3 epochs in mini-batches of 64 reviews.
# NOTE(review): the test split is also used as validation data here, so the
# validation metrics are not independent of the final test evaluation.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(x_test, y_test),
)

Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 170ms/step - accuracy: 0.7396 - loss: 0.5123 - val_accuracy: 0.8401 - val_loss: 0.3822
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 171ms/step - accuracy: 0.8944 - loss: 0.2697 - val_accuracy: 0.7862 - val_loss: 0.4460
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 170ms/step - accuracy: 0.9210 - loss: 0.2085 - val_accuracy: 0.8552 - val_loss: 0.3704


In [None]:
# Evaluate loss and accuracy on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
# Binary cross-entropy is not a percentage, so report it unscaled;
# only the accuracy is converted to a percentage.
print("Test Loss", test_loss)
print("Test Accuracy:", test_acc*100)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 26ms/step - accuracy: 0.8542 - loss: 0.3716
Test Loss 37.037211656570435
Test Accuracy: 85.51999926567078


In [None]:
# Load word index (word -> frequency rank) and reverse it (rank -> word)
word_index = imdb.get_word_index()
reverse_word_index = {value: key for (key, value) in word_index.items()}

# Decode the review (subtract 3 for actual words).
# Index 0 is the padding added by pad_sequences, so it is skipped rather than
# printed as '?'; other reserved indices (start / unknown) still show as '?'.
decoded_review = ' '.join(
    reverse_word_index.get(i - 3, '?') for i in x_test[1] if i != 0
)

# Print the review text
print("Sample Review (Text):")
print(decoded_review)

# Predict: the model expects a batch, so add a leading batch dimension of 1
# (-1 keeps whatever padded length the review already has).
sample = x_test[1].reshape(1, -1)
prediction = model.predict(sample)
print(prediction)

# The sigmoid output is the probability of the positive class
print("\nPredicted Sentiment:", "Positive" if prediction[0][0] > 0.5 else "Negative")

Sample Review (Text):
psychological ? it's very interesting that robert altman directed this considering the style and structure of his other films still the trademark altman audio style is evident here and there i think what really makes this film work is the brilliant performance by sandy dennis it's definitely one of her darker characters but she plays it so perfectly and convincingly that it's scary michael burns does a good job as the mute young man regular altman player michael murphy has a small part the ? moody set fits the content of the story very well in short this movie is a powerful study of loneliness sexual ? and desperation be patient ? up the atmosphere and pay attention to the wonderfully written script br br i praise robert altman this is one of his many films that deals with unconventional fascinating subject matter this film is disturbing but it's sincere and it's sure to ? a strong emotional response from the viewer if you want to see an unusual film some might ev

### ❗ i - 3 is used because:

Keras reserves:

0 = padding

1 = start of sequence

2 = unknown word

3 = unused (never emitted by the loader)

4 and above = actual words (dataset index = word_index rank + 3, and ranks start at 1)