# Problem Statement
- Design RNN or its variant including LSTM or GRU

    > a) Select a suitable time series dataset. Example – predict sentiments based on product reviews.

    > b) Apply for prediction

## Import Dataset

In [1]:
from tensorflow.keras.datasets.imdb import load_data

(x_train, y_train), (x_test, y_test) = load_data(num_words=5000)

x_train.shape, y_train.shape, x_test.shape, y_test.shape

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


((25000,), (25000,), (25000,), (25000,))

### Exploring Dataset (Optional)

In [2]:
from tensorflow.keras.datasets.imdb import get_word_index

word_indices = get_word_index()
word_indices = {val:key for key, val in word_indices.items()}

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step


In [4]:
for word in x_train[1]:
    # print(word)
    if word >= 3:
        print(word_indices.get(word - 3, 0), end=' ')
    elif word != 1:
        print('?', end=' ')  # special tokens

big hair big ? bad music and a giant safety ? these are the words to best describe this terrible movie i love cheesy horror movies and i've seen hundreds but this had got to be on of the worst ever made the plot is paper thin and ridiculous the acting is an ? the script is completely laughable the best is the end showdown with the cop and how he worked out who the killer is it's just so damn terribly written the clothes are ? and funny in equal ? the hair is big lots of ? ? men wear those cut ? ? that show off their ? ? that men actually wore them and the music is just ? trash that plays over and over again in almost every scene there is trashy music ? and ? taking away bodies and the ? still doesn't close for ? all ? aside this is a truly bad film whose only charm is to look back on the disaster that was the 80's and have a good old laugh at how bad everything was back then 

## Preprocessing

In [5]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

padded_x_train = pad_sequences(x_train, maxlen = 400)
padded_x_test = pad_sequences(x_test, maxlen = 400)

In [None]:
padded_x_train[1]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

## Convert Text to Embedding Vector

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential

embedding_model = Sequential(
    [
        Embedding(input_dim=2000, output_dim=128)
    ]
)

embedding_x_train = embedding_model(padded_x_train)
embedding_x_test = embedding_model(padded_x_test)

## Building LSTM model

In [8]:
from tensorflow.keras.layers import LSTM, Dense

lstm_model = Sequential([
    LSTM(128),
    Dense(1,activation='sigmoid')
])

lstm_model.compile(
    loss='binary_crossentropy',
    metrics = ['accuracy'],
    optimizer= 'adam'
)

lstm_model.fit(embedding_x_train, y_train, batch_size=64,
                   epochs=10,
                   validation_split=0.2)

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 30ms/step - accuracy: 0.5456 - loss: 0.6891 - val_accuracy: 0.5596 - val_loss: 0.6784
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 29ms/step - accuracy: 0.6044 - loss: 0.6608 - val_accuracy: 0.6372 - val_loss: 0.6362
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 28ms/step - accuracy: 0.6564 - loss: 0.6171 - val_accuracy: 0.6750 - val_loss: 0.6052
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 29ms/step - accuracy: 0.6716 - loss: 0.6020 - val_accuracy: 0.6616 - val_loss: 0.6108
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 27ms/step - accuracy: 0.6838 - loss: 0.5910 - val_accuracy: 0.7020 - val_loss: 0.5826
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 27ms/step - accuracy: 0.6754 - loss: 0.5984 - val_accuracy: 0.6556 - val_loss: 0.6195
Epoch 7/10
[1m31

<keras.src.callbacks.history.History at 0x7f6a79984510>

In [9]:
test_loss, test_acc = lstm_model.evaluate(embedding_x_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

Test Accuracy: 72.15%


In [10]:
y_pred = lstm_model.predict(embedding_x_test)
y_pred

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step


array([[0.2442795 ],
       [0.8138994 ],
       [0.6742109 ],
       ...,
       [0.22901978],
       [0.55503577],
       [0.908347  ]], dtype=float32)