<a href="https://colab.research.google.com/github/AharonRemez/RNN---Classification/blob/main/RNN_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the IMDB reviews dataset as (text, label) pairs and pick out the two
# labelled splits.
dataset = tfds.load('imdb_reviews', as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

batch_size = 32
shuffle_buffer_size = 10000

# Only the training split is shuffled. `prefetch` lets the input pipeline
# prepare the next batch while the current one is being consumed; it does not
# change which examples are produced.
train_dataset = (
    train_dataset
    .shuffle(shuffle_buffer_size)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.B6QDFO_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.B6QDFO_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.B6QDFO_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [None]:
# Pull a single batch from the training pipeline and show its first review
# together with the matching label.
example, label = next(iter(train_dataset))
first_text = example[0].numpy()
first_label = label[0].numpy()
print(f'Text:\n {first_text}')
print(f'\nLabel:  {first_label}')

Text:
 b"Watched this film having really enjoyed Gregory's Girl many years ago. This was drivel. The plot was vaguely distasteful with the teacher and his friend perving over 14-15-year-old girls in very short skirts. Previous commenters seem to think that this doesn't matter, but I found it rather nasty. If you have children at school then the last thing you want is to think that every youngish teacher is lusting after his pupils. We were surprised that the censor let that through. Apart from that the film was just a waste of time. The script was poor and John Gordon Sinclair trying too hard to recreate his schoolboy image, slightly wacky and off the wall. Why anyone would want to lust after him in this performance is incredible. This film failed on all counts for me. Dreadful. Please don't waste your time watching it. Life's too short"

Label:  0


In [None]:
# Text -> integer-id preprocessing layer, configured with a 10000-token
# vocabulary limit and a fixed output length of 100 ids per review.
encoder = tf.keras.layers.TextVectorization(
    max_tokens=10000,
    output_sequence_length=100,
)

# Fit the vocabulary on the review texts only (labels are dropped).
train_text_only = train_dataset.map(lambda text, _: text)
encoder.adapt(train_text_only)

# Index -> token lookup table, as a NumPy array so it can be indexed with an
# array of ids in one step.
vocabulary = np.array(encoder.get_vocabulary())

# Round-trip the first review of the sampled batch through the encoder to
# show what the model will actually see.
original_text = example.numpy()[0]
encoded_text = encoder(original_text).numpy()
decoded_tokens = vocabulary[encoded_text]
decoded_text = ' '.join(decoded_tokens)

for heading, value in (
    ('original: ', original_text),
    ('encoded: ', encoded_text),
    ('decoded: ', decoded_text),
):
    print(heading, value)


original:  b"Watched this film having really enjoyed Gregory's Girl many years ago. This was drivel. The plot was vaguely distasteful with the teacher and his friend perving over 14-15-year-old girls in very short skirts. Previous commenters seem to think that this doesn't matter, but I found it rather nasty. If you have children at school then the last thing you want is to think that every youngish teacher is lusting after his pupils. We were surprised that the censor let that through. Apart from that the film was just a waste of time. The script was poor and John Gordon Sinclair trying too hard to recreate his schoolboy image, slightly wacky and off the wall. Why anyone would want to lust after him in this performance is incredible. This film failed on all counts for me. Dreadful. Please don't waste your time watching it. Life's too short"
encoded:  [ 284   11   20  252   63  492    1  247  106  148  598   11   14 3856
    2  114   14 4795    1   17    2 1721    3   25  465    1  126

In [None]:
# Layers are created in the same order in which they are stacked so that
# Keras' auto-generated layer names stay the same.
vocab_size = len(encoder.get_vocabulary())

# Token ids -> 64-dimensional vectors; id 0 is treated as padding and masked.
embedding = tf.keras.layers.Embedding(vocab_size, 64, mask_zero=True)

# First recurrent layer returns the full sequence so it can feed a second one.
sequence_rnn = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(64, return_sequences=True))

# Second recurrent layer returns a single vector per review.
summary_rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32))

hidden = tf.keras.layers.Dense(64, activation='relu')

# No activation on the output unit: the model emits a single raw score.
output = tf.keras.layers.Dense(1)

model = tf.keras.Sequential(
    [encoder, embedding, sequence_rnn, summary_rnn, hidden, output]
)

model.build(input_shape=(None,))
model.summary()

In [None]:
# The final Dense(1) layer has no activation, so the model outputs logits.
# The loss is told so via `from_logits=True`, and the accuracy metric must
# threshold at 0.0 (logit 0 == probability 0.5). The plain 'accuracy' string
# would resolve to binary accuracy with its default 0.5 threshold applied to
# the raw logits, which disagrees with the `> 0` decision rule used at
# prediction time. The metric is named 'accuracy' so the history/log keys
# ('accuracy', 'val_accuracy') stay the same.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(),
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')],
)

In [None]:
# Training the model and validating it on test set
# Fit on the training batches; the test batches are passed as validation data
# and are therefore evaluated at the end of every epoch.
num_epochs = 5
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=num_epochs,
)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 30ms/step - accuracy: 0.6729 - loss: 0.5546 - val_accuracy: 0.8131 - val_loss: 0.4307
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 29ms/step - accuracy: 0.8715 - loss: 0.3013 - val_accuracy: 0.8028 - val_loss: 0.4297
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 29ms/step - accuracy: 0.9203 - loss: 0.2010 - val_accuracy: 0.7713 - val_loss: 0.5025
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 29ms/step - accuracy: 0.9572 - loss: 0.1150 - val_accuracy: 0.7807 - val_loss: 0.6126
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 29ms/step - accuracy: 0.9772 - loss: 0.0655 - val_accuracy: 0.7773 - val_loss: 0.9245


In [None]:
sample_text = (
    '''The movie by GeeksforGeeks was so good and the animation are so dope.
    I would recommend my friends to watch it.'''
)

# The model's first layer is the TextVectorization encoder, so it takes raw
# strings directly. The previous version referenced `encoded_sample_text`,
# which is never defined in the notebook (NameError on a fresh kernel), and
# round-tripped the text through the vocabulary before predicting.
input_tensor = tf.constant([sample_text])  # Wrap in a list for batch dimension

predictions = model.predict(input_tensor)
print(*predictions[0])

# The model outputs a logit: a positive score means the positive class.
if predictions[0, 0] > 0:
    print('The review is positive')
else:
    print('The review is negative')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
6.0811267
The review is positive
