In [1]:
import numpy as np
import tensorflow as tf

In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
data = [
    ("I absolutely loved this movie", "positive"),
    ("The acting was terrible and the story was boring", "negative"),
    ("This film was masterpiece", "positive"),
    ("I couldn't stand to watch it, a complete waste of time", "negative"),
    ("The cinematograpy was stunning, a must watch movie", "positive")
]

In [5]:
review, sentiment = zip(*data)

In [6]:
review

('I absolutely loved this movie',
 'The acting was terrible and the story was boring',
 'This film was masterpiece',
 "I couldn't stand to watch it, a complete waste of time",
 'The cinematograpy was stunning, a must watch movie')

In [7]:
sentiment

('positive', 'negative', 'positive', 'negative', 'positive')

In [8]:
tokens = Tokenizer()

In [9]:
tokens.fit_on_texts(review)

In [10]:
vocab = len(tokens.word_index)+1

In [11]:
vocab

28

In [12]:
tokens.word_index

{'was': 1,
 'the': 2,
 'i': 3,
 'this': 4,
 'movie': 5,
 'watch': 6,
 'a': 7,
 'absolutely': 8,
 'loved': 9,
 'acting': 10,
 'terrible': 11,
 'and': 12,
 'story': 13,
 'boring': 14,
 'film': 15,
 'masterpiece': 16,
 "couldn't": 17,
 'stand': 18,
 'to': 19,
 'it': 20,
 'complete': 21,
 'waste': 22,
 'of': 23,
 'time': 24,
 'cinematograpy': 25,
 'stunning': 26,
 'must': 27}

In [13]:
seq = tokens.texts_to_sequences(review)

In [14]:
seq

[[3, 8, 9, 4, 5],
 [2, 10, 1, 11, 12, 2, 13, 1, 14],
 [4, 15, 1, 16],
 [3, 17, 18, 19, 6, 20, 7, 21, 22, 23, 24],
 [2, 25, 1, 26, 7, 27, 6, 5]]

In [15]:
max_len = max([len(i) for i in seq])

In [16]:
max_len

11

In [17]:
pad_seq = sequence.pad_sequences(seq, maxlen=max_len, padding="post")

In [18]:
pad_seq

array([[ 3,  8,  9,  4,  5,  0,  0,  0,  0,  0,  0],
       [ 2, 10,  1, 11, 12,  2, 13,  1, 14,  0,  0],
       [ 4, 15,  1, 16,  0,  0,  0,  0,  0,  0,  0],
       [ 3, 17, 18, 19,  6, 20,  7, 21, 22, 23, 24],
       [ 2, 25,  1, 26,  7, 27,  6,  5,  0,  0,  0]])

In [19]:
labels = np.array([1 if i == "positive" else 0 for i in sentiment])

In [20]:
labels

array([1, 0, 1, 0, 1])

In [21]:
model = Sequential([
    Embedding(vocab, 20, input_length=max_len),
    SimpleRNN(20),
    Dense(1, activation="sigmoid")
])

In [22]:
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [23]:
model.fit(pad_seq, labels, epochs=30)

Epoch 1/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.8000 - loss: 0.6119
Epoch 2/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.8000 - loss: 0.5834
Epoch 3/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.8000 - loss: 0.5562
Epoch 4/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - accuracy: 0.8000 - loss: 0.5302
Epoch 5/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 1.0000 - loss: 0.5048
Epoch 6/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 1.0000 - loss: 0.4794
Epoch 7/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 1.0000 - loss: 0.4536
Epoch 8/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - accuracy: 1.0000 - loss: 0.4272
Epoch 9/30
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x25b5512adb0>

In [24]:
new_review = "Incredible performance and gripping storyline"

In [25]:
new_seq = tokens.texts_to_sequences([new_review])

In [26]:
new_seq

[[12]]

In [27]:
new_pad_seq = sequence.pad_sequences(new_seq, maxlen=max_len, padding="post")

In [28]:
new_pad_seq

array([[12,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [29]:
np.round(model.predict(new_pad_seq))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 467ms/step


array([[1.]], dtype=float32)

In [30]:
dummy = "The movie was very good, must recommended"

In [31]:
dummy_seq = tokens.texts_to_sequences([dummy])

In [32]:
dummy_seq

[[2, 5, 1, 27]]

In [33]:
dummy_seq_pad = sequence.pad_sequences(dummy_seq, maxlen=max_len, padding="post")

In [34]:
dummy_seq_pad

array([[ 2,  5,  1, 27,  0,  0,  0,  0,  0,  0,  0]])

In [35]:
np.round(model.predict(dummy_seq_pad))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step


array([[1.]], dtype=float32)