In [1]:
sentences = ["I loved the movie, it was fantastic!",
             "The food was terrible, I wouldn't recommend it.",
             "The book was amazing, couldn't put it down.",
             "It was terrible film"]

In [2]:
import numpy as np
labels = np.array([1, 0, 1, 0])

In [4]:
!pip install tensorflow keras



In [8]:
!pip install keras



In [13]:
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

sequences = tokenizer.texts_to_sequences(sentences)
sequences

[[4, 6, 3, 7, 1, 2, 8],
 [3, 9, 2, 5, 4, 10, 11, 1],
 [3, 12, 2, 13, 14, 15, 1, 16],
 [1, 2, 5, 17]]

In [14]:
word_index = tokenizer.word_index
word_index

{'it': 1,
 'was': 2,
 'the': 3,
 'i': 4,
 'terrible': 5,
 'loved': 6,
 'movie': 7,
 'fantastic': 8,
 'food': 9,
 "wouldn't": 10,
 'recommend': 11,
 'book': 12,
 'amazing': 13,
 "couldn't": 14,
 'put': 15,
 'down': 16,
 'film': 17}

In [25]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
max_len = max([len(seq) for seq in sequences])

padded = pad_sequences(sequences, maxlen=max_len)
print(padded)


[[ 0  4  6  3  7  1  2  8]
 [ 3  9  2  5  4 10 11  1]
 [ 3 12  2 13 14 15  1 16]
 [ 0  0  0  0  1  2  5 17]]


In [26]:
print('max_len: ', max_len)

max_len:  8


In [29]:
from keras.models import Sequential
from keras.layers import Input, Embedding, GRU, Dense

voc_size = len(tokenizer.word_index) +1

embedding_dim = 32

model = Sequential()

model.add(Input(shape=(max_len, )))

model.add(Embedding(input_dim = voc_size, output_dim = embedding_dim, input_length = max_len))

model.add(GRU(32))

model.add(Dense(1, activation = 'sigmoid'))          



In [32]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(padded, labels, epochs=10, batch_size=1)

Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.0000e+00 - loss: 0.6986
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5333 - loss: 0.6921    
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.6830
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.6800
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 0.6746
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.6648
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.6600
Epoch 8/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.6538
Epoch 9/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x297849a8b90>

In [35]:
loss, accuracy = model.evaluate(padded, labels)
print(f"Accuracy: {accuracy*100: .2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step - accuracy: 1.0000 - loss: 0.6131
Accuracy:  100.00%


In [41]:
from tensorflow.keras.preprocessing.sequence import pad_sequences  

In [45]:
test = ["my whole body feels itchy and like its on fire", 
        "this week is not going as i had hoped"]

test = tokenizer.texts_to_sequences(test)

from tensorflow.keras.preprocessing.sequence import pad_sequences  
padded_sequences = pad_sequences(test, maxlen=max_len)

predictions = model.predict(padded_sequences)

for sentences, prediction in zip(test, predictions):
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    print(f"Sentence: {sentences} - Sentiment: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Sentence: [] - Sentiment: Negative
Sentence: [4] - Sentiment: Negative
