In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.layers import SimpleRNN, Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential

texts = ["I love this movie", "This is a great film", "I really liked the acting", "The story was boring", "I hate the movie", "It was a waste of time"]
texts = ["SOS" + sentence + "EOS" for sentence in texts]
print(texts)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
print("word index:", word_index)

Sequences = tokenizer.texts_to_sequences(texts)
print("Sequences:", Sequences)

max_length = max(len(seq) for seq in Sequences)
padded_sequences = pad_sequences(Sequences, maxlen=max_length, padding='post', value = 0)
print("Padded Sequences:\n", padded_sequences)

lables = [1, 1, 1, 0, 0, 0]
y = np.array(lables)
print("lables:", y)

vocab_size = len(word_index) + 1
x = to_categorical(padded_sequences, num_classes=vocab_size)
print("one hot encoding:\n", x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

model = Sequential([
    SimpleRNN(32, activation='relu', return_sequences=False ,input_shape=(max_length, vocab_size)),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()
history = model.fit(x_train, y_train, epochs=10, batch_size = 2 ,validation_data=(x_test, y_test))

loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

sample_text = ["SOS This is a great film EOS"]
sample_sequence = tokenizer.texts_to_sequences(sample_text)
sample_pad = pad_sequences(sample_sequence, maxlen=max_length, padding='post', value=0)
sample_one_hot = to_categorical(sample_pad, num_classes=vocab_size)

prediction = model.predict(sample_one_hot)
print("Sentiment:", "Positive" if prediction > 0.5 else "Negative")

['SOSI love this movieEOS', 'SOSThis is a great filmEOS', 'SOSI really liked the actingEOS', 'SOSThe story was boringEOS', 'SOSI hate the movieEOS', 'SOSIt was a waste of timeEOS']
word index: {'sosi': 1, 'movieeos': 2, 'a': 3, 'the': 4, 'was': 5, 'love': 6, 'this': 7, 'sosthis': 8, 'is': 9, 'great': 10, 'filmeos': 11, 'really': 12, 'liked': 13, 'actingeos': 14, 'sosthe': 15, 'story': 16, 'boringeos': 17, 'hate': 18, 'sosit': 19, 'waste': 20, 'of': 21, 'timeeos': 22}
Sequences: [[1, 6, 7, 2], [8, 9, 3, 10, 11], [1, 12, 13, 4, 14], [15, 16, 5, 17], [1, 18, 4, 2], [19, 5, 3, 20, 21, 22]]
Padded Sequences:
 [[ 1  6  7  2  0  0]
 [ 8  9  3 10 11  0]
 [ 1 12 13  4 14  0]
 [15 16  5 17  0  0]
 [ 1 18  4  2  0  0]
 [19  5  3 20 21 22]]
lables: [1 1 1 0 0 0]
one hot encoding:
 [[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 

  super().__init__(**kwargs)


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 453ms/step - accuracy: 0.5000 - loss: 0.6736 - val_accuracy: 1.0000 - val_loss: 0.5258
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - accuracy: 0.3333 - loss: 0.7190 - val_accuracy: 1.0000 - val_loss: 0.5384
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - accuracy: 0.3333 - loss: 0.6975 - val_accuracy: 1.0000 - val_loss: 0.5500
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.8333 - loss: 0.6242 - val_accuracy: 1.0000 - val_loss: 0.5593
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.6667 - loss: 0.6306 - val_accuracy: 1.0000 - val_loss: 0.5688
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275ms/step - accuracy: 0.6667 - loss: 0.6440 - val_accuracy: 1.0000 - val_loss: 0.5790
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━