In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import nltk
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
# Load the IMDB dataset with top 10,000 frequent words
vocab_size = 10000
maxlen = 200

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=maxlen),
    LSTM(64),
    Dense(1, activation='sigmoid')
])




In [5]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [6]:
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 323ms/step - accuracy: 0.7129 - loss: 0.5352 - val_accuracy: 0.8618 - val_loss: 0.3313
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 321ms/step - accuracy: 0.8954 - loss: 0.2651 - val_accuracy: 0.8572 - val_loss: 0.3429
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 309ms/step - accuracy: 0.9321 - loss: 0.1783 - val_accuracy: 0.8702 - val_loss: 0.3261
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 321ms/step - accuracy: 0.9479 - loss: 0.1430 - val_accuracy: 0.8614 - val_loss: 0.3848
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 308ms/step - accuracy: 0.9692 - loss: 0.0914 - val_accuracy: 0.8696 - val_loss: 0.3913


<keras.src.callbacks.history.History at 0x7ccf03ecbad0>

In [7]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 40ms/step - accuracy: 0.8532 - loss: 0.4347
Test Accuracy: 0.86


In [24]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import json


In [29]:

texts = [
    "Intelligent Movie. This movie is obviously allegorical, a fascinating tale about AI, but it is mainly about manipulation and power. It isn't for those wanting action or spectacular CGI, the movie is aimed at people who like to think, rather than passively wait to be entertained. There are themes here not only about AI, but also about surveillance, with excellent points about how data about us is collected by phone companies, search engine companies, commercial operating systems makers and so on. The plot seems simple but isn't, it's extremely clever, with the protagonist playing games, trying to stay one step ahead of one another. This is a movie with perfectly consistent internal logic that plays out perfectly. Don't go in expecting too much, however, as I can see most people will not be satisfied by this movie, but for me, it does what it sets out to do brilliantly. Therefore I give at least 9/10. And most recent movies have been getting 5/10 from me. This movie succeeds where another recent movie about AI, Transcendence, I think it is called, failed (but it was an interesting failure). A third movie about AI, a Spanish movie called Eva, was also brilliant. Eva was more moving and this movie more philosophical. But both movies were perfect in their different ways. The AI's name in this movie, Ava, seems to be a nod to the title of the Spanish movie. As an aside, it's nice that no \"stars\" appeared in \"Ex Machina\" and \"Eva\", the casting was great. Of course there are several aspects of this movie that are unrealistic and often absurd. But because this is an allegorical movie, these are acceptable, because the movie is making points, rather than striving for realism. It's more of a fairytale than accurate portrayal.",
    "Amazing visuals and philosophical concepts! Not only is the Eternals a visual masterpiece, the inner challenges each of the eternals face is a great signifier of the MCU moving into a more mature, meaningful phase. When you look deeper into the plot and each events significance, and appreciate the detail put into a timeline that spans thousands of years, the low underwhelming ratings seem obviously out of place. And despite the fact that many people loved the MCU as a couple of buff guys and over-sexualised women, the transition the MCU is going through is inevitable, as a franchise that has supported generations, and quite frankly awesome. Besides from the stunning visuals and deep undertones Chloe Zhao certainly packs in enough fight scenes as possibly needed. Each eternal represents a unique power, connected through their source of cosmic energy. This variability allows for exhilarating fight scenes that are arguably more interesting than a couple of punches, kicks and the stray flying weapon. The love stories are quaint and meaningful, and certainly don't take up too much of the run time. Overall, the new age of maturity and deeper concepts seen in this movie is most definitely something to look forward to in Phase 4!",
    "Worst MCU film ever Following the events of Avengers: Endgame (2019), an unexpected tragedy forces the Eternals, ancient aliens who have been living on Earth in secret for thousands of years, out of the shadows to reunite against mankind's most ancient enemy, the Deviants. This film is by far the worst MCU film I have watch. It's massive insuylt to the comics from the Jack Kirby days. Gender swapping some of the eternals, bad story telling which makes absolutely no sense, unlikeable characters, little to no action and ridicules changes to the MCU just sum up how bad this film. Completely ruined Endgame The only goods were the music and the CGI for Arishen as well as bonus points for being voiced by David Kaye. Other then that it's just not a good movie makes both Captain Marvel and Iron Man 3 look good.",
]

texts = [
    "Intelligent Movie. This movie is obviously allegorical, a fascinating tale about AI, but it is mainly about manipulation and power. It isn't for those wanting action or spectacular CGI, the movie is aimed at people who like to think, rather than passively wait to be entertained. There are themes here not only about AI, but also about surveillance, with excellent points about how data about us is collected by phone companies, search engine companies, commercial operating systems makers and so on. The plot seems simple but isn't, it's extremely clever, with the protagonist playing games, trying to stay one step ahead of one another. This is a movie with perfectly consistent internal logic that plays out perfectly. Don't go in expecting too much, however, as I can see most people will not be satisfied by this movie, but for me, it does what it sets out to do brilliantly. Therefore I give at least 9/10. And most recent movies have been getting 5/10 from me. This movie succeeds where another recent movie about AI, Transcendence, I think it is called, failed (but it was an interesting failure). A third movie about AI, a Spanish movie called Eva, was also brilliant. Eva was more moving and this movie more philosophical. But both movies were perfect in their different ways. The AI's name in this movie, Ava, seems to be a nod to the title of the Spanish movie. As an aside, it's nice that no \"stars\" appeared in \"Ex Machina\" and \"Eva\", the casting was great. Of course there are several aspects of this movie that are unrealistic and often absurd. But because this is an allegorical movie, these are acceptable, because the movie is making points, rather than striving for realism. It's more of a fairytale than accurate portrayal.",
    "Amazing visuals and philosophical concepts! Not only is the Eternals a visual masterpiece, the inner challenges each of the eternals face is a great signifier of the MCU moving into a more mature, meaningful phase. When you look deeper into the plot and each events significance, and appreciate the detail put into a timeline that spans thousands of years, the low underwhelming ratings seem obviously out of place. And despite the fact that many people loved the MCU as a couple of buff guys and over-sexualised women, the transition the MCU is going through is inevitable, as a franchise that has supported generations, and quite frankly awesome. Besides from the stunning visuals and deep undertones Chloe Zhao certainly packs in enough fight scenes as possibly needed. Each eternal represents a unique power, connected through their source of cosmic energy. This variability allows for exhilarating fight scenes that are arguably more interesting than a couple of punches, kicks and the stray flying weapon. The love stories are quaint and meaningful, and certainly don't take up too much of the run time. Overall, the new age of maturity and deeper concepts seen in this movie is most definitely something to look forward to in Phase 4!",
    "Worst MCU film ever Following the events of Avengers: Endgame (2019), an unexpected tragedy forces the Eternals, ancient aliens who have been living on Earth in secret for thousands of years, out of the shadows to reunite against mankind's most ancient enemy, the Deviants. This film is by far the worst MCU film I have watch. It's massive insuylt to the comics from the Jack Kirby days. Gender swapping some of the eternals, bad story telling which makes absolutely no sense, unlikeable characters, little to no action and ridicules changes to the MCU just sum up how bad this film. Completely ruined Endgame The only goods were the music and the CGI for Arishen as well as bonus points for being voiced by David Kaye. Other then that it's just not a good movie makes both Captain Marvel and Iron Man 3 look good.",
]

labels = [1, 0, 0]  # 1 = positive, 0 = negative


In [31]:

max_words = 10000

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)


In [32]:
sequences = tokenizer.texts_to_sequences(texts)
print(sequences)


[[103, 5, 8, 5, 7, 44, 45, 6, 104, 105, 10, 21, 13, 16, 7, 106, 10, 107, 3, 46, 16, 47, 14, 108, 109, 48, 110, 111, 49, 2, 5, 7, 112, 50, 32, 51, 113, 9, 52, 53, 22, 114, 115, 9, 33, 116, 54, 17, 117, 118, 23, 34, 10, 21, 13, 55, 10, 119, 35, 120, 36, 10, 56, 121, 10, 122, 7, 123, 24, 124, 57, 125, 126, 57, 127, 128, 129, 130, 3, 131, 58, 2, 59, 60, 132, 13, 47, 19, 133, 134, 35, 2, 135, 136, 137, 138, 9, 139, 61, 140, 141, 4, 61, 62, 8, 7, 6, 5, 35, 63, 142, 143, 144, 11, 145, 25, 63, 64, 146, 12, 147, 65, 66, 148, 15, 26, 149, 150, 27, 32, 151, 23, 33, 152, 24, 8, 5, 13, 14, 67, 16, 153, 154, 16, 155, 25, 9, 156, 157, 158, 26, 159, 50, 160, 161, 68, 3, 27, 69, 70, 37, 71, 162, 163, 68, 38, 67, 8, 5, 164, 165, 62, 69, 5, 10, 21, 166, 26, 52, 16, 7, 72, 167, 13, 16, 28, 29, 73, 168, 6, 169, 5, 10, 21, 6, 74, 5, 72, 39, 28, 55, 170, 39, 28, 20, 75, 3, 8, 5, 20, 76, 13, 77, 70, 78, 171, 12, 79, 172, 173, 2, 174, 175, 12, 8, 5, 176, 60, 9, 33, 6, 177, 9, 2, 178, 4, 2, 74, 5, 15, 29, 179, 

In [33]:
max_len = 20  # max length of each sequence

padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
print(padded_sequences)


[[193  81   2   5   7 194  36  53  22 195  14 196  19  20   4   6 197  22
  198 199]
 [  4 275   3  86  83 276  12   8   5   7  27 277 278   9  43 279   9  12
   85 280]
 [330 331 332  11  19 101  23   6 102   5 100  77 333 334   3 335 336 337
   43 102]]


In [34]:
# Save tokenizer to JSON file
tokenizer_json = tokenizer.to_json()
with open('tokenizer.json', 'w') as f:
    f.write(tokenizer_json)


In [35]:
from tensorflow.keras.preprocessing.text import tokenizer_from_json

with open('tokenizer.json') as f:
    data = f.read()
    tokenizer = tokenizer_from_json(data)


In [36]:
from google.colab import files

files.download('tokenizer.json')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [38]:
sample_reviews = [
    "This movie was absolutely fantastic!",
    "I hated every second of it.",
    "The plot was boring, but the acting was great."
]

# Preprocess
sample_sequences = tokenizer.texts_to_sequences(sample_reviews)
padded = pad_sequences(sample_sequences, maxlen=200)

# Predict
predictions = model.predict(padded)
for review, score in zip(sample_reviews, predictions):
    sentiment = "Positive" if score > 0.5 else "Negative"
    print(f"Review: {review} — Sentiment: {sentiment} ({score[0]:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
Review: This movie was absolutely fantastic! — Sentiment: Positive (0.62)
Review: I hated every second of it. — Sentiment: Negative (0.26)
Review: The plot was boring, but the acting was great. — Sentiment: Negative (0.42)
