In [21]:
# Import necessary tools
from tensorflow.keras.models import Sequential  # Basic neural network container
from tensorflow.keras.layers import Embedding, LSTM, Dense  # Layers we'll use
from tensorflow.keras.preprocessing.text import Tokenizer  # For text handling
from tensorflow.keras.utils import pad_sequences  # To make sequences same length
import numpy as np

In [22]:
texts = ["I loved this movie", "Hated the film", "Best movie ever", "Worst experience"] 
labels = np.array([1, 0, 1, 0])  # 1=positive, 0=negative

In [23]:
# Step 1: Prepare the text data
tokenizer = Tokenizer(num_words=10000)  # Keep top 10,000 words
tokenizer.fit_on_texts(texts)  # Learn all words in our texts
sequences = tokenizer.texts_to_sequences(texts)  # Convert words to numbers
data = pad_sequences(sequences, maxlen=100)  # Make all reviews 100 words long


In [24]:
# Step 2: Build the model
model = Sequential()  # Create empty model

# Add layers one by one:
# 1. Embedding: Turns word numbers into meaningful vectors
model.add(Embedding(input_dim=10000,  # How many unique words we have
                   output_dim=128))    # Size of each word vector

# 2. LSTM layer: Understands sequences in the text
model.add(LSTM(units=64))  # 64 memory units

# 3. Dense layer: Final decision maker (positive/negative)
model.add(Dense(1, activation='sigmoid'))  # 1 output: 0-1 probability

In [25]:
model.compile(optimizer='adam',       # Smart learning algorithm
              loss='binary_crossentropy',  # How to measure errors
              metrics=['accuracy'])   # Track correct guesses

In [26]:
# Step 4: Train the model
model.fit(data, labels,  # Our prepared data and answers
          epochs=5,      # How many times to see all data
          batch_size=32) # Process 32 reviews at once

Epoch 1/5


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.2500 - loss: 0.6937
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.7500 - loss: 0.6852
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 1.0000 - loss: 0.6767
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 1.0000 - loss: 0.6682
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 1.0000 - loss: 0.6594


<keras.src.callbacks.history.History at 0x1ec1020c890>

In [27]:
# Now the model can predict sentiments!
test_text = ["This film was okay"]
test_seq = tokenizer.texts_to_sequences(test_text)
test_data = pad_sequences(test_seq, maxlen=100)
prediction = model.predict(test_data)  # Returns something like [[0.65]] (65% positive)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step


array([[0.48232335]], dtype=float32)

In [29]:
# Now the model can predict sentiments!
test_text = ["This film was horrible"]
test_seq = tokenizer.texts_to_sequences(test_text)
test_data = pad_sequences(test_seq, maxlen=100)
prediction = model.predict(test_data)  # Returns something like [[0.65]] (65% positive)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


array([[0.48232335]], dtype=float32)