In [None]:
# Sentiment Analysis using LSTM (Colab Ready)

import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, LSTM, Dense
import pickle

# Step 1: Load IMDb Dataset
# vocab_size limits the token ids kept by imdb.load_data (num_words);
# max_len is the fixed length every review is padded/truncated to.
vocab_size = 10000
max_len = 200

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring both splits to the same fixed length in one pass.
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (x_train, x_test)
)

# Step 2: Build LSTM Model
# Embedding turns each token id (< vocab_size) into a 128-dimensional vector,
# the LSTM reduces the sequence to a 64-unit state, and the single sigmoid
# unit outputs a score in (0, 1) that is trained against the binary labels.
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# the sequence length is inferred from the padded inputs at fit time.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): the test split doubles as validation data here, so the
# reported val_* metrics are not an independent held-out estimate if they are
# used to pick epochs or hyperparameters.
model.fit(x_train, y_train, epochs=3, validation_data=(x_test, y_test), batch_size=64)

# Step 3: Save Model and Tokenizer
# Persist the trained model to 'sentiment_model.h5' in the working directory.
model.save('sentiment_model.h5')

# The "tokenizer" saved here is the plain word -> integer mapping returned by
# imdb.get_word_index(), pickled as-is; it is not a Tokenizer object.
# word_index is also kept as a module-level name because preprocess() below
# looks words up in it.
word_index = imdb.get_word_index()
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(word_index, f)

# Step 4: Prediction Function
def preprocess(text, max_len=200):
    """Encode raw review text using the same id scheme as the training data.

    `imdb.load_data` (with its default arguments) reserves id 0 for padding,
    1 for the start-of-sequence marker and 2 for out-of-vocabulary words, and
    shifts every rank from `imdb.get_word_index()` up by 3. The ranks in
    `word_index` therefore have to be shifted the same way before they are fed
    to the model, otherwise every word is looked up as a different word.

    Args:
        text: Raw review text.
        max_len: Length the encoded sequence is padded/truncated to.

    Returns:
        The array produced by `pad_sequences` for a batch containing this one
        encoded review.
    """
    # Replace punctuation with spaces so that e.g. "boring." matches the
    # vocabulary entry "boring". Apostrophes are kept so contractions stay
    # intact (assumes word_index stores them that way — TODO confirm).
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = text.lower().translate(
        str.maketrans(punctuation, " " * len(punctuation))
    )

    # Defaults of imdb.load_data: start_char=1, oov_char=2, index_from=3.
    start_id, oov_id, index_offset = 1, 2, 3

    seq = [start_id]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = oov_id if rank is None else rank + index_offset
        # Ids at or above vocab_size were replaced by the OOV id in the
        # training data and would fall outside the Embedding's input range.
        seq.append(token_id if token_id < vocab_size else oov_id)
    return pad_sequences([seq], maxlen=max_len)

def predict_sentiment(text):
    """Classify a review as positive or negative with the trained model.

    The text is encoded via `preprocess`, scored by `model.predict`, and the
    score of the single item in the batch is compared against 0.5.

    Returns:
        "Positive " when the score is above 0.5, otherwise "Negative ".
    """
    batch = preprocess(text)
    score = model.predict(batch)[0][0]
    if score > 0.5:
        return "Positive "
    return "Negative "

# Example: classify one short review. This is left as a bare trailing
# expression so that a notebook displays the returned label as the cell result.
predict_sentiment("The movie was boring.")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/3




[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 291ms/step - accuracy: 0.7325 - loss: 0.4998 - val_accuracy: 0.8554 - val_loss: 0.3371
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 329ms/step - accuracy: 0.9105 - loss: 0.2337 - val_accuracy: 0.8726 - val_loss: 0.3016
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 328ms/step - accuracy: 0.9334 - loss: 0.1779 - val_accuracy: 0.8679 - val_loss: 0.3226




Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step


'Negative '