# In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
import matplotlib.pyplot as plt

# 1) Load the IMDb reviews, keeping only the `max_words` most frequent words.
#    `max_len` is the fixed number of token ids each review is forced to.
max_words = 10000
max_len = 200

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# 2) Pad/truncate both splits so every review is exactly `max_len` ids long.
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

# 3) Build the model: embedding -> single LSTM -> one sigmoid output unit.
# NOTE(review): newer Keras releases appear to deprecate `input_length` on
# Embedding -- confirm against the installed version before removing it.
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=64, input_length=max_len))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# 4) Train, holding out 20% of the training data for validation.
#    `history` is kept so the learning curves can be plotted afterwards.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=128,
    epochs=5,
)

# 5) Evaluate on the untouched test split.
loss, acc = model.evaluate(X_test, y_test)
print("Test accuracy:", acc)

# 6) Predict custom text
# `get_word_index()` returns each word's raw frequency rank, while the
# sequences produced by `load_data()` are shifted so that ids 0, 1 and 2 stay
# reserved for padding, start-of-sequence and out-of-vocabulary (Keras
# defaults: `start_char=1`, `oov_char=2`, `index_from=3`).
word_index = imdb.get_word_index()
# Print a summary only; dumping the whole mapping floods the output.
print("Vocabulary size:", len(word_index))

_START_TOKEN = 1   # id load_data() prepends to every review
_OOV_TOKEN = 2     # id load_data() substitutes for out-of-vocabulary words
_INDEX_OFFSET = 3  # shift between raw ranks and the ids used in training


def encode_text(text):
    """Encode raw review text the same way the training sequences are encoded.

    The text is lower-cased and every character that is neither alphanumeric
    nor an apostrophe is treated as a separator, so "great," and "it!" resolve
    to "great" and "it" instead of falling through to the unknown token.
    Each word's rank is shifted by the offset `load_data()` applies; unknown
    words, and words whose shifted id falls outside the `max_words`
    vocabulary the Embedding layer was sized for, become the
    out-of-vocabulary id. The start token is prepended as in training.

    Args:
        text: Free-form review text.

    Returns:
        The array produced by `pad_sequences` for a single sequence padded or
        truncated to `max_len`, ready to pass to `model.predict`.
    """
    # NOTE(review): apostrophes are kept on the assumption that the IMDb
    # vocabulary stores contractions such as "it's" intact -- confirm.
    cleaned = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in text.lower()
    )

    encoded = [_START_TOKEN]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = _OOV_TOKEN if rank is None else rank + _INDEX_OFFSET
        # Ids at or beyond max_words were never seen by the Embedding layer.
        if token_id >= max_words:
            token_id = _OOV_TOKEN
        encoded.append(token_id)

    return pad_sequences([encoded], maxlen=max_len)

def sentiment(text):
    """Classify `text` as "Positive" or "Negative" using the trained model.

    The text is encoded with `encode_text`, run through `model.predict`, and
    the single output value is compared against a 0.5 threshold.

    Args:
        text: Free-form review text.

    Returns:
        "Positive" when the predicted value is strictly greater than 0.5,
        otherwise "Negative".
    """
    batch = encode_text(text)
    predictions = model.predict(batch)
    score = predictions[0][0]  # one input row, one output unit
    if score > 0.5:
        return "Positive"
    return "Negative"

# Try the classifier on a couple of hand-written reviews.
for review in (
    "This movie was great, I loved it!",
    "Worst movie I have ever seen.",
):
    print(sentiment(review))

# 7) and 8) Plot the training vs. validation curves, one figure per metric:
#    accuracy first, then loss.
for train_key, val_key, title in (
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
):
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='validation')
    plt.title(title)
    plt.legend()
    plt.show()
