# **26/11/24** — IMDB sentiment classification with a SimpleRNN

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [None]:
# Load the IMDB review dataset (already tokenised into integer ids).
vocab_size = 10_000  # keep only the 10,000 most frequent words
max_length = 500     # every review is later padded/truncated to this many tokens
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=vocab_size,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Preprocess: bring every review to exactly max_length tokens so they can be
# batched together. This is padding/truncation, not value scaling.
# 'pre' is the pad_sequences default for both options: zeros are added at the
# front of short reviews and long reviews keep their last max_length tokens.
x_train = pad_sequences(
    x_train, maxlen=max_length, padding='pre', truncating='pre'
)
x_test = pad_sequences(
    x_test, maxlen=max_length, padding='pre', truncating='pre'
)

In [None]:
# Define the model: token ids -> 32-d embeddings -> SimpleRNN -> sigmoid score.
# Seed the Python, NumPy and TensorFlow RNGs first so that weight
# initialisation (and the shuffling done later by fit) is repeatable when the
# notebook is re-run from a fresh kernel.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Explicit input spec. It replaces Embedding's `input_length` argument,
    # which Keras 3 deprecates and ignores (it only emits a warning).
    tf.keras.Input(shape=(max_length,), dtype='int32'),
    Embedding(input_dim=vocab_size, output_dim=32),
    SimpleRNN(units=32, return_sequences=False),  # keep only the final state
    Dense(1, activation='sigmoid'),  # output layer: one score in (0, 1)
])



In [None]:
# Compile: Adam optimiser, binary cross-entropy for the single sigmoid output,
# and accuracy reported as the monitoring metric.
model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train for 3 epochs in batches of 64, holding out 20% of the training data
# for validation. fit() is left as the last expression so the returned History
# object is displayed as the cell result.
print('Training the model...')
model.fit(
    x_train,
    y_train,
    epochs=3,
    batch_size=64,
    validation_split=0.2,
)

Training the model...
Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 149ms/step - accuracy: 0.5545 - loss: 0.6795 - val_accuracy: 0.7004 - val_loss: 0.5761
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 147ms/step - accuracy: 0.7945 - loss: 0.4525 - val_accuracy: 0.8118 - val_loss: 0.4346
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 147ms/step - accuracy: 0.9158 - loss: 0.2218 - val_accuracy: 0.6132 - val_loss: 0.8038


<keras.src.callbacks.history.History at 0x7e05bf69c490>

In [None]:
# Evaluate on the held-out test split and report loss and accuracy.
print("\nEvaluating the model...")
loss, accuracy = model.evaluate(
    x_test,
    y_test,
)
print(f"Test loss:{loss:.4f}")
print(f"Test accuracy:{accuracy*100:.2f}%")


Evaluating the model...
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 27ms/step - accuracy: 0.6175 - loss: 0.7801
Test loss:0.7841
Test accuracy:61.85%


In [None]:
# Test with a sample input.
# Build lookup tables for the IMDB vocabulary: word -> index and index -> word.
word_index = imdb.get_word_index()
reverse_word_index = {
    index: word
    for word, index in word_index.items()
}
def decode_review(sequence):
    """Turn a sequence of IMDB token ids back into a space-separated string.

    Each id is shifted down by 3 before being looked up in
    `reverse_word_index` (the dataset loader offsets word indices by 3 by
    default). Ids with no entry after the shift are rendered as '?'.
    """
    words = []
    for token_id in sequence:
        words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(words)
# Run the trained model on the first test review and report its sentiment.
text_review = x_test[0]
test_review_padded = pad_sequences([text_review], maxlen=max_length)
prediction = model.predict(test_review_padded)
# predict() returns an array holding a single score here; .item() pulls out
# that scalar instead of relying on the truth value of a one-element array.
print(
    "Sentiment Prediction:",
    "Positive" if prediction.item() > 0.5 else "Negative",
)
# To read the review text itself: print("\n Decoded review:", decode_review(text_review))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Sentiment Prediction: Negative


In [None]:
# 1. Load the IMDB vocabulary (word -> index) and build the inverse table
#    (index -> word).
word_index = imdb.get_word_index()
reverse_word_index = {
    index: word
    for word, index in word_index.items()
}

In [None]:
#2. preprocess custom review
def preprocess_review(review):
    """Encode a raw review string the same way the training data was encoded.

    The training sequences come from `imdb.load_data(num_words=vocab_size)`
    with its default settings, so this function mirrors that encoding:

    * every sequence starts with the start marker ``1``;
    * a word's id is its `word_index` value plus ``3``;
    * unknown words, and words whose id falls outside the ``vocab_size``
      vocabulary the Embedding layer was built for, become the
      out-of-vocabulary id ``2`` (``0`` is reserved for padding).

    Words are split on whitespace only, so a word with punctuation attached
    (e.g. ``"great!"``) is treated as unknown unless that exact string is in
    `word_index`.

    Args:
        review: The review text as a plain string.

    Returns:
        The array produced by `pad_sequences` for this single review, padded
        or truncated to `max_length` tokens and ready for `model.predict`.
    """
    # Defaults of imdb.load_data: start_char=1, oov_char=2, index_from=3.
    start_token, oov_token, index_offset = 1, 2, 3

    tokenized_review = [start_token]
    # Lowercase and split on whitespace; `review` is the function argument
    # (the previous code read an undefined name here and raised NameError).
    for word in review.lower().split():
        raw_index = word_index.get(word)
        if raw_index is None:
            tokenized_review.append(oov_token)
            continue
        token = raw_index + index_offset
        # Ids >= vocab_size were replaced by the OOV id during training and
        # would be out of range for the Embedding layer.
        tokenized_review.append(token if token < vocab_size else oov_token)

    # Pad (or truncate) to the fixed length the model expects.
    return pad_sequences([tokenized_review], maxlen=max_length)


In [None]:
#3.test custom i/p
custom_review=input("Enter your review: ")
test_review_padded=preprocess_review(custom_review)
prediction=model.predict(test_review_padded)
print("Sentiment Prediction:","Positive" if prediction>0.5 else "Negative