<a href="https://colab.research.google.com/github/Ahmed-Khaled-JS/Sentiment-Analysis-NLP-Task/blob/main/Sentiment_analysis_nlp_task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, GRU, Dense
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import text_to_word_sequence

In [20]:
# Vocabulary size kept by the dataset loader and the fixed review length
# (in tokens) that every sequence is later padded/truncated to.
max_words = 10000
max_len = 200

# Integer-encoded reviews with their labels, already split into train/test.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=max_words)

# Word -> id lookup. The raw ranks are shifted by 3 so that ids 0-3 are free
# for the special tokens registered right below.
word_index = {
    word: rank + 3
    for word, rank in keras.datasets.imdb.get_word_index().items()
}
word_index.update({"<PAD>": 0, "<START>": 1, "<UNK>": 2, "<UNUSED>": 3})

# Inverse lookup (id -> word), handy for decoding an encoded review.
reverse_word_index = dict((idx, word) for word, idx in word_index.items())


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Bring both splits to exactly `max_len` tokens per review, appending zeros
# at the end of reviews that are shorter than that.
x_train, x_test = (
    pad_sequences(split, maxlen=max_len, padding="post")
    for split in (x_train, x_test)
)

In [7]:
def create_model(model_type="RNN", embedding_dim=128, units=64):
    """Build and compile a binary sentiment classifier.

    The network is: token-id input of length ``max_len`` -> ``Embedding`` ->
    one recurrent layer -> single sigmoid unit.

    Args:
        model_type: Which recurrent layer to use: ``"RNN"`` (``SimpleRNN``),
            ``"LSTM"`` or ``"GRU"``.
        embedding_dim: Size of each embedding vector.
        units: Number of units in the recurrent layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, binary cross-entropy
        loss, accuracy metric).

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    recurrent_layers = {"RNN": SimpleRNN, "LSTM": LSTM, "GRU": GRU}
    if model_type not in recurrent_layers:
        # Without this guard an unknown name would silently produce a model
        # with no recurrent layer at all (Embedding fed straight into Dense).
        raise ValueError(
            f"Unknown model_type {model_type!r}; "
            f"expected one of {sorted(recurrent_layers)}"
        )

    model = Sequential()
    # Declare the input shape explicitly instead of passing `input_length`
    # to Embedding.
    model.add(keras.Input(shape=(max_len,), dtype="int32"))
    model.add(Embedding(input_dim=max_words, output_dim=embedding_dim))
    # return_sequences=False: only the final state feeds the classifier head.
    model.add(recurrent_layers[model_type](units, return_sequences=False))
    model.add(Dense(1, activation="sigmoid"))

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

In [8]:
# Seed Python, NumPy and the backend so repeated runs start from the same
# initial weights and shuffling order.
keras.utils.set_random_seed(42)

model = create_model("LSTM")

# Keep the History object so the per-epoch metrics can be inspected or
# plotted later instead of being lost as a bare cell output.
# NOTE(review): validation_data is the test split, so the val_* numbers are
# not an independent estimate if they are used to tune the model.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(x_test, y_test),
)


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 395ms/step - accuracy: 0.5410 - loss: 0.6780 - val_accuracy: 0.6928 - val_loss: 0.5795
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 396ms/step - accuracy: 0.7028 - loss: 0.5410 - val_accuracy: 0.8427 - val_loss: 0.3931
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 394ms/step - accuracy: 0.8798 - loss: 0.3084 - val_accuracy: 0.8571 - val_loss: 0.3582
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 392ms/step - accuracy: 0.9164 - loss: 0.2218 - val_accuracy: 0.8700 - val_loss: 0.3148
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 391ms/step - accuracy: 0.9489 - loss: 0.1539 - val_accuracy: 0.8672 - val_loss: 0.3389


<keras.src.callbacks.history.History at 0x7c3e3c5c9290>

In [16]:
def preprocess_text(text):
    """Encode raw review text the same way the training data is encoded.

    Args:
        text: The review as a plain string.

    Returns:
        The array returned by ``pad_sequences`` for a single review: one row
        of ``max_len`` token ids, zero-padded at the end.
    """
    start_id = word_index["<START>"]
    unknown_id = word_index["<UNK>"]

    # Begin with the <START> id, mirroring the marker that
    # `imdb.load_data` places at the front of each review by default.
    sequence = [start_id]
    for word in text_to_word_sequence(text):
        token_id = word_index.get(word, unknown_id)
        # The dataset was loaded with num_words=max_words and the Embedding
        # layer has input_dim=max_words, so any id >= max_words would be out
        # of range for the model. Map such rare words to <UNK> instead.
        if token_id >= max_words:
            token_id = unknown_id
        sequence.append(token_id)

    return pad_sequences([sequence], maxlen=max_len, padding="post")



In [18]:
def predict_sentiment(review):
    """Classify a single review with the notebook-level ``model``.

    Args:
        review: The review text as a plain string.

    Returns:
        A ``(sentiment, prediction)`` tuple where ``prediction`` is the raw
        model output for the review and ``sentiment`` is ``"Positive"`` when
        that output is above 0.5, otherwise ``"Negative"``.
    """
    encoded_batch = preprocess_text(review)
    scores = model.predict(encoded_batch)
    # The batch holds one review and the model has one output unit.
    prediction = scores[0][0]

    if prediction > 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    return sentiment, prediction

In [21]:
review_text = "This movie was fantastic! I really enjoyed it."
sentiment, positive_score = predict_sentiment(review_text)

# predict_sentiment returns the raw sigmoid output, which is compared against
# 0.5 in favour of "Positive". Report the score of the class that was actually
# chosen so a "Negative" result does not show a misleadingly low confidence.
confidence = positive_score if sentiment == "Positive" else 1 - positive_score
print(f"Sentiment: {sentiment} (Confidence: {confidence:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321ms/step
Sentiment: Positive (Confidence: 0.90)
