<a href="https://colab.research.google.com/github/EslamMagdy12/NLP-SentimentAnalysis/blob/main/NLP_Sentiment_Analysis_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Without RNN: TF-IDF features with a logistic regression classifier

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Baseline sentiment classifier: TF-IDF features + logistic regression.
#
# Location of the IMDB reviews CSV. The path points at a mounted Google Drive
# folder; change it here (and only here) to run the cell in another environment.
DATA_PATH = "/content/drive/MyDrive/CV/IMDB Dataset.csv"

df = pd.read_csv(DATA_PATH)

# Raw review text is the input, the 'sentiment' column is the target.
X = df['review']
Y = df['sentiment']

# Encode the sentiment labels as integers; the fitted encoder is kept so the
# integer predictions can be mapped back to the original labels for display.
label_encoder = LabelEncoder()
Y = label_encoder.fit_transform(Y)

# Hold out 20% of the reviews for testing; fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the vocabulary/IDF weights on the training split only, then reuse them
# for the test split so no information from the test set leaks into training.
vectorizer = TfidfVectorizer(max_features=10000)
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# The test set was already scored above, so the preview reuses those
# predictions instead of calling predict() a second time.
predictions = y_pred[:5]

# inverse_transform accepts a whole array, so each preview is decoded in a
# single call rather than one call per element.
print("\nPredictions for the first 5 test samples:")
for label in label_encoder.inverse_transform(predictions):
    print(label)

print("\nTrue values for the first 5 test samples:")
for label in label_encoder.inverse_transform(y_test[:5]):
    print(label)

Test Accuracy: 89.85%

Predictions for the first 5 test samples:
negative
positive
negative
positive
negative

True values for the first 5 test samples:
positive
positive
negative
positive
negative


With RNN: an LSTM network on the Keras IMDB dataset

In [4]:
from tensorflow import keras

# Recurrent sentiment classifier: Embedding -> LSTM -> sigmoid on the Keras
# IMDB dataset.

# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling are repeatable from one run of the notebook to the next.
SEED = 42
keras.utils.set_random_seed(SEED)

# Vocabulary size is shared by the dataset loader and the Embedding layer;
# the two values must agree, so they come from a single constant.
VOCAB_SIZE = 10000
# Every review is padded or truncated to this many tokens.
MAX_LEN = 200

(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data(num_words=VOCAB_SIZE)

X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=MAX_LEN)
X_test = keras.preprocessing.sequence.pad_sequences(X_test, maxlen=MAX_LEN)

n_features = X_train.shape[1]

# The input shape is declared with an explicit keras.Input entry rather than
# an `input_shape=` argument on the first layer, which is the form Keras
# recommends for Sequential models.
model = keras.Sequential([
    keras.Input(shape=(n_features,)),
    keras.layers.Embedding(VOCAB_SIZE, 128),
    keras.layers.LSTM(128),
    keras.layers.Dense(1, activation='sigmoid')
])

optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

history = model.fit(X_train, y_train, epochs=10)

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy * 100}%")

# The model outputs one sigmoid probability per review; 0.5 is the decision
# threshold between the two classes.
predictions = model.predict(X_test[:5])

print("\nPredictions for the first 5 test samples:")
for pred in predictions:
    label = "positive" if pred[0] >= 0.5 else "negative"
    print(label)

print("\nTrue values for the first 5 test samples:")
for true_val in y_test[:5]:
    label = "positive" if true_val == 1 else "negative"
    print(label)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(**kwargs)


Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.7169 - loss: 0.5316
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 12ms/step - accuracy: 0.8760 - loss: 0.3059
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9286 - loss: 0.1954
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - accuracy: 0.9514 - loss: 0.1398
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - accuracy: 0.9668 - loss: 0.0992
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9729 - loss: 0.0755
Epoch 7/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.9815 - loss: 0.0545
Epoch 8/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - accuracy: 0.9901 - loss: 0.0338
Epoch 9/10
[1m782/782[0m 