#### Importing necessary libraries

In [2]:
import numpy as np
import pandas as pd
import re

In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

  if not hasattr(np, "object"):


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM

In [5]:
df = pd.read_csv("../data/IMDB Dataset.csv")
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [6]:
from preprocessing import to_lowercase, remove_html_tags, remove_punctuation

df["review"] = df["review"].apply(to_lowercase)
df["review"] = df["review"].apply(remove_html_tags)
df["review"] = df["review"].apply(remove_punctuation)

In [7]:
df.head()

Unnamed: 0,review,sentiment
0,one of the other reviewers has mentioned that ...,positive
1,a wonderful little production the filming tec...,positive
2,i thought this was a wonderful way to spend ti...,positive
3,basically there s a family where a little boy ...,negative
4,petter mattei s love in the time of money is...,positive


In [8]:
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})
X = df["review"]
y = df["sentiment"]

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42, stratify=y_train)

In [10]:
MAX_FEATURES = 10000
MAX_LEN = 200
tokenizer = Tokenizer(num_words=MAX_FEATURES)
tokenizer.fit_on_texts(X_train)

In [11]:
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_val_seq = tokenizer.texts_to_sequences(X_val)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [12]:
X_train_padded = pad_sequences(X_train_seq, maxlen=MAX_LEN)
X_val_padded = pad_sequences(X_val_seq, maxlen=MAX_LEN)
X_test_padded = pad_sequences(X_test_seq, maxlen=MAX_LEN)

In [21]:
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2

In [22]:
model = Sequential()
model.add(Embedding(input_dim=MAX_FEATURES,output_dim=128,input_shape=(MAX_LEN,)))
model.add(LSTM(units=64, kernel_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(units=1, activation='sigmoid', kernel_regularizer=l2(0.001)))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [23]:
model.fit(
    X_train_padded, y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_val_padded, y_val),
    verbose=1
)
score = model.evaluate(X_test_padded, y_test, verbose=0)
print(f"\nTest accuracy: {score[1]:.2f}")

Epoch 1/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 218ms/step - accuracy: 0.8181 - loss: 0.4296 - val_accuracy: 0.8690 - val_loss: 0.3344
Epoch 2/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 216ms/step - accuracy: 0.8992 - loss: 0.2766 - val_accuracy: 0.8795 - val_loss: 0.3059
Epoch 3/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 214ms/step - accuracy: 0.9210 - loss: 0.2240 - val_accuracy: 0.8730 - val_loss: 0.3362
Epoch 4/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 217ms/step - accuracy: 0.9339 - loss: 0.2018 - val_accuracy: 0.8780 - val_loss: 0.3161
Epoch 5/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 215ms/step - accuracy: 0.9438 - loss: 0.1740 - val_accuracy: 0.8745 - val_loss: 0.3702

Test accuracy: 0.88


In [24]:
def predict_sentiment(review_text):
    text = review_text.lower()
    text = re.sub(r"<.*?>", "", text)
    text = re.sub(r'[^a-z0-9\s]', '', text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=MAX_LEN)

    prediction = model.predict(padded)[0][0]
    return f"{'Positive' if prediction >= 0.5 else 'Negative'} (Probability: {prediction:.2f})"


sample_review = "The food was great."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The food was great.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 496ms/step
Sentiment: Positive (Probability: 0.68)


In [25]:
sample_review = "The songs are over hyped."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The songs are over hyped.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
Sentiment: Negative (Probability: 0.37)


In [26]:
while True:
    user_input = input("\nEnter a tweet (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    print(f"Predicted Sentiment: {predict_sentiment(user_input)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
Predicted Sentiment: Positive (Probability: 0.60)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
Predicted Sentiment: Positive (Probability: 0.57)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
Predicted Sentiment: Positive (Probability: 0.77)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
Predicted Sentiment: Positive (Probability: 0.69)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
Predicted Sentiment: Positive (Probability: 0.65)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
Predicted Sentiment: Positive (Probability: 0.85)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
Predicted Sentiment: Negative (Probability: 0.39)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
Predicted Sentiment: Positive (Probability: 0.53)
[1m1/1[0m [32m━