#### Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
import re
import tensorflow
import keras

  if not hasattr(np, "object"):


* Ignoring warnings

In [2]:
import warnings
warnings.filterwarnings('ignore')

* Setting environment variables

In [3]:
import os

# Set TF_ENABLE_ONEDNN_OPTS to '0' for this process.
# NOTE(review): tensorflow is already imported in the first cell; this flag is
# presumably read when TensorFlow loads, so it may need to be set before that
# import to have any effect — confirm.
os.environ.update({'TF_ENABLE_ONEDNN_OPTS': '0'})

#### Importing Keras libraries

In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM

In [6]:
# Load the IMDB reviews CSV (path is relative to the notebook's working directory).
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks like a
# saved index — consider index_col=0 if that column is not needed.
df = pd.read_csv(filepath_or_buffer="../data/IMDB Dataset.csv")
df.head(n=5)

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


#### Preprocessing the data

In [7]:
from preprocessing import to_lowercase, remove_html_tags, remove_punctuation

# Clean the review text in a single chain. The helpers run in the same order
# as before: lowercase first, then HTML tags, then punctuation.
df["review"] = (
    df["review"]
    .apply(to_lowercase)
    .apply(remove_html_tags)
    .apply(remove_punctuation)
)

In [8]:
# Preview the first rows again to confirm the cleaning steps took effect.
df.head()

Unnamed: 0,review,sentiment
0,one of the other reviewers has mentioned that ...,positive
1,a wonderful little production the filming tec...,positive
2,i thought this was a wonderful way to spend ti...,positive
3,basically there s a family where a little boy ...,negative
4,petter mattei s love in the time of money is...,positive


In [9]:
# Encode the string labels as integers for binary classification.
LABEL_TO_ID = {'positive': 1, 'negative': 0}

# Only map while the column is still non-numeric: re-running this cell on the
# already-encoded 0/1 values would otherwise turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df['sentiment']):
    df['sentiment'] = df['sentiment'].map(LABEL_TO_ID)

# Series.map yields NaN for values missing from the mapping; stop here rather
# than passing NaN labels on to the split and the model.
assert df['sentiment'].notna().all(), (
    "Unexpected sentiment label(s); expected only 'positive' or 'negative'."
)

X = df["review"]
y = df["sentiment"]

* Splitting the data into training, validation, and test sets

In [10]:
from sklearn.model_selection import train_test_split

# Step 1: hold out a stratified 20% of the data as the test set.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 2: hold out a stratified 10% of the remainder as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.1, random_state=42, stratify=y_trainval
)

In [11]:
# Text-vectorisation settings shared by the tokenizer, padding and models.
MAX_LEN = 200             # length passed to pad_sequences as maxlen
MAX_FEATURES = 10_000     # vocabulary size

# Words outside the tokenizer's vocabulary are represented by "<OOV>".
tokenizer = Tokenizer(oov_token="<OOV>", num_words=MAX_FEATURES)

# Fit on the training split only, so validation/test text does not influence
# the vocabulary.
tokenizer.fit_on_texts(X_train)

In [12]:
# Convert each split's reviews to integer sequences with the fitted tokenizer.
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts)
    for texts in (X_train, X_val, X_test)
)

In [13]:
# Bring every sequence in each split to the common length MAX_LEN.
X_train_padded, X_val_padded, X_test_padded = (
    pad_sequences(sequences, maxlen=MAX_LEN)
    for sequences in (X_train_seq, X_val_seq, X_test_seq)
)

In [14]:
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam

In [19]:
# Baseline network: embedding -> single LSTM -> dropout -> sigmoid output unit.
model = Sequential([
    Embedding(input_dim=MAX_FEATURES, output_dim=128, input_shape=(MAX_LEN,)),
    LSTM(units=64),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked
# as the reporting metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [21]:
# Train the baseline, validating on the held-out validation split each epoch.
fit_options = dict(
    epochs=5,
    batch_size=64,
    validation_data=(X_val_padded, y_val),
    verbose=1,
)
model.fit(X_train_padded, y_train, **fit_options)

# evaluate() returns the loss followed by the compiled metrics, so index 1 is
# the accuracy.
score = model.evaluate(X_test_padded, y_test, verbose=0)
test_accuracy = score[1]
print(f"\nTest accuracy: {test_accuracy:.2f}")

Epoch 1/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 249ms/step - accuracy: 0.8012 - loss: 0.4234 - val_accuracy: 0.8700 - val_loss: 0.3048
Epoch 2/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 225ms/step - accuracy: 0.8996 - loss: 0.2533 - val_accuracy: 0.8867 - val_loss: 0.2801
Epoch 3/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 153ms/step - accuracy: 0.9296 - loss: 0.1888 - val_accuracy: 0.8802 - val_loss: 0.3031
Epoch 4/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 149ms/step - accuracy: 0.9463 - loss: 0.1460 - val_accuracy: 0.8875 - val_loss: 0.3713
Epoch 5/5
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 152ms/step - accuracy: 0.9605 - loss: 0.1136 - val_accuracy: 0.8898 - val_loss: 0.3570

Test accuracy: 0.89


In [22]:
import keras_tuner as kt

def build_model(hp):
    """Build and compile an LSTM sentiment classifier for one tuner trial.

    Args:
        hp: Hyperparameter object supplied by Keras Tuner. This function
            samples ``embed_dim``, ``lstm_units``, ``dropout`` and ``lr``
            from it.

    Returns:
        A compiled ``Sequential`` model (binary cross-entropy loss, accuracy
        metric) with the same layer layout as the baseline model.
    """
    model = Sequential()
    # Use the notebook-level MAX_FEATURES / MAX_LEN instead of repeating the
    # literals, so tuned models always match the tokenizer vocabulary size and
    # the padded sequence length.
    model.add(Embedding(input_dim=MAX_FEATURES,
                        output_dim=hp.Choice('embed_dim', [100, 128, 200]),
                        input_shape=(MAX_LEN,)))
    model.add(LSTM(units=hp.Choice('lstm_units', [64, 128, 256])))
    model.add(Dropout(rate=hp.Float('dropout', 0.2, 0.5, step=0.1)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=hp.Choice('lr', [1e-3, 5e-4, 1e-4])),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [30]:
# Random search over the hyperparameters declared in build_model; trial
# results are written under tuner_logs/lstm_sentiment.
tuner_options = dict(
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='tuner_logs',
    project_name='lstm_sentiment',
)
tuner = kt.RandomSearch(build_model, **tuner_options)

# NOTE(review): validation_split=0.2 validates on a slice of X_train_padded,
# whereas the baseline fit validated on X_val_padded / y_val, so the two
# val_accuracy figures are measured on different data — confirm this is intended.
tuner.search(X_train_padded, y_train, epochs=5, validation_split=0.2)

Trial 10 Complete [00h 24m 09s]
val_accuracy: 0.8733333349227905

Best val_accuracy So Far: 0.8888888955116272
Total elapsed time: 23h 37m 07s


In [35]:
# Take the first model returned by the tuner's get_best_models().
# NOTE(review): best_model is not referenced by the later cells shown here,
# which save and predict with `model` — confirm which model is intended.
best_model = tuner.get_best_models()[0]

In [None]:
# Write the trained network to a .keras file in the working directory.
# NOTE(review): this saves `model` (the baseline trained above), not the
# tuner's `best_model` — confirm that is the model meant to be shipped.
model.save("sentiment_analysis2.keras")

In [None]:
# Reload the file just written to check that it can be read back; the bare
# name on the last line displays the loaded model's repr.
loaded_model = keras.models.load_model('sentiment_analysis2.keras')
loaded_model


<Sequential name=sequential_1, built=True>

In [None]:
import joblib
# Persist the fitted tokenizer alongside the model so the same word-to-index
# mapping can be reused at inference time.
# joblib files are pickle-based: only load tokenizer.pkl from a trusted source.
joblib.dump(tokenizer, "tokenizer.pkl")


['tokenizer.pkl']

In [None]:
def predict_sentiment(review_text):
    """Classify a single review as positive or negative.

    The text is cleaned with the same ``preprocessing`` helpers that were
    applied to the training reviews, then tokenized with the fitted
    ``tokenizer``, padded to ``MAX_LEN`` and scored by ``model``.

    Args:
        review_text: Raw review text as a string.

    Returns:
        str: ``"Positive (Probability: p)"`` when the model output is >= 0.5,
        otherwise ``"Negative (Probability: p)"``, with ``p`` shown to two
        decimals.
    """
    # Reuse the training-time cleaners rather than re-implementing them with
    # inline regexes: the training preview shows "there's" becoming "there s",
    # while deleting punctuation outright would give "theres", a token the
    # tokenizer was not fitted on.
    text = to_lowercase(review_text)
    text = remove_html_tags(text)
    text = remove_punctuation(text)

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=MAX_LEN)

    # predict() returns one row per input with a single sigmoid output.
    prediction = model.predict(padded)[0][0]
    label = 'Positive' if prediction >= 0.5 else 'Negative'
    return f"{label} (Probability: {prediction:.2f})"


# Quick smoke test on a short sentence. The review is printed before the
# prediction call so Keras' progress line appears between the two prints.
sample_review = "The food was great."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The food was great.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
Sentiment: Negative (Probability: 0.48)


In [None]:
# Second smoke test, this time with a negatively worded sentence.
sample_review = "The songs are over hyped."
print(f"Review: {sample_review}")
print(f"Sentiment: {predict_sentiment(sample_review)}")

Review: The songs are over hyped.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
Sentiment: Negative (Probability: 0.34)


In [None]:
# Interactive demo: classify reviews typed by the user until they enter 'exit'.
while True:
    try:
        # Strip surrounding whitespace so " exit " still quits.
        user_input = input("\nEnter a review (or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop
        # quietly instead of surfacing a traceback.
        break
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing to classify; prompt again rather than scoring an empty string.
        continue
    print(f"Predicted Sentiment: {predict_sentiment(user_input)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
Predicted Sentiment: Negative (Probability: 0.29)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
Predicted Sentiment: Negative (Probability: 0.18)
