This notebook trains an LSTM model for sentiment analysis on the IMDB movie review dataset. It also defines helper functions for making predictions on new text inputs.

In [21]:
# Mount Google Drive into the Colab runtime at /content/drive (Colab-only API).
# The dataset CSV loaded further down is read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
import numpy as np
import pandas as pd
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [23]:
# Read the IMDb reviews CSV from the mounted Drive folder into a DataFrame
file_path = '/content/drive/MyDrive/datasets/IMDB Dataset.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [24]:
# Hyperparameters shared by the tokenizer, the padding step, the model and training
max_features = 5_000  # vocabulary cap: Tokenizer num_words and Embedding input size
maxlen = 300          # sequence length every review is padded to
batch_size = 64       # mini-batch size passed to model.fit

In [25]:
# The CSV is expected to hold the review text in the 'review' column and the
# sentiment label in the 'sentiment' column.
# Fail fast with a clear message if the file does not have that layout.
missing_columns = {'review', 'sentiment'} - set(df.columns)
if missing_columns:
    raise KeyError(f"Dataset is missing expected column(s): {sorted(missing_columns)}")

texts = df['review'].values      # raw review strings, one per row
labels = df['sentiment'].values  # sentiment label for the matching review

In [26]:
# Tokenize the text data.
# num_words=max_features caps the vocabulary used when texts are converted to
# integer sequences, so only the most frequent words end up in `sequences`.
# NOTE(review): the tokenizer is fitted on ALL reviews before the train/test
# split below, so the vocabulary is also influenced by the test reviews.
# The same fitted tokenizer is reused by preprocess_input() at prediction time.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [27]:
# Targets: the raw sentiment labels (they are encoded to integers before training)
y = labels
# Features: every review as a fixed-length row of word indices.
# padding/truncating are spelled out at their Keras default value ('pre').
x = pad_sequences(sequences, maxlen=maxlen, padding='pre', truncating='pre')

In [28]:
# Hold out 20% of the reviews for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
# Build the LSTM classifier: embedding -> LSTM -> single sigmoid unit
model = Sequential([
    # Map each of the max_features word indices to a 128-dimensional vector
    Embedding(max_features, 128, input_length=maxlen),
    # 64-unit LSTM with dropout on both its inputs and its recurrent connections
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    # One sigmoid output: a score between 0 and 1 for the binary sentiment label
    Dense(1, activation='sigmoid'),
])



In [30]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [31]:
# Print the layer-by-layer model summary (output shapes and parameter counts)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 300, 128)          640000    
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 689473 (2.63 MB)
Trainable params: 689473 (2.63 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [37]:
from sklearn.preprocessing import LabelEncoder

# Turn the string sentiment labels into integer class ids.
# The encoder learns its classes from the training labels only and then applies
# that same mapping to both splits.
# NOTE(review): LabelEncoder numbers classes in sorted order, so if the labels
# are 'negative'/'positive' this yields negative -> 0 and positive -> 1, which
# is what predict_sentiment's "Positive if > 0.5" rule relies on - confirm the
# label values in the CSV.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)


In [38]:
# Train for 3 epochs, reporting loss/accuracy on the held-out split after each one.
# NOTE(review): the test split doubles as validation data here, so the validation
# metrics are not an independent estimate if they are used to tune the model.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=3,
    validation_data=(x_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x78f3d9625600>

The cells below run the trained model on an example input ("This movie was fantastic! The acting was superb.") and print the predicted sentiment together with the model's output probability.

In [39]:
# Function to preprocess input text
def preprocess_input(text):
    """Convert one raw text string into a padded index array for the model.

    The text is encoded with the module-level ``tokenizer`` (the one fitted on
    the training corpus) and padded to ``maxlen``, i.e. the same preprocessing
    that was applied to the training data.

    Args:
        text: The text to encode.

    Returns:
        The array produced by ``pad_sequences`` for a one-element batch,
        with shape ``(1, maxlen)``.
    """
    # texts_to_sequences expects a list of texts, so wrap the single string
    token_ids = tokenizer.texts_to_sequences([text])
    return pad_sequences(token_ids, maxlen=maxlen)

In [40]:
# Function to get sentiment prediction
def predict_sentiment(text, threshold=0.5):
    """Predict the sentiment of a single piece of text with the trained model.

    Args:
        text: The text to classify.
        threshold: Decision cutoff applied to the model's sigmoid output;
            scores strictly above it are labelled 'Positive'. Defaults to 0.5,
            which reproduces the original hard-coded behaviour.

    Returns:
        A ``(sentiment, prediction)`` tuple where ``sentiment`` is either
        'Positive' or 'Negative' and ``prediction`` is the model's raw output
        score for the text.

    Raises:
        ValueError: If ``threshold`` is outside the range [0, 1].
    """
    if not 0.0 <= threshold <= 1.0:
        raise ValueError(f"threshold must be between 0 and 1, got {threshold!r}")

    preprocessed_text = preprocess_input(text)
    # predict returns one row per input with a single output unit; take that scalar
    prediction = model.predict(preprocessed_text)[0][0]
    sentiment = 'Positive' if prediction > threshold else 'Negative'
    return sentiment, prediction

In [51]:
# Example input text for the first prediction demo (a positive-sounding review)
input_text = "This movie was fantastic! The acting was superb."

In [52]:
# Get prediction: the 'Positive'/'Negative' label and the model's raw output score
sentiment, probability = predict_sentiment(input_text)



In [53]:
# Display results: the input, the predicted label and the score to 4 decimals
print(
    f"Input Text: {input_text}",
    f"Predicted Sentiment: {sentiment}",
    f"Probability: {probability:.4f}",
    sep="\n",
)

Input Text: This movie was fantastic! The acting was superb.
Predicted Sentiment: Positive
Probability: 0.8629


In [54]:
# Second example input text (a short negative-sounding phrase)
input_text = "complete disappointment."

In [55]:
# Classify the second example and report the input, label and score (4 decimals)
sentiment, probability = predict_sentiment(input_text)
print(
    f"Input Text: {input_text}",
    f"Predicted Sentiment: {sentiment}",
    f"Probability: {probability:.4f}",
    sep="\n",
)

Input Text: complete disappointment.
Predicted Sentiment: Negative
Probability: 0.1050
