In [3]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
from termcolor import colored

In [4]:
# Load dataclean_test.csv
print(colored("Loading train and test data...", "yellow"))
train_data = pd.read_csv('/content/clean_train.csv')
test_data = pd.read_csv('/content/clean_test.csv')
print(colored("Data loaded successfully.", "green"))

Loading train and test data...
Data loaded successfully.


In [5]:
# Tokenization and Padding
print(colored("Tokenizing and padding data...", "yellow"))
tokenizer = Tokenizer(num_words=5000, split=' ')
tokenizer.fit_on_texts(train_data['Clean_tweet'].astype(str).values)
train_sequences = tokenizer.texts_to_sequences(train_data['Clean_tweet'].astype(str).values)
test_sequences = tokenizer.texts_to_sequences(test_data['Clean_tweet'].astype(str).values)

max_len = max([len(x) for x in train_sequences])
train_tweets = pad_sequences(train_sequences, maxlen=max_len)
test_tweets = pad_sequences(test_sequences, maxlen=max_len)
print(colored("Tokenization and padding complete.", "green"))


Tokenizing and padding data...
Tokenization and padding complete.


In [6]:
# Build the LSTM Model
print(colored("Creating the LSTM model...", "yellow"))
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_len),
    SpatialDropout1D(0.4),
    LSTM(256, dropout=0.2, recurrent_dropout=0.2),
    Dense(2, activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Creating the LSTM model...




In [7]:
# Train the Model
print(colored("Training the LSTM model...", "green"))
history = model.fit(
    train_tweets, pd.get_dummies(train_data['Sentiment']).values,
    epochs=10, batch_size=128, validation_split=0.2, verbose=1
)
print(colored("Model training complete.", "green"))

Training the LSTM model...
Epoch 1/10
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m602s[0m 300ms/step - accuracy: 0.7281 - loss: 0.5316 - val_accuracy: 0.7755 - val_loss: 0.4713
Epoch 2/10
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m616s[0m 297ms/step - accuracy: 0.7782 - loss: 0.4646 - val_accuracy: 0.7800 - val_loss: 0.4637
Epoch 3/10
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m656s[0m 314ms/step - accuracy: 0.7871 - loss: 0.4489 - val_accuracy: 0.7800 - val_loss: 0.4636
Epoch 4/10
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m644s[0m 295ms/step - accuracy: 0.7924 - loss: 0.4381 - val_accuracy: 0.7816 - val_loss: 0.4649
Epoch 5/10
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m620s[0m 294ms/step - accuracy: 0.7966 - loss: 0.4309 - val_accuracy: 0.7811 - val_loss: 0.4643
Epoch 6/10
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m624s[0m 295ms/step - accuracy: 0.8019 - loss: 0.4207 - val_a

In [8]:
# Save the Model and Tokenizer
model.save('lstm_sentiment_model.h5')
tokenizer_json = tokenizer.to_json()
with open('tokenizer.json', 'w') as f:
    f.write(tokenizer_json)
print(colored("Model and tokenizer saved successfully.", "green"))



Model and tokenizer saved successfully.


In [9]:
# Evaluate the Model
print(colored("Evaluating the LSTM model on test data...", "yellow"))
score, accuracy = model.evaluate(test_tweets, pd.get_dummies(test_data['Sentiment']).values, batch_size=128)
print("Test accuracy: {:.2f}%".format(accuracy * 100))
print(colored("Model evaluation complete.", "green"))

Evaluating the LSTM model on test data...
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 97ms/step - accuracy: 0.8356 - loss: 0.3595
Test accuracy: 82.46%
Model evaluation complete.


In [13]:
# Load Model and Tokenizer for New Predictions
print(colored("Loading the saved model and tokenizer...", "yellow"))
loaded_model = load_model('lstm_sentiment_model.h5')
with open('tokenizer.json') as f:
    loaded_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(f.read())
print(colored("Model and tokenizer loaded successfully.", "green"))



Loading the saved model and tokenizer...
Model and tokenizer loaded successfully.


In [16]:
import numpy as np

In [17]:
# Predict on a New Example
def predict_sentiment(text):
    sequence = loaded_tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_len)
    prediction = loaded_model.predict(padded_sequence)
    sentiment = "Positive" if np.argmax(prediction) == 1 else "Negative"
    print(colored(f"Predicted Sentiment: {sentiment}", "blue"))

In [18]:
# Testing the prediction with an example
example_text = "I'm really excited about this product, it's amazing!"
predict_sentiment(example_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Predicted Sentiment: Positive
