In [None]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

In [None]:
# Read the raw dataset from a CSV file into a DataFrame
# (replace 'your_data.csv' with the path to your own data)
df = pd.read_csv(filepath_or_buffer='your_data.csv')


In [None]:
# Replace missing values in the 'text' column with empty strings so the
# tokenizer never receives NaN.
# Comment this line out if your data has no missing text.
#
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the column selection: that form is chained
# assignment, which raises a FutureWarning on pandas 2.x and leaves `df`
# unchanged once Copy-on-Write is enabled.
df['text'] = df['text'].fillna("")


In [None]:
# Fit a word-level tokenizer on the text column, then encode every text as a
# list of integer word indices. num_words caps the vocabulary by word
# frequency; words outside it are mapped to the '<OOV>' token.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=5000)
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])

In [None]:
# Target labels -- assumes the 'sentiment_numerical' column holds the class ids
y = df['sentiment_numerical']

# Bring every sequence to exactly 100 tokens: shorter ones are padded at the
# end, longer ones are cut at the end
X = pad_sequences(sequences, truncating='post', padding='post', maxlen=100)


In [None]:
# Hold out 40% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.4,
)

In [None]:
# Training hyperparameters -- adjust these values when tuning the model
VAL_SPLIT = 0.1  # passed to model.fit() as validation_split
BTH_SIZE = 35    # passed to model.fit() as batch_size
EPOCHS = 20      # passed to model.fit() as epochs

In [None]:
# Assemble the network as a single layer stack:
# embedding -> bidirectional LSTM -> dense hidden layer -> softmax output,
# with dropout after the recurrent and the hidden layer
model = Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=100),
    Bidirectional(LSTM(25, kernel_regularizer='l2')),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer='l2'),  # L2-regularized hidden layer
    Dropout(0.5),
    Dense(3, activation='softmax', kernel_initializer='he_normal'),  # 3-way class probabilities
])

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (targets are integer class ids), and accuracy as the reported metric
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Show the layer-by-layer model summary.
# summary() prints the table itself and returns None, so it is called directly:
# wrapping it in print() would append a stray "None" line to the output.
model.summary()

In [None]:
# Fit the model on the training split, holding out a VAL_SPLIT fraction of it
# for validation; the returned History object is kept for the plots below
history = model.fit(
    X_train, y_train,
    validation_split=VAL_SPLIT,
    batch_size=BTH_SIZE,
    epochs=EPOCHS,
)

In [None]:
# Per-epoch training metrics recorded by fit(), reused by the plotting cell
loss = history.history['loss']
acc = history.history['accuracy']

# Report accuracy on the held-out test set; with the single 'accuracy' metric
# configured at compile time, evaluate() returns [loss, accuracy]
print("\nTest Accuracy")
print(model.evaluate(X_test, y_test)[1])

In [None]:
# Plot the training accuracy and loss curves side by side.
# The x-axis is derived from the recorded history instead of the EPOCHS
# constant, so the cell still works when EPOCHS was edited after training
# (matplotlib raises on x/y length mismatch otherwise).
plt.figure(figsize=(8, 8))

# Left panel: accuracy per epoch
plt.subplot(1, 2, 1)
plt.plot(range(len(acc)), acc, label=' Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Accuracy')

# Right panel: loss per epoch
plt.subplot(1, 2, 2)
plt.plot(range(len(loss)), loss, label=' Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Loss')

plt.show()

In [None]:
# Persist the trained model to disk in HDF5 format
model.save(filepath='rnn_model.h5')

The following code cell picks a random text from the dataset and predicts its sentiment.

In [None]:
# Load the trained model for random text prediction.
# The file name must match the one passed to model.save() ('rnn_model.h5');
# 'improved_rnn_model.h5' is never written by this notebook, so loading it
# fails on a fresh run unless that file happens to exist already.
model = load_model('rnn_model.h5')

# Assuming 'text' is the column containing the text data
texts = df['text']
sentiments = df['sentiment']  # Assuming 'sentiment' is the column containing the sentiment labels

# Choose a random row position (upper bound is exclusive)
random_index = np.random.randint(0, len(texts))

# Get the text and original sentiment at the random position
random_text = texts.iloc[random_index]
original_sentiment = sentiments.iloc[random_index]

# Preprocess the random text exactly like the training data:
# same fitted tokenizer, same length and padding/truncation settings
sequence = tokenizer.texts_to_sequences([random_text])
padded_sequence = pad_sequences(sequence, maxlen=100, padding='post', truncating='post')

# Make a prediction; the result has one row of class probabilities per input text
predictions = model.predict(padded_sequence)

# Get the predicted sentiment: most probable class of the single input row
predicted_sentiment = predictions.argmax(axis=-1)[0]

print("Sentiment numeric labels: 0 = negative, 1 = neutral, 2 = positive")
print(f"Random Text: {random_text}")
print(f"Original Sentiment: {original_sentiment}")
print(f"Predicted Sentiment: {predicted_sentiment}")

The following code cell lets the user enter a text, and the model predicts its sentiment.

In [None]:

# Load the trained model for user-input prediction.
# The file name must match the one passed to model.save() ('rnn_model.h5');
# 'improved_rnn_model.h5' is never written by this notebook, so loading it
# fails on a fresh run unless that file happens to exist already.
model = load_model('rnn_model.h5')

# Take user input for the text
user_text = input("Enter the text: ")

# Preprocess the user input exactly like the training data:
# same fitted tokenizer, same length and padding/truncation settings
sequence = tokenizer.texts_to_sequences([user_text])
padded_sequence = pad_sequences(sequence, maxlen=100, padding='post', truncating='post')

# Make a prediction; the result has one row of class probabilities per input text
predictions = model.predict(padded_sequence)

# Get the predicted sentiment: most probable class of the single input row
predicted_sentiment = predictions.argmax(axis=-1)[0]

print("Sentiment numeric labels: 0 = negative, 1 = neutral, 2 = positive")
print(f"User Input Text: {user_text}")
print(f"Predicted Sentiment: {predicted_sentiment}")