In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

from google.colab import drive

# Mount Google Drive at the same absolute location that the CSV path below
# points into, so the read does not depend on the notebook's current working
# directory (a relative mount point such as 'drive' only lines up with
# '/content/drive' when the working directory happens to be '/content').
drive.mount('/content/drive')

# Read sentiment data from CSV file
data = pd.read_csv('/content/drive/My Drive/sentiment_data.csv')

# Separate phrases and sentiments from the data.
# `phrases` are the raw texts fed to the tokenizer; `sentiments` are the
# per-row labels that get encoded as class indices further down.
phrases = data['Phrase'].values
sentiments = data['Sentiment'].values

# Tokenize the phrases.
# An explicit out-of-vocabulary token gives words that were never seen while
# fitting a placeholder index; without it the tokenizer silently drops them,
# so a new phrase made of unseen words would turn into an empty sequence.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(phrases)
sequences = tokenizer.texts_to_sequences(phrases)

# Pad sequences to a fixed length: every phrase is padded to the length of
# the longest tokenized phrase in the data set.
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Convert sentiments to categorical labels.
# `sentiment_labels` holds the sorted distinct sentiment values and
# `encoded_sentiments[i]` is the index of `sentiments[i]` within it.
# `return_inverse=True` yields both arrays in one vectorised call instead of
# scanning the label array once per row.
sentiment_labels, encoded_sentiments = np.unique(sentiments, return_inverse=True)

# Split the data into training and testing sets.
# Rows are shuffled before slicing because a plain head/tail split inherits
# whatever ordering the CSV has (e.g. grouped by label or by source), which
# can leave the held-out 20% unrepresentative. The seed is fixed so that
# re-running the cell reproduces the same split.
split_ratio = 0.8
split_index = int(len(padded_sequences) * split_ratio)

shuffled_indices = np.random.default_rng(42).permutation(len(padded_sequences))
train_indices = shuffled_indices[:split_index]
test_indices = shuffled_indices[split_index:]

x_train = padded_sequences[train_indices]
y_train = encoded_sentiments[train_indices]

x_test = padded_sequences[test_indices]
y_test = encoded_sentiments[test_indices]

# Define the RNN model: embedding -> two stacked LSTMs (with dropout after
# each) -> dense hidden layer -> softmax over the sentiment classes.
embedding_dim = 100
# +1 because tokenizer word indices start at 1; index 0 is the padding value.
vocabulary_size = len(tokenizer.word_index) + 1

model = Sequential([
    # The input shape is declared with an explicit Input instead of the
    # Embedding layer's `input_length` argument, which is deprecated in
    # Keras 3; the model is still built with a fixed sequence length.
    tf.keras.Input(shape=(max_sequence_length,)),
    Embedding(vocabulary_size, embedding_dim),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(64, activation='relu'),
    # One output unit per distinct sentiment label.
    Dense(len(sentiment_labels), activation='softmax'),
])

# Training hyperparameters
learning_rate = 0.001
epochs = 10
batch_size = 32

# Compile the model. The targets are integer class indices, hence the sparse
# variant of categorical cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Train the model; the held-out split is evaluated after every epoch.
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
)

# Score a few unseen phrases, reusing the fitted tokenizer and the padding
# length the model was trained with.
new_phrases = [
    'This product is amazing!',
    'I regret buying this.',
    'It is just average.',
]
new_sequences = tokenizer.texts_to_sequences(new_phrases)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_sequence_length)
predictions = model.predict(new_padded_sequences)

# Map each row of class probabilities to the label of its most likely class.
best_class_indices = np.argmax(predictions, axis=1)
predicted_sentiments = list(sentiment_labels[best_class_indices])

# Report each phrase with its predicted sentiment, followed by a blank line.
for phrase, sentiment in zip(new_phrases, predicted_sentiments):
    print(f"Phrase: {phrase}", f"Predicted Sentiment: {sentiment}", "", sep="\n")


Mounted at drive
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Phrase: This product is amazing!
Predicted Sentiment: positive

Phrase: I regret buying this.
Predicted Sentiment: negative

Phrase: It is just average.
Predicted Sentiment: negative

