In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense  # Change SimpleRNN to LSTM
from tensorflow.keras.optimizers import Adam  # Import Adam optimizer

In [2]:
# Load the dataset
data = pd.read_csv('/kaggle/input/amazon-alexa-dataset/amazon_alexa.tsv',sep = '\t') 

# Preprocess the data
texts = data['verified_reviews'].astype(str)
labels = data['feedback']

In [3]:
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

In [4]:
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

In [5]:
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

In [6]:
max_sequence_length = 100
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

In [7]:
embedding_dim = 50
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_sequence_length))
model.add(LSTM(units=64, activation='relu'))  # Change to LSTM
model.add(Dense(units=1, activation='sigmoid'))

In [8]:
# Use Adam optimizer with a lower learning rate
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [9]:
# Increase the number of epochs
epochs = 5
batch_size = 32
model.fit(X_train_padded, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7cc4e4975210>

In [10]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_padded, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Test Loss: 0.5229305028915405, Test Accuracy: 0.9079365134239197


In [11]:
# Make predictions on new data
new_data = ["i love this product"]
new_data_sequences = tokenizer.texts_to_sequences(new_data)
new_data_padded = pad_sequences(new_data_sequences, maxlen=max_sequence_length)

In [12]:
predictions = model.predict(new_data_padded)
print("Predictions:", predictions)

Predictions: [[0.620986]]
