In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

In [None]:
# Read the clothing-review CSV into a DataFrame.
# NOTE(review): this looks like a Colab-style absolute path; change DATA_PATH
# when running the notebook elsewhere.
DATA_PATH = "/content/Womens Clothing E-Commerce Reviews.csv"
data = pd.read_csv(DATA_PATH)

In [None]:
# Discard rows whose 'Review Text' is missing; all other columns may still
# contain NaN. Reassigning (instead of inplace=True) follows the pandas
# recommendation and keeps the step a plain "input -> output" expression.
data = data.dropna(subset=['Review Text'])


In [None]:
# Derive a three-way sentiment label from the numeric rating:
#   above 3 -> 'positive', exactly 3 -> 'neutral', everything else -> 'negative'.
def _rating_to_sentiment(rating):
    """Translate a single rating value into its sentiment label."""
    if rating > 3:
        return 'positive'
    if rating == 3:
        return 'neutral'
    return 'negative'


data['Sentiment'] = data['Rating'].apply(_rating_to_sentiment)

In [None]:
# Build the vocabulary from the review texts and encode every review as a
# fixed-length sequence of integer word ids.
# NOTE(review): the vocabulary is fitted on all reviews, i.e. before the
# train/test split performed later in the notebook.
review_texts = data['Review Text']
tokenizer = Tokenizer()
tokenizer.fit_on_texts(review_texts)
encoded_reviews = tokenizer.texts_to_sequences(review_texts)
# Force every sequence to exactly 100 ids: shorter reviews are padded,
# longer ones are cut down to 100.
X = pad_sequences(encoded_reviews, maxlen=100)

In [None]:
# Turn the string sentiment labels into integer class ids. The fitted encoder
# is kept in `label_encoder` so ids can be mapped back to label names.
sentiment_labels = data['Sentiment']
label_encoder = LabelEncoder().fit(sentiment_labels)
y = label_encoder.transform(sentiment_labels)

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle so
# the split is repeatable.
# stratify=y keeps the proportion of each sentiment class the same in the
# training and test partitions, so the test metrics are not skewed by an
# unlucky draw of the less frequent classes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Recurrent classifier: word embedding -> single LSTM layer -> 3-way softmax
# (one output unit per sentiment class).
# The embedding table needs one row per known word plus one extra row
# (presumably for index 0, which the padded sequences use as filler).
vocabulary_size = len(tokenizer.word_index) + 1

model = Sequential([
    # 100-dimensional word vectors over input sequences of length 100.
    Embedding(input_dim=vocabulary_size, output_dim=100, input_length=100),
    # 64 LSTM units with dropout on both the inputs and the recurrent state.
    LSTM(units=64, dropout=0.2, recurrent_dropout=0.2),
    Dense(units=3, activation='softmax'),
])



In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (the targets are integer class ids, not one-hot vectors), and accuracy as
# the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for 5 epochs in mini-batches of 64 samples; validation_split=0.1
# reserves 10% of the training data for the per-epoch validation metrics.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7b9097c23670>

In [None]:
# Score the model on the held-out test split. evaluate() yields the loss
# followed by the compiled metric (accuracy), which are unpacked below.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.5625892281532288
Test Accuracy: 0.8052549958229065


In [None]:
#Perform sentiment analysis on new input
def predict_sentiment(review_text, max_len=100):
    """Predict the sentiment label for a single review.

    Relies on the notebook-level ``tokenizer``, ``model`` and
    ``label_encoder`` objects created in earlier cells.

    Args:
        review_text: Raw review text to classify.
        max_len: Length the encoded review is padded/truncated to. It must
            equal the sequence length the model was trained with; the
            default of 100 matches the value used elsewhere in this notebook.

    Returns:
        The entry of ``label_encoder.classes_`` whose predicted probability
        is highest.
    """
    # Encode the text with the already-fitted tokenizer and bring it to the
    # fixed length the model expects.
    input_sequence = tokenizer.texts_to_sequences([review_text])
    padded_sequence = pad_sequences(input_sequence, maxlen=max_len)
    # A batch of one review goes in, so row 0 holds its class probabilities.
    # verbose=0 suppresses the progress bar Keras would print on every call.
    sentiment_probs = model.predict(padded_sequence, verbose=0)[0]
    # Map the most probable class index back to its label name.
    sentiment_label = label_encoder.classes_[sentiment_probs.argmax()]
    return sentiment_label

In [None]:
# Demo: classify one hand-written review and print both the review and the
# label the model assigns to it.
user_review = "This dress exceeded my expectations! The fabric is luxurious and feels amazing against the skin."
predicted_sentiment = predict_sentiment(review_text=user_review)

print(f"The review by user is: '{user_review}'")
print(f"The review is {predicted_sentiment}.")

The review by user is: 'This dress exceeded my expectations! The fabric is luxurious and feels amazing against the skin.'
The review is positive.
