In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

# Load the CSV file
df = pd.read_csv('flipkart.csv')

# Print the columns to verify the column names
print(df.columns)

# Process the ratings into labels
def label_rating(rating):
    if rating < 3:
        return 'Negative'
    elif rating == 3 or rating == 4:
        return 'Neutral'
    else:
        return 'Positive'

# Check the first few rows of the DataFrame to verify the data
print(df.head())

# Assuming the columns are named 'product', 'review', and 'rating'
df['label'] = df['rating'].apply(label_rating)

# Extract texts and labels
texts = df['review'].tolist()
labels = df['label'].tolist()

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
data = pad_sequences(sequences, padding='post')

# Label Encoding
label_mapping = {'Positive': 0, 'Neutral': 1, 'Negative': 2}
encoded_labels = np.array([label_mapping[label] for label in labels])
labels = tf.keras.utils.to_categorical(encoded_labels, num_classes=3)

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Model Definition
model = Sequential([
    Embedding(input_dim=len(word_index) + 1, output_dim=16, input_length=data.shape[1]),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax')
])

# Adjusting the learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Model Compilation
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Model Summary
model.summary()

# Early Stopping to prevent overfitting
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Model Training with increased epochs
model.fit(x_train, y_train, epochs=50, validation_data=(x_test, y_test), callbacks=[early_stopping])

# Model Evaluation
loss, accuracy = model.evaluate(x_test, y_test)

print(f"Loss is: {loss}")
print(f"Test Accuracy: {accuracy}")

# Prediction Function
def predict_user_input(text):
    user_sequence = tokenizer.texts_to_sequences([text])
    user_data = pad_sequences(user_sequence, maxlen=data.shape[1], padding='post')
    prediction = model.predict(user_data)
    predicted_label = np.argmax(prediction, axis=1)[0]
    for key, value in label_mapping.items():
        if value == predicted_label:
            return key

user_text = input("Enter the review please: ")
predict_label = predict_user_input(user_text)
print(f"The predicted label is {predict_label}")


Index(['Unnamed: 0', 'Product_name', 'Review', 'Rating'], dtype='object')
   Unnamed: 0                                       Product_name  \
0           0  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
1           1  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
2           2  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
3           3  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
4           4  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   

                                              Review  Rating  
0  Best under 60k Great performanceI got it for a...       5  
1                                 Good perfomence...       5  
2  Great performance but usually it has also that...       5  
3           My wife is so happy and best product 👌🏻😘       5  
4  Light weight laptop with new amazing features,...       5  


KeyError: 'rating'