In [75]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import os


In [76]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("gitadityamaddali/flipkart-laptop-reviews")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'flipkart-laptop-reviews' dataset.
Path to dataset files: /kaggle/input/flipkart-laptop-reviews


In [77]:
# ----- Step 1: Load data -----
# Example path: change this to your actual Kaggle dataset path
data_path = os.path.join(path, "laptops_dataset_final_600.csv")

# Read CSV file
data = pd.read_csv(data_path, encoding='utf-8')

In [78]:
# ----- Step 2: Select relevant columns -----
data = data[['product_name', 'overall_rating']].dropna()

In [79]:
# ----- Step 3: Create labels -----
# Good (1) if rating >= 4, else Bad (0)
data['label'] = data['overall_rating'].apply(lambda x: 1 if x >= 4 else 0)

In [80]:
texts = data['product_name'].astype(str).values
labels = data['label'].values

In [81]:
# Tokenize text (convert words to integers)
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [82]:
# Pad sequences to equal length
max_len = max(len(seq) for seq in sequences)
X = pad_sequences(sequences, maxlen=max_len, padding='post')


In [83]:
# ----- Step 5: Split data -----
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)


In [84]:
# ----- Step 6: Build RNN model -----
vocab_size = len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=max_len),
    SimpleRNN(64, activation='tanh'),
    Dense(1, activation='sigmoid')  # 1 neuron for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [85]:
# ----- Step 7: Train the model -----
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m483/483[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.9638 - loss: 0.1057 - val_accuracy: 0.9889 - val_loss: 0.0307
Epoch 2/10
[1m483/483[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.9974 - loss: 0.0109 - val_accuracy: 0.9961 - val_loss: 0.0158
Epoch 3/10
[1m483/483[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9987 - loss: 0.0057 - val_accuracy: 0.9979 - val_loss: 0.0124
Epoch 4/10
[1m483/483[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9995 - loss: 0.0024 - val_accuracy: 0.9984 - val_loss: 0.0118
Epoch 5/10
[1m483/483[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9997 - loss: 0.0023 - val_accuracy: 0.9984 - val_loss: 0.0091
Epoch 6/10
[1m483/483[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9997 - loss: 0.0014 - val_accuracy: 0.9987 - val_loss: 0.0076
Epoch 7/10
[1m483/483[0m 

<keras.src.callbacks.history.History at 0x7de8858b1640>

In [86]:
model.summary()

In [87]:
# ----- Step 8: Evaluate on test data -----
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc*100:.2f}%")

[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9992 - loss: 0.0088
Test Accuracy: 99.92%


In [96]:
# ----- Step 9: Predict a new review -----
test_review = ["Lenovo Chromebook MediaTek Kompanio 520 - (4 GB/128 GB EMMC St..."]
seq = tokenizer.texts_to_sequences(test_review)
pad = pad_sequences(seq, maxlen=max_len, padding='post')
pred = model.predict(pad)[0][0]

if pred > 0.5:
    print("Predicted Sentiment: GOOD Review")
else:
    print("Predicted Sentiment: BAD Review")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Predicted Sentiment: BAD Review
