In [24]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

#Data Loading
data = pd.read_csv('iphone.csv')
print(data.head())
print("\n\n\n", data.isnull().sum())
print("\n\n\n", data['ratingScore'].value_counts())

#Text Cleaning
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):

    if isinstance(text, str):
        tokens = word_tokenize(text)
        tokens = [lemmatizer.lemmatize(word) for word in tokens if word.isalnum() and word not in stop_words]
        return ' '.join(tokens)
    else:
        return ' '
    
data['cleaned_review'] = data['reviewDescription'].apply(preprocess_text)
print("\n\n Cleaned data: ",data['cleaned_review'])


  productAsin country        date  isVerified  ratingScore  \
0  B09G9BL5CP   India  11-08-2024        True            4   
1  B09G9BL5CP   India  16-08-2024        True            5   
2  B09G9BL5CP   India  14-05-2024        True            4   
3  B09G9BL5CP   India  24-06-2024        True            5   
4  B09G9BL5CP   India  18-05-2024        True            5   

               reviewTitle                                  reviewDescription  \
0               No charger  Every thing is good about iPhones, there's not...   
1          iPhone 13 256GB  It look so fabulous, I am android user switche...   
2  Flip camera option nill  I tried to flip camera while recording but no ...   
3                  Product                                       100% genuine   
4             Good product         Happy to get the iPhone 13 in Amazon offer   

                                           reviewUrl  \
0  https://www.amazon.in/gp/customer-reviews/R345...   
1  https://www.amazon.in/gp/

In [43]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

#Sentiment Classification

def assign_sentiment(rating):
    if rating >= 4:
        return 2 #Positive
    elif rating ==3:
        return 1 #Neutral
    else:
        return 0 #Negative

data['label'] = data['ratingScore'].apply(assign_sentiment)
print("Data Sentiment Labels:\n", data['label'].head())

#Tokenization
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(data['cleaned_review'])
sequences = tokenizer.texts_to_sequences(data['cleaned_review'])
X = pad_sequences(sequences, maxlen=100)
y = data['label']

#Model
model = Sequential([
    Embedding(input_dim=5000, output_dim=128),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(3, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])
model.fit(X, y, epochs=5, batch_size=32, validation_split=0.2)

print("Compiled and ready to predict!!!")


Data Sentiment Labels:
 0    2
1    2
2    2
3    2
4    2
Name: label, dtype: int64
Epoch 1/5
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.6611 - loss: 0.8403 - val_accuracy: 0.5710 - val_loss: 0.9473
Epoch 2/5
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.7659 - loss: 0.6160 - val_accuracy: 0.7178 - val_loss: 0.7577
Epoch 3/5
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8684 - loss: 0.3731 - val_accuracy: 0.7618 - val_loss: 0.6885
Epoch 4/5
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.9058 - loss: 0.2491 - val_accuracy: 0.7830 - val_loss: 0.6932
Epoch 5/5
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.9370 - loss: 0.1833 - val_accuracy: 0.7847 - val_loss: 0.7606
Compiled and ready to predict!!!


In [45]:
predictions = model.predict(X)
data['predicted_sentiment'] = predictions.argmax(axis=1)
data.to_csv('sentiment_results.csv', index=False)

[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
