In [33]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk

In [34]:
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Microsoft\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Microsoft\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Microsoft\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Microsoft\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [35]:
df = pd.read_csv(r'Avaliacao_Produtos.csv')
df.head()

Unnamed: 0,product_id,product_title,rating,review,upvotes,downvotes
0,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,1-more flexible2-bass is very high3-sound clarity is good 4-battery back up to 6 to 8 hour's 5-main thing is fastest charging system is available in that. Only 20 min charge and get long up to 4 hours back up 6-killing look awesome 7-for gaming that product does not support 100% if you want for gaming then I'll recommend you please don't buy but you want for only music then this product is very well for you.. 8-no more wireless headphones are comparing with that headphones at this pric...,1390,276
1,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,Super sound and good looking I like that prize,643,133
2,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,"Very much satisfied with the device at this price point being from an awesome brand. Design wise , I liked it more than rockerz 255 model.Bought the model blindly on its very first day with zero reviews and it was a wonderful choice I must say.Bluetooth 5.0 enables us to connect devices simultaneously. And vibration motor for calls , its too good.",1449,328
3,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,"Nice headphone, bass was very good and sound is clear. I am purchase for calling purpose and best choice even riding on bike wear in one ear and opposite end voice was clear and no complaint for calling.. Battery backup 1 full day",160,28
4,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,Sound quality super battery backup super quality super this price range super and this value for money,533,114


In [36]:
sid = SentimentIntensityAnalyzer()

In [37]:
def get_sentiment(review):
    scores = sid.polarity_scores(review)
    compound = scores['compound']
    if compound > 0.05:
        return 'good'
    elif compound < -0.05:
        return 'bad'
    else:
        return 'mixed'

In [38]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

In [39]:
def preprocess_text(text):
    tokens = word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
    return tokens

In [40]:
def get_top_words(review):
    tokens = preprocess_text(review)
    
    # Calculate sentiment for each word
    word_sentiments = []
    for word in set(tokens):
        scores = sid.polarity_scores(word)
        if scores['compound'] != 0:  # Ignore neutral words
            word_sentiments.append((word, scores['compound']))
    
    # Sort by sentiment score
    word_sentiments.sort(key=lambda x: x[1], reverse=True)
    
    # Get top 3 positive and negative words
    pos_words = [w for w, s in word_sentiments if s > 0][:3]
    neg_words = [w for w, s in word_sentiments if s < 0][:3]
    
    # Pad with None if fewer than 3 words
    pos_words += [None] * (3 - len(pos_words))
    neg_words += [None] * (3 - len(neg_words))
    
    return pos_words + neg_words

In [41]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Microsoft\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [42]:
df['sentiment'] = df['review'].apply(get_sentiment)

In [43]:
word_columns = df['review'].apply(get_top_words)
word_columns = pd.DataFrame(word_columns.tolist(), columns=['pos_word1', 'pos_word2', 'pos_word3', 'neg_word1', 'neg_word2', 'neg_word3'], index=df.index)


In [44]:
df = pd.concat([df, word_columns], axis=1)

In [45]:
df.head(30)

Unnamed: 0,product_id,product_title,rating,review,upvotes,downvotes,sentiment,pos_word1,pos_word2,pos_word3,neg_word1,neg_word2,neg_word3
0,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,1-more flexible2-bass is very high3-sound clarity is good 4-battery back up to 6 to 8 hour's 5-main thing is fastest charging system is available in that. Only 20 min charge and get long up to 4 hours back up 6-killing look awesome 7-for gaming that product does not support 100% if you want for gaming then I'll recommend you please don't buy but you want for only music then this product is very well for you.. 8-no more wireless headphones are comparing with that headphones at this pric...,1390,276,good,awesome,good,support,,,
1,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,Super sound and good looking I like that prize,643,133,good,super,prize,good,,,
2,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,"Very much satisfied with the device at this price point being from an awesome brand. Design wise , I liked it more than rockerz 255 model.Bought the model blindly on its very first day with zero reviews and it was a wonderful choice I must say.Bluetooth 5.0 enables us to connect devices simultaneously. And vibration motor for calls , its too good.",1449,328,good,awesome,wonderful,wise,,,
3,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,"Nice headphone, bass was very good and sound is clear. I am purchase for calling purpose and best choice even riding on bike wear in one ear and opposite end voice was clear and no complaint for calling.. Battery backup 1 full day",160,28,good,best,good,nice,complaint,,
4,ACCFZGAQJGYCYDCM,BoAt Rockerz 235v2 with ASAP charging Version 5.0 Bluetooth Headset,5,Sound quality super battery backup super quality super this price range super and this value for money,533,114,good,super,value,,,,


In [48]:
df.to_csv('Avaliacao_Produtos_sentimento_EN.csv', index=False)

In [49]:
df['neg_word2'].unique()

array([None, 'aggressively', 'pain', 'disturbance', 'complaint', 'delay',
       'isolation', 'prblm', 'harsh', 'bad', 'unsatisfied', 'doubt',
       'problem', 'disappointed', 'disappoint', 'lag', 'gun', 'lost',
       'problematic', 'uncomfortable', 'annoying', 'waste', 'loose',
       'poor', 'frustrating', 'irritating', 'damage', 'wrong', 'hated',
       'hate', 'mistake', 'worst', 'dissatisfied', 'loss', 'damaged',
       'disadvantage', 'worry', 'killed', 'lower', 'horrible',
       'misbehaving', 'cheated', 'useless', 'sad', 'irritate',
       'disappointment', 'distorted', 'hell', 'losing', 'fake',
       'cancelling', 'hurt', 'regret', 'seriously', 'complained',
       'unhappy', 'complain', 'harm', 'fire', 'negative', 'disturbing',
       'failed', 'lack', 'dead', 'uneasiness', 'stop', 'defective',
       'cheater', 'stuck', 'die', 'insane', 'unclear', 'irritated', 'low',
       'stopped', 'drained', 'discomfort', 'lowest', 'complaining',
       'irritation', 'destroy', 'damn

In [50]:
df.describe()

Unnamed: 0,rating,upvotes,downvotes
count,9374.0,9374.0,9374.0
mean,4.094623,8.102731,1.911457
std,1.345756,102.29623,15.389562
min,1.0,0.0,0.0
25%,4.0,0.0,0.0
50%,5.0,0.0,0.0
75%,5.0,1.0,0.0
max,5.0,7611.0,435.0
