In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [2]:
import nltk
import re


In [3]:
import string
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Fetch the NLTK 'stopwords' corpus so the import and lookup below can find it.
nltk.download('stopwords')
from nltk.corpus import stopwords

# English stop words collected into a set.
# NOTE(review): no cell visible in this notebook reads `stopword` — confirm
# whether stop-word filtering was meant to happen inside `clean`.
stopword = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [4]:
# Load the Flipkart review CSV into `data` and preview the first rows.
# NOTE(review): '/content/...' is an absolute, environment-specific path —
# confirm the file exists at that location wherever this notebook is run.
data = pd.read_csv('/content/flipkart_reviews.csv')
data.head()

Unnamed: 0,Product_name,Review,Rating
0,Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...,Best under 60k Great performanceI got it for a...,5
1,Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...,Good perfomence...,5
2,Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...,Great performance but usually it has also that...,5
3,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,My wife is so happy and best product 👌🏻😘,5
4,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,"Light weight laptop with new amazing features,...",5


In [5]:
# List the column labels of the loaded frame.
data.columns

Index(['Product_name', 'Review', 'Rating'], dtype='object')

In [6]:
# Per-column count of missing values (isna is the name isnull aliases).
data.isna().sum()

Product_name    0
Review          0
Rating          0
dtype: int64

In [7]:
# Show the first 15 rows for a closer look at the raw reviews.
data.head(15)

Unnamed: 0,Product_name,Review,Rating
0,Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...,Best under 60k Great performanceI got it for a...,5
1,Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...,Good perfomence...,5
2,Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...,Great performance but usually it has also that...,5
3,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,My wife is so happy and best product 👌🏻😘,5
4,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,"Light weight laptop with new amazing features,...",5
5,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,"Amazing laptop, am so much happy, thanks for F...",5
6,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,Over all a good laptop for personal use,5
7,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,Thank you so much Flipkart,4
8,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,Amazing product,5
9,DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...,"Good for normal work , students, online classe...",3


In [8]:
# Display the raw review text column (bracket access avoids any clash
# between a column label and a DataFrame attribute name).
data['Review']

0       Best under 60k Great performanceI got it for a...
1                                      Good perfomence...
2       Great performance but usually it has also that...
3                My wife is so happy and best product 👌🏻😘
4       Light weight laptop with new amazing features,...
                              ...                        
2299    Great display, accurate colours at this price ...
2300    Superb monitor first brought 1 used for 2 mont...
2301                                              Awesome
2302                          Only one issue with adapter
2303    Worth the money u spend for this monitor Great...
Name: Review, Length: 2304, dtype: object

In [9]:
# Cleaning the data
# English Snowball stemmer, created once and shared by every call to `clean`.
stemmer = nltk.SnowballStemmer('english')

def clean(text):
  """Normalise one review into lowercase, punctuation-free, stemmed text.

  Args:
    text: the raw review. Any value is accepted because it is passed
      through str() first.

  Returns:
    A single string in which bracketed spans, URLs, HTML-like tags, newlines
    and ASCII punctuation have been removed, and every space-separated token
    has been run through the module-level `stemmer`.

  NOTE(review): the stemmed text is what the later sentiment cells score;
  stems such as 'amaz' may no longer match a word-based lexicon — confirm
  that stemming before scoring is intended.
  """
  text = str(text).lower()
  text = re.sub(r'\[.*?\]', '', text)
  # The text is already lowercased at this point, so the bare-domain
  # alternative must be lowercase too; an uppercase 'WWW' could never match.
  text = re.sub(r'https?://\S+|www\.\S+', '', text)
  text = re.sub(r'<.*?>+', '', text)
  # Newlines are deleted rather than replaced by a space, so words on either
  # side of a line break end up joined.
  text = re.sub(r'\n', '', text)
  text = re.sub('[%s]' % re.escape(string.punctuation), '', text)

  # split(' ') rather than split() so that runs of spaces are preserved:
  # the resulting empty tokens go through the stemmer like any other token.
  return " ".join(stemmer.stem(word) for word in text.split(' '))


In [10]:
# Replace each review with its cleaned form, element by element.
# Overwrites the column in place, so re-running this cell cleans already-cleaned text.
data['Review'] = data['Review'].map(clean)

In [12]:
# Visualize: share of reviews for each distinct 'Rating' value, as a donut chart.
import plotly.express as px

ratings = data['Rating'].value_counts()
# Index holds the rating values, .values holds how often each one occurs.
numbers, quantity = ratings.index, ratings.values

figure = px.pie(data, names=numbers, values=quantity, hole=0.5)
figure.show()

In [13]:
# Download NLTK's 'vader_lexicon' package before the analyzer is instantiated.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [14]:
# One analyzer instance, reused for every review scored in the following cells.
sentiment = SentimentIntensityAnalyzer()

In [16]:
# Run the analyzer once per review and fan the result out into three columns,
# instead of re-scoring every review separately for each column.
# The column spellings ('Postive', 'Netural') are kept unchanged because the
# later cells select and sum these exact names.
polarity = [sentiment.polarity_scores(review) for review in data['Review']]
data['Postive'] = [scores['pos'] for scores in polarity]
data['Negative'] = [scores['neg'] for scores in polarity]
data['Netural'] = [scores['neu'] for scores in polarity]

In [17]:
# Keep only the review text and its three score columns. This rebinds `data`,
# so 'Product_name' and 'Rating' are no longer available to any cell executed
# after this one (including a re-run of the rating chart cell).
data = data[["Review","Postive","Negative","Netural"]]


In [18]:
# Preview the first ten reviews together with their scores.
data.head(10)

Unnamed: 0,Review,Postive,Negative,Netural
0,best under 60k great performancei got it for a...,0.308,0.073,0.618
1,good perfom,0.744,0.0,0.256
2,great perform but usual it has also that game ...,0.139,0.0,0.861
3,my wife is so happi and best product 👌🏻😘,0.358,0.0,0.642
4,light weight laptop with new amaz featur batte...,0.0,0.0,1.0
5,amaz laptop am so much happi thank for flipkart,0.257,0.0,0.743
6,over all a good laptop for person use,0.326,0.0,0.674
7,thank you so much flipkart,0.385,0.0,0.615
8,amaz product,0.0,0.0,1.0
9,good for normal work student onlin class watc...,0.188,0.0,0.812


In [20]:
# Column totals computed with pandas' own reduction rather than Python's
# built-in sum(), which walks the Series one element at a time.
# float() keeps x, y and z as plain Python floats, as they were before.
# Note: Series.sum() skips NaN by default, whereas built-in sum() would propagate it.
x = float(data['Postive'].sum())
y = float(data['Negative'].sum())
z = float(data['Netural'].sum())

def sentiment_score(a,b,c):
  """Print which of the three totals is strictly the largest.

  Args:
    a: total positive score.
    b: total negative score.
    c: total neutral score.

  Prints 'Positive' when `a` exceeds both other totals, 'Negative' when `b`
  does, and 'Neutral' in every remaining case — which includes any tie for
  the top value. Nothing is returned.
  """
  if a > b and a > c:
    label = 'Positive'  # spelling corrected; this label was printed as 'Postive'
  elif b > a and b > c:
    label = 'Negative'
  else:
    # Reached when c is the largest, and also whenever the maximum is shared.
    label = 'Neutral'
  print(label)

# Report which summed score (positive, negative or neutral) dominates across all reviews.
sentiment_score(x,y,z)

Neutral


In [21]:
# Print each summed score next to its label, one line per sentiment.
for label, total in (('Positive:', x), ('Negative:', y), ('Neutral:', z)):
  print(label, total)

Positive: 716.1999999999997
Negative: 92.84800000000014
Neutral: 1494.9519999999982
