In [82]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

# Libraries for Sentiment Analysis
import re
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob
from wordcloud import WordCloud

# to avoid warnings
import warnings
warnings.filterwarnings('ignore')


In [83]:
# Fetch the NLTK resources this notebook relies on: 'stopwords' feeds the
# stop-word filter and 'wordnet' backs WordNetLemmatizer inside clean().
# 'omw-1.4' is presumably required by the WordNet reader on some NLTK
# versions — TODO confirm it is still needed.
nltk.download('omw-1.4')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

IMPORTING DATASET

In [84]:
# Load the review dataset; the path is relative to the notebook's working directory.
# latin-1 is passed explicitly — presumably the CSV is not valid UTF-8 (TODO confirm).
df = pd.read_csv('redmi6.csv',encoding="latin-1")
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0 out of 5 stars,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,
1,vry small size mobile,Raza ji,3.0 out of 5 stars,on 15 September 2018,Others,All ok but vry small size mobile,7 people found this helpful
2,Full display not working in all application.,Vaibhav Patel,3.0 out of 5 stars,on 18 September 2018,Others,Quite good,7 people found this helpful
3,Value for Money,Amazon Customer,5.0 out of 5 stars,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2 people found this helpful
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0 out of 5 stars,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6 people found this helpful


In [85]:
# Column dtypes and non-null counts, to spot columns with missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280 entries, 0 to 279
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Review Title   280 non-null    object
 1   Customer name  280 non-null    object
 2   Rating         280 non-null    object
 3   Date           280 non-null    object
 4   Category       280 non-null    object
 5   Comments       280 non-null    object
 6   Useful         110 non-null    object
dtypes: object(7)
memory usage: 15.4+ KB


In [86]:
# (rows, columns) of the raw frame
df.shape

(280, 7)

Data Preprocessing


In [87]:
def extract_number(text):
  """Return the helpful-vote count encoded in a 'Useful' cell as an int.

  Accepted inputs:
    * a string such as "7 people found this helpful" -> 7
      (thousands separators are honoured: "1,234 people ..." -> 1234);
    * a string starting with the word "one" (any case), e.g.
      "One person found this helpful" -> 1;
    * an already-numeric value, returned as int, so re-running the cell on a
      converted column does not reset every count to 0;
    * anything else (NaN, None, text without a count) -> 0.
  """
  # Numeric input: keep finite values, treat NaN/inf (missing cell) as 0.
  if isinstance(text, (int, float, np.integer, np.floating)):
    return int(text) if np.isfinite(text) else 0

  if not isinstance(text, str):
    return 0

  # First alternative matches comma-grouped thousands, second plain digits.
  match = re.search(r'\d{1,3}(?:,\d{3})+|\d+', text)
  if match:
    return int(match.group().replace(',', ''))

  # A single vote may be spelled out instead of written as a digit.
  if re.match(r'\s*one\b', text, flags=re.IGNORECASE):
    return 1

  return 0

# Overwrite the 'Useful' column in place with the integer returned by
# extract_number for each cell
df['Useful'] = df['Useful'].apply(extract_number)

# Display the updated DataFrame
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0 out of 5 stars,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,0
1,vry small size mobile,Raza ji,3.0 out of 5 stars,on 15 September 2018,Others,All ok but vry small size mobile,7
2,Full display not working in all application.,Vaibhav Patel,3.0 out of 5 stars,on 18 September 2018,Others,Quite good,7
3,Value for Money,Amazon Customer,5.0 out of 5 stars,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0 out of 5 stars,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6


In [88]:
def clean_rating(rating):
  """Convert a rating cell such as "4.0 out of 5 stars" to a float.

  The first whitespace-separated token of a string is parsed as the rating.
  Already-numeric values are returned as float, so re-running the cell on a
  converted column does not turn every rating into NaN.
  Anything that cannot be parsed (None, empty string, non-numeric first
  token) yields ``np.nan``.
  """
  # Numeric input passes through unchanged (NaN stays NaN).
  if isinstance(rating, (int, float, np.integer, np.floating)):
    return float(rating)

  if not isinstance(rating, str):
    return np.nan

  tokens = rating.split()
  if not tokens:
    return np.nan

  try:
    return float(tokens[0])
  except ValueError:
    # First token is not a number, e.g. free text instead of "N out of 5 stars".
    return np.nan
# Overwrite the 'Rating' column in place with the value clean_rating returns
# for each cell, then preview the result
df['Rating'] = df['Rating'].apply(clean_rating)
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,0
1,vry small size mobile,Raza ji,3.0,on 15 September 2018,Others,All ok but vry small size mobile,7
2,Full display not working in all application.,Vaibhav Patel,3.0,on 18 September 2018,Others,Quite good,7
3,Value for Money,Amazon Customer,5.0,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6


In [89]:
# How many reviews fall into each category
df["Category"].value_counts()

Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
Others,180
Display,36
Battery,29
Camera,28
Delivery,7


# **DATA ANALYSIS**


Group the reviews by 'Category' and count the number of reviews in each category

In [90]:
# Non-null 'Comments' per category, flattened back into a two-column frame
review_count = (
    df.groupby('Category')['Comments']
    .count()
    .reset_index()
)


In [91]:
# Bar chart of the per-category review counts
fig = px.bar(
    review_count,
    x='Category',
    y='Comments',
    title='Number of Reviews by Category',
)
fig.show()

RATING COMPARISON BY CATEGORY

In [92]:
# Compare categories by their average rating. A sum would scale with the
# number of reviews in each category and mostly reproduce the review-count
# chart rather than show how well each category is rated.
rating_comparison = df.groupby('Category')['Rating'].mean().reset_index()
fig = px.bar(rating_comparison, x='Category', y='Rating', title='Average Rating by Category')
fig.show()

Sentiment Analysis for prediction

In [93]:
def clean(text):
    """Normalise a raw review comment into a space-separated string of lemmas.

    Steps, in order: lowercase, strip URLs, replace every character outside
    a-z with a space, split on whitespace, drop English stop words, lemmatize
    the remaining words with WordNetLemmatizer, and join them with single
    spaces.

    Parameters:
        text: the raw comment; must be a ``str`` (``.lower()`` is called on it).

    Returns:
        The cleaned comment as a ``str`` (empty if nothing survives filtering).
    """
    # Convert text to lowercase
    text = text.lower()

    # Remove URLs; otherwise the next step would split them into stray
    # tokens such as "http" and "com"
    text = re.sub(r'(?:https?://|www\.)\S+', ' ', text)

    # Replace anything other than alphabets a-z with a space
    text = re.sub('[^a-z]', ' ', text)

    # Split the text into single words
    words = text.split()

    # Build the stop-word set once per call instead of once per token
    # NOTE(review): the NLTK English list includes negations such as "not"
    # and "no"; dropping them can flip sentiment ("not good" -> "good").
    # Verify this is acceptable for the polarity scores computed downstream.
    stop_words = set(stopwords.words('english'))

    # Initialize WordNetLemmatizer
    lm = WordNetLemmatizer()

    # Lemmatize the remaining words and join them back into a sentence
    return ' '.join(lm.lemmatize(word) for word in words if word not in stop_words)

In [94]:
def getpolarity(text):
    """Return the TextBlob polarity score of ``text``.

    Per the TextBlob documentation the score is a float in [-1.0, 1.0],
    negative for negative sentiment and positive for positive sentiment.
    Requires ``TextBlob`` to be imported in the notebook's import cell.
    """
    return TextBlob(text).sentiment.polarity

def getsubjectivity(text):
    """Return the TextBlob subjectivity score of ``text``.

    Per the TextBlob documentation the score is a float in [0.0, 1.0],
    where 0.0 is very objective and 1.0 is very subjective.
    Requires ``TextBlob`` to be imported in the notebook's import cell.
    """
    return TextBlob(text).sentiment.subjectivity

def getAnalysis(score):
    """Map a numeric polarity score to a sentiment label.

    Returns 'negative' for scores below zero, 'neutral' for exactly zero,
    and 'positive' for everything else.
    """
    if score < 0:
        return 'negative'
    if score == 0:
        return 'neutral'
    return 'positive'

In [95]:
# Re-inspect the frame before deriving the text-based columns
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,0
1,vry small size mobile,Raza ji,3.0,on 15 September 2018,Others,All ok but vry small size mobile,7
2,Full display not working in all application.,Vaibhav Patel,3.0,on 18 September 2018,Others,Quite good,7
3,Value for Money,Amazon Customer,5.0,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6


In [96]:
# Run every comment through clean() and keep the result in a new
# 'Comments_clean' column; the raw 'Comments' column is left untouched
df['Comments_clean'] = df['Comments'].apply(clean)
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful,Comments_clean
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,0,another midrange killer smartphone xiaomi majo...
1,vry small size mobile,Raza ji,3.0,on 15 September 2018,Others,All ok but vry small size mobile,7,ok vry small size mobile
2,Full display not working in all application.,Vaibhav Patel,3.0,on 18 September 2018,Others,Quite good,7,quite good
3,Value for Money,Amazon Customer,5.0,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2,redmi always king budget segment yet another g...
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6,worst product mi hardcore fan mi one really di...


In [97]:
# Score each cleaned comment with getsubjectivity() and store it in 'subjectivity'
df['subjectivity'] = df['Comments_clean'].apply(getsubjectivity)
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful,Comments_clean,subjectivity
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,0,another midrange killer smartphone xiaomi majo...,0.458063
1,vry small size mobile,Raza ji,3.0,on 15 September 2018,Others,All ok but vry small size mobile,7,ok vry small size mobile,0.45
2,Full display not working in all application.,Vaibhav Patel,3.0,on 18 September 2018,Others,Quite good,7,quite good,0.6
3,Value for Money,Amazon Customer,5.0,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2,redmi always king budget segment yet another g...,0.585119
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6,worst product mi hardcore fan mi one really di...,0.85


In [98]:
# Score each cleaned comment with getpolarity(), then label the score as
# 'negative' / 'neutral' / 'positive' via getAnalysis()
df['polarity'] = df['Comments_clean'].apply(getpolarity)
df['sentiment'] = df['polarity'].apply(getAnalysis)
df.head()

Unnamed: 0,Review Title,Customer name,Rating,Date,Category,Comments,Useful,Comments_clean,subjectivity,polarity,sentiment
0,Another Midrange killer Smartphone by Xiaomi,Rishikumar Thakur,4.0,on 1 October 2018,Display,Another Midrange killer Smartphone by Xiaomi\n...,0,another midrange killer smartphone xiaomi majo...,0.458063,0.169989,positive
1,vry small size mobile,Raza ji,3.0,on 15 September 2018,Others,All ok but vry small size mobile,7,ok vry small size mobile,0.45,0.125,positive
2,Full display not working in all application.,Vaibhav Patel,3.0,on 18 September 2018,Others,Quite good,7,quite good,0.6,0.7,positive
3,Value for Money,Amazon Customer,5.0,on 28 September 2018,Display,Redmi has always have been the the king of bud...,2,redmi always king budget segment yet another g...,0.585119,0.186508,positive
4,Not worth for the money,Sudhakaran Wadakkancheri,2.0,on 18 September 2018,Others,worst product from MI. I am a hardcore fan of ...,6,worst product mi hardcore fan mi one really di...,0.85,-0.8,negative


In [99]:
# Tally the reviews for every (Category, sentiment) pair
sentiment_count = (
    df.groupby(['Category', 'sentiment'])['Comments']
    .count()
    .reset_index()
)

# Grouped bar chart: one bar per sentiment label within each category
fig = px.bar(
    sentiment_count,
    x='Category',
    y='Comments',
    color='sentiment',
    barmode='group',
    title='Sentiment Count by Category',
)
fig.show()
