In [1]:
import pandas as pd

In [2]:
reviews = pd.read_csv('../data/processed data/review_data.csv')

In [3]:
reviews.head()

Unnamed: 0,productID,style,reviewerID,reviewerName,reviewText,summary,rating
0,B00009W3PA,{'Size:': ' 6-Foot'},A1AHW6I678O6F2,kevin.,good item,Five Stars,5
1,B0015UGPWQ,{'Size Name:': ' 4 Inch'},A1AHW6I678O6F2,kevin.,very nice product,Five Stars,5
2,B001769IVI,,A1AHW6I678O6F2,kevin.,a must for washers,Five Stars,5
3,B00009W3PA,{'Size:': ' 6-Foot'},A8R48NKTGCJDQ,CDBrannom,Fit my new LG dryer perfectly.,Five Stars,5
4,B0015UGPWQ,{'Size Name:': ' 4 Inch'},A8R48NKTGCJDQ,CDBrannom,Fits perfectly,Five Stars,5


In [4]:
reviews['reviewLen'] = reviews['reviewText'].str.split().map(lambda x: len(x))

In [5]:
reviews.shape

(166, 8)

<b> Since we have small amount of data, we will use all the review text data, larger length and smaller length review.</b>

<b> We will analyse the review texts using Sentiment analysis. We will be using 3 different NLP sentiment analysis packages:
    
    * VADER : It uses a list of lexical features (e.g. word) which are labeled as positive or negative according to their   semantic orientation to calculate the text sentiment. Vader sentiment returns the probability of a given input sentence to be: Positive, negative, and neutral. 
    
    * TextBlob : Textblob sentiment analyzer returns two properties for a given input sentence: 
    Polarity is a float that lies between [-1,1], -1 indicates negative sentiment and +1 indicates positive sentiments.
    Subjectivity is also a float which lies in the range of [0,1]. Subjective sentences generally refer to personal opinion, emotion, or judgment.
    
    * Flair
</b>

# Vader sentiment analysis

In [6]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

In [7]:
def sentiment_analysis_scores(sentence):
    score = analyser.polarity_scores(sentence)
    return score

In [8]:
def sentiments_vader(reviews):

    reviews['text_sentiment'] = reviews['reviewText'].apply(sentiment_analysis_scores)

    reviews['compound'] = [x.get('compound') for x in reviews['text_sentiment']]
    reviews['neg'] = [x.get('neg') for x in reviews['text_sentiment']]
    reviews['neu'] = [x.get('neu') for x in reviews['text_sentiment']]
    reviews['pos'] = [x.get('pos') for x in reviews['text_sentiment']]
    
    return reviews

In [9]:
reviews_sent = sentiments_vader(reviews)

reviews_sent[reviews_sent['rating'] == 5].tail(10)

Unnamed: 0,productID,style,reviewerID,reviewerName,reviewText,summary,rating,reviewLen,text_sentiment,compound,neg,neu,pos
156,B00DM8J11Q,,AFUVGAUNQVT0S,Brian M.,Common failure with the whilrlpool agitator af...,Common failure with the whilrlpool agitator af...,5,23,"{'neg': 0.213, 'neu': 0.787, 'pos': 0.0, 'comp...",-0.6908,0.213,0.787,0.0
157,B00DM8KQ2Y,,AFUVGAUNQVT0S,Brian M.,Common failure part on Whirlpool built top loa...,This is an easy replacement and the customer w...,5,24,"{'neg': 0.182, 'neu': 0.662, 'pos': 0.156, 'co...",-0.2023,0.182,0.662,0.156
158,B00P9C66PM,,AFUVGAUNQVT0S,Brian M.,the spin clutch is a wear part on WP built top...,... care of a slow spin issue and at a great p...,5,28,"{'neg': 0.0, 'neu': 0.67, 'pos': 0.33, 'compou...",0.8964,0.0,0.67,0.33
159,B004XLDHSE,,A2O7BWHBIV1HWZ,A/C dude,Worked great!!,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.176, 'pos': 0.824, 'comp...",0.6892,0.0,0.176,0.824
160,B00570RQ0A,,A2O7BWHBIV1HWZ,A/C dude,worked great!!,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.176, 'pos': 0.824, 'comp...",0.6892,0.0,0.176,0.824
161,B008YDSH6E,,A2O7BWHBIV1HWZ,A/C dude,Worked great!! I just wonder how long they las...,Worked great!! I just wonder how long they last,5,20,"{'neg': 0.0, 'neu': 0.68, 'pos': 0.32, 'compou...",0.8577,0.0,0.68,0.32
162,B00DM8J11Q,,A2O7BWHBIV1HWZ,A/C dude,Great!!,Five Stars,5,1,"{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound...",0.6892,0.0,0.0,1.0
163,B00DM8JA7Q,,A2O7BWHBIV1HWZ,A/C dude,worked great,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.196, 'pos': 0.804, 'comp...",0.6249,0.0,0.196,0.804
164,B00JGTUQJ6,,ALZOND1IEW8KS,Bill,Filter works just like the more expensive filters,works great,5,8,"{'neg': 0.0, 'neu': 0.737, 'pos': 0.263, 'comp...",0.3612,0.0,0.737,0.263
165,B00JGTUQJ6,,A2OP8DEIIUESP,Chris,First time I used this brand. But will buy it ...,water tastes great,5,23,"{'neg': 0.0, 'neu': 0.871, 'pos': 0.129, 'comp...",0.5023,0.0,0.871,0.129


# TextBlob sentiment analysis

In [10]:
# Creating lambda functions to find the polarity and subjectivity of each routine

from textblob import TextBlob

pol = lambda x: TextBlob(x).sentiment.polarity
sub = lambda x: TextBlob(x).sentiment.subjectivity

reviews_sent['polarity'] = reviews_sent['reviewText'].apply(pol)
reviews_sent['subjectivity'] = reviews_sent['reviewText'].apply(sub)
reviews_sent[reviews_sent['rating'] == 5].tail(10)

Unnamed: 0,productID,style,reviewerID,reviewerName,reviewText,summary,rating,reviewLen,text_sentiment,compound,neg,neu,pos,polarity,subjectivity
156,B00DM8J11Q,,AFUVGAUNQVT0S,Brian M.,Common failure with the whilrlpool agitator af...,Common failure with the whilrlpool agitator af...,5,23,"{'neg': 0.213, 'neu': 0.787, 'pos': 0.0, 'comp...",-0.6908,0.213,0.787,0.0,-0.272222,0.3
157,B00DM8KQ2Y,,AFUVGAUNQVT0S,Brian M.,Common failure part on Whirlpool built top loa...,This is an easy replacement and the customer w...,5,24,"{'neg': 0.182, 'neu': 0.662, 'pos': 0.156, 'co...",-0.2023,0.182,0.662,0.156,0.079167,0.533333
158,B00P9C66PM,,AFUVGAUNQVT0S,Brian M.,the spin clutch is a wear part on WP built top...,... care of a slow spin issue and at a great p...,5,28,"{'neg': 0.0, 'neu': 0.67, 'pos': 0.33, 'compou...",0.8964,0.0,0.67,0.33,0.333333,0.55
159,B004XLDHSE,,A2O7BWHBIV1HWZ,A/C dude,Worked great!!,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.176, 'pos': 0.824, 'comp...",0.6892,0.0,0.176,0.824,1.0,0.75
160,B00570RQ0A,,A2O7BWHBIV1HWZ,A/C dude,worked great!!,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.176, 'pos': 0.824, 'comp...",0.6892,0.0,0.176,0.824,1.0,0.75
161,B008YDSH6E,,A2O7BWHBIV1HWZ,A/C dude,Worked great!! I just wonder how long they las...,Worked great!! I just wonder how long they last,5,20,"{'neg': 0.0, 'neu': 0.68, 'pos': 0.32, 'compou...",0.8577,0.0,0.68,0.32,0.457552,0.554167
162,B00DM8J11Q,,A2O7BWHBIV1HWZ,A/C dude,Great!!,Five Stars,5,1,"{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound...",0.6892,0.0,0.0,1.0,1.0,0.75
163,B00DM8JA7Q,,A2O7BWHBIV1HWZ,A/C dude,worked great,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.196, 'pos': 0.804, 'comp...",0.6249,0.0,0.196,0.804,0.8,0.75
164,B00JGTUQJ6,,ALZOND1IEW8KS,Bill,Filter works just like the more expensive filters,works great,5,8,"{'neg': 0.0, 'neu': 0.737, 'pos': 0.263, 'comp...",0.3612,0.0,0.737,0.263,0.0,0.6
165,B00JGTUQJ6,,A2OP8DEIIUESP,Chris,First time I used this brand. But will buy it ...,water tastes great,5,23,"{'neg': 0.0, 'neu': 0.871, 'pos': 0.129, 'comp...",0.5023,0.0,0.871,0.129,0.0,0.291667


# Flair sentiment analysis

In [11]:
from flair.models import TextClassifier
from flair.data import Sentence
import re

def isfloat(value):
  try:
    float(value)
    return True
  except ValueError:
    return False

classifier = TextClassifier.load('en-sentiment')
def flair_prediction(x):
    sentence = Sentence(x)
    classifier.predict(sentence)
    score = str(sentence.labels[0])
    score = score.replace('(',' ').replace(')',' ')
    number = [float(s) for s in score.split() if isfloat(s) is True]
    
    if "POSITIVE" in score:
        return number[0]
    elif "NEGATIVE" in score:
        return -number[0]

2022-05-04 19:04:29,190 loading file C:\Users\varun\.flair\models\sentiment-en-mix-distillbert_4.pt


In [12]:
reviews_sent["flair_sentiment"] = reviews_sent["reviewText"].apply(flair_prediction)
reviews_sent.sample(20)

Unnamed: 0,productID,style,reviewerID,reviewerName,reviewText,summary,rating,reviewLen,text_sentiment,compound,neg,neu,pos,polarity,subjectivity,flair_sentiment
13,B00CW0O1EW,,A2CIEGHZ7L1WWR,albert j. kong,Price is good and it makes sense to have a few...,Five Stars,5,18,"{'neg': 0.0, 'neu': 0.605, 'pos': 0.395, 'comp...",0.8689,0.0,0.605,0.395,0.5,0.566667,0.9994
81,B000XXWOGW,,A1EUO0BU72JR7T,Bilbo,Consider the wisdom of spending a lot of money...,"Works well as a replacement, if that's what yo...",5,83,"{'neg': 0.079, 'neu': 0.823, 'pos': 0.098, 'co...",0.1779,0.079,0.823,0.098,0.2125,0.45625,-10.0
69,B000VL060M,,A34A1UP40713F8,James. Backus,I bought a new range hood and found I needed a...,I bought a new range hood and found I needed ...,5,49,"{'neg': 0.0, 'neu': 0.842, 'pos': 0.158, 'comp...",0.8255,0.0,0.842,0.158,0.434545,0.561818,9.0
15,B00P9C66PM,,A2CIEGHZ7L1WWR,albert j. kong,Price and delivery was excellent.,Five Stars,5,5,"{'neg': 0.0, 'neu': 0.519, 'pos': 0.481, 'comp...",0.5719,0.0,0.519,0.481,1.0,1.0,0.9976
132,B0056I99WG,,A6FPI5CM4W5WB,Tim Veteto,Good job,Good job,5,2,"{'neg': 0.0, 'neu': 0.256, 'pos': 0.744, 'comp...",0.4404,0.0,0.256,0.744,0.7,0.6,0.9976
68,B000NCTOUM,,A34A1UP40713F8,James. Backus,A spare thermofuse. Two years ago my dryer sp...,A spare thermofuse. Two years ago my dryer spu...,4,40,"{'neg': 0.067, 'neu': 0.933, 'pos': 0.0, 'comp...",-0.4215,0.067,0.933,0.0,0.0,0.0,0.9999
78,B00DM8KQ2Y,,A26M3TN8QICJ3K,Marty Baker,"Awesome, exactly what I needed",Five Stars,5,5,"{'neg': 0.0, 'neu': 0.494, 'pos': 0.506, 'comp...",0.6249,0.0,0.494,0.506,0.625,0.625,0.9981
137,B00MGMWTQS,,A3H61AMBJ177DG,Randy Bailey,as advertised,Five Stars,5,2,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,1.0,0.0,0.0,0.0,0.9923
154,B008YDSH6E,,AFUVGAUNQVT0S,Brian M.,These start devices are great for all domestic...,Got compressor start problem?,5,22,"{'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp...",0.8622,0.0,0.664,0.336,0.434583,0.65,0.9995
164,B00JGTUQJ6,,ALZOND1IEW8KS,Bill,Filter works just like the more expensive filters,works great,5,8,"{'neg': 0.0, 'neu': 0.737, 'pos': 0.263, 'comp...",0.3612,0.0,0.737,0.263,0.0,0.6,0.9991


<b> It would be better to scale the flair sentiment scores, because there is a wide range.</b>

In [13]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
reviews_sent['flair_scaled'] = scaler.fit_transform(reviews_sent[["flair_sentiment"]])
reviews_sent.sample(20)

Unnamed: 0,productID,style,reviewerID,reviewerName,reviewText,summary,rating,reviewLen,text_sentiment,compound,neg,neu,pos,polarity,subjectivity,flair_sentiment,flair_scaled
58,B004XLDDNI,,A11SCLK8GDDN3C,mrclobhead,"All was fine, product was as advertised.",Five Stars,5,7,"{'neg': 0.0, 'neu': 0.769, 'pos': 0.231, 'comp...",0.2023,0.0,0.769,0.231,0.416667,0.5,0.9949,0.038921
1,B0015UGPWQ,{'Size Name:': ' 4 Inch'},A1AHW6I678O6F2,kevin.,very nice product,Five Stars,5,3,"{'neg': 0.0, 'neu': 0.393, 'pos': 0.607, 'comp...",0.4754,0.0,0.393,0.607,0.78,1.0,0.9863,0.038917
41,B000NCTOUM,,A3GP2HMB2AFOKF,Juan,Great product,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.196, 'pos': 0.804, 'comp...",0.6249,0.0,0.196,0.804,0.8,0.75,0.9886,0.038918
0,B00009W3PA,{'Size:': ' 6-Foot'},A1AHW6I678O6F2,kevin.,good item,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.256, 'pos': 0.744, 'comp...",0.4404,0.0,0.256,0.744,0.7,0.6,0.882,0.038867
9,B0056I99WG,,AR3OHHHW01A8E,Calvin E Reames,Perfect fit for Maytag dryer,Part was a perfect fit,5,5,"{'neg': 0.0, 'neu': 0.326, 'pos': 0.674, 'comp...",0.7351,0.0,0.326,0.674,0.7,0.7,0.9999,0.038924
31,B0006GVNOA,,A1WD61B0C3KQZB,Jason W.,Great product but they need to include more ro...,"Works Great, Just Use Common Sense When Doing ...",4,488,"{'neg': 0.064, 'neu': 0.823, 'pos': 0.113, 'co...",0.9807,0.064,0.823,0.113,0.023,0.470745,-60.0,0.009611
108,B00CW0O1EW,,A13391AZAFJ67K,Fred A,Exelent,Five Stars,5,1,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,1.0,0.0,0.0,0.0,0.9997,0.038923
146,B004XLDDNI,,A2V0JXLJ9VCNNX,Amazon Customer,GREAT PART.,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.196, 'pos': 0.804, 'comp...",0.6249,0.0,0.196,0.804,0.8,0.75,0.9957,0.038922
153,B004XLDE5A,,AFUVGAUNQVT0S,Brian M.,"Common failure part in these dryers, but with ...",but with the great price on these replacements...,5,29,"{'neg': 0.061, 'neu': 0.769, 'pos': 0.169, 'co...",0.6996,0.061,0.769,0.169,0.15631,0.48381,0.9993,0.038923
160,B00570RQ0A,,A2O7BWHBIV1HWZ,A/C dude,worked great!!,Five Stars,5,2,"{'neg': 0.0, 'neu': 0.176, 'pos': 0.824, 'comp...",0.6892,0.0,0.176,0.824,1.0,0.75,0.9981,0.038923


In [14]:
review_transform = reviews_sent[['rating','flair_sentiment']]

In [15]:
from sklearn.preprocessing import PowerTransformer, QuantileTransformer
pt = PowerTransformer()

review_transform['flair_power'] = pt.fit_transform(X = review_transform[['flair_sentiment']])

review_transform

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  review_transform['flair_power'] = pt.fit_transform(X = review_transform[['flair_sentiment']])


Unnamed: 0,rating,flair_sentiment,flair_power
0,5,0.8820,0.055531
1,5,0.9863,0.057186
2,5,0.9998,0.057398
3,5,0.9998,0.057398
4,5,0.9975,0.057362
...,...,...,...
161,5,0.9998,0.057398
162,5,0.9973,0.057359
163,5,0.9938,0.057304
164,5,0.9991,0.057387


In [16]:
qt = QuantileTransformer()

review_transform['flair_quantile'] = qt.fit_transform(X = review_transform[['flair_sentiment']])

review_transform

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  review_transform['flair_quantile'] = qt.fit_transform(X = review_transform[['flair_sentiment']])


Unnamed: 0,rating,flair_sentiment,flair_power,flair_quantile
0,5,0.8820,0.055531,0.206061
1,5,0.9863,0.057186,0.296970
2,5,0.9998,0.057398,0.836364
3,5,0.9998,0.057398,0.836364
4,5,0.9975,0.057362,0.557576
...,...,...,...,...
161,5,0.9998,0.057398,0.836364
162,5,0.9973,0.057359,0.551515
163,5,0.9938,0.057304,0.454545
164,5,0.9991,0.057387,0.693939


In [17]:
qt = QuantileTransformer(output_distribution = 'normal')

review_transform['flair_quantile_normal'] = qt.fit_transform(X = review_transform[['flair_sentiment']])

review_transform

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  review_transform['flair_quantile_normal'] = qt.fit_transform(X = review_transform[['flair_sentiment']])


Unnamed: 0,rating,flair_sentiment,flair_power,flair_quantile,flair_quantile_normal
0,5,0.8820,0.055531,0.206061,-0.820166
1,5,0.9863,0.057186,0.296970,-0.533136
2,5,0.9998,0.057398,0.836364,0.979622
3,5,0.9998,0.057398,0.836364,0.979622
4,5,0.9975,0.057362,0.557576,0.144826
...,...,...,...,...,...
161,5,0.9998,0.057398,0.836364,0.979622
162,5,0.9973,0.057359,0.551515,0.129490
163,5,0.9938,0.057304,0.454545,-0.114185
164,5,0.9991,0.057387,0.693939,0.507048


In [18]:
reviews_sent.tail(40)

Unnamed: 0,productID,style,reviewerID,reviewerName,reviewText,summary,rating,reviewLen,text_sentiment,compound,neg,neu,pos,polarity,subjectivity,flair_sentiment,flair_scaled
126,B00P9C66PM,,A23KSPVPHZF6EO,Angela Samos Guardia,"This works great, wonderful product it worked ...",Five Stars,5,8,"{'neg': 0.0, 'neu': 0.318, 'pos': 0.682, 'comp...",0.8934,0.0,0.318,0.682,0.833333,0.783333,0.9897,0.038919
127,B001DPFP88,,A6FPI5CM4W5WB,Tim Veteto,Good Job,Good Job,5,2,"{'neg': 0.0, 'neu': 0.256, 'pos': 0.744, 'comp...",0.4404,0.0,0.256,0.744,0.7,0.6,0.9976,0.038922
128,B00470A1S0,,A6FPI5CM4W5WB,Tim Veteto,"Great product, on time.",Great,5,4,"{'neg': 0.0, 'neu': 0.423, 'pos': 0.577, 'comp...",0.6249,0.0,0.423,0.577,0.8,0.75,0.9855,0.038917
129,B004XLDDNI,,A6FPI5CM4W5WB,Tim Veteto,Great,Great,5,1,"{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound...",0.6249,0.0,0.0,1.0,0.8,0.75,0.9901,0.038919
130,B004XLDE5A,,A6FPI5CM4W5WB,Tim Veteto,Great,Great,5,1,"{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound...",0.6249,0.0,0.0,1.0,0.8,0.75,0.9901,0.038919
131,B0053F80JA,,A6FPI5CM4W5WB,Tim Veteto,Great,Great,5,1,"{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound...",0.6249,0.0,0.0,1.0,0.8,0.75,0.9901,0.038919
132,B0056I99WG,,A6FPI5CM4W5WB,Tim Veteto,Good job,Good job,5,2,"{'neg': 0.0, 'neu': 0.256, 'pos': 0.744, 'comp...",0.4404,0.0,0.256,0.744,0.7,0.6,0.9976,0.038922
133,B00MGMWTQS,,A6FPI5CM4W5WB,Tim Veteto,Great,Great,5,1,"{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound...",0.6249,0.0,0.0,1.0,0.8,0.75,0.9901,0.038919
134,B001DPFP88,,A3H61AMBJ177DG,Randy Bailey,as advertised,Five Stars,5,2,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,1.0,0.0,0.0,0.0,0.9923,0.03892
135,B00470A1S0,,A3H61AMBJ177DG,Randy Bailey,as advertised,Five Stars,5,2,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,0.0,1.0,0.0,0.0,0.0,0.9923,0.03892


In [19]:
reviews_sent['rating'].describe()

count    166.000000
mean       4.620482
std        0.956922
min        1.000000
25%        5.000000
50%        5.000000
75%        5.000000
max        5.000000
Name: rating, dtype: float64

In [20]:
# compound if > 0 its a positive sentiment else negative
reviews_sent['compound'].describe()

count    166.000000
mean       0.514346
std        0.354421
min       -0.948100
25%        0.296000
50%        0.624900
75%        0.761400
max        0.993500
Name: compound, dtype: float64

In [21]:
# polarity if > 0 its a positive sentiment else negative
reviews_sent['polarity'].describe()

count    166.000000
mean       0.441175
std        0.323945
min       -0.272222
25%        0.154989
50%        0.500000
75%        0.700000
max        1.000000
Name: polarity, dtype: float64

In [22]:
review_transform[['flair_sentiment','flair_power','flair_quantile']].describe()

Unnamed: 0,flair_sentiment,flair_power,flair_quantile
count,166.0,166.0,166.0
mean,37.435637,-8.401914e-17,0.500146
std,267.324035,1.003026,0.291215
min,-80.0,-5.08547,0.0
25%,0.9314,0.05631851,0.242424
50%,0.9949,0.05732098,0.49697
75%,0.9994,0.05739162,0.748485
max,2001.0,4.784332,1.0


In [23]:
# range between 0 to 1 for scaled
reviews_sent['flair_scaled'].describe()

count    166.000000
mean       0.056432
std        0.128459
min        0.000000
25%        0.038891
50%        0.038921
75%        0.038923
max        1.000000
Name: flair_scaled, dtype: float64

<b> The Vader and TextBlob sentiment analysis looks pretty decent compared to flair sentiment analysis. The range of sentiment score for flair is large. The former 2 packages looks more promising.</b>

In [24]:
import scattertext as st

In [25]:
reviews_sent['Liked'] = reviews_sent.rating.apply(lambda x: x > 3).map({True: 'Liked', False: 'Not Liked'})

reviews_sent['Liked']

0      Liked
1      Liked
2      Liked
3      Liked
4      Liked
       ...  
161    Liked
162    Liked
163    Liked
164    Liked
165    Liked
Name: Liked, Length: 166, dtype: object

In [26]:
corpus = st.CorpusFromPandas(
    reviews_sent,
    category_col = 'Liked',
    text_col = 'reviewText',
    nlp=st.whitespace_nlp_with_sentences
).build()

In [27]:
html = st.produce_scattertext_explorer(
        corpus,
        category="Liked",
        category_name='Liked',
        not_category_name='Not Liked',
        minimum_term_frequency=10,
        pmi_threshold_coefficient=5,
        width_in_pixels=1000,
        metadata=reviews_sent['productID']
        )

In [28]:
open('reviews_scatter.html', 'wb').write(html.encode('utf-8'));

In [29]:
reviews_sent['Liked'] = reviews_sent.polarity.apply(lambda x: x > 0).map({True: 'Liked', False: 'Not Liked'})

reviews_sent['Liked']

0          Liked
1          Liked
2      Not Liked
3          Liked
4          Liked
         ...    
161        Liked
162        Liked
163        Liked
164    Not Liked
165    Not Liked
Name: Liked, Length: 166, dtype: object

In [30]:
corpus = st.CorpusFromPandas(
    reviews_sent,
    category_col = 'Liked',
    text_col = 'reviewText',
    nlp=st.whitespace_nlp_with_sentences
).build()

In [31]:
html = st.produce_scattertext_explorer(
        corpus,
        category="Liked",
        category_name='Liked',
        not_category_name='Not Liked',
        minimum_term_frequency=10,
        pmi_threshold_coefficient=5,
        width_in_pixels=1000,
        metadata=reviews_sent['productID']
        )

In [32]:
open('reviews_scatter_textblob.html', 'wb').write(html.encode('utf-8'));

In [33]:
reviews_sent['Liked'] = reviews_sent.compound.apply(lambda x: x > 0).map({True: 'Liked', False: 'Not Liked'})

reviews_sent['Liked']

0          Liked
1          Liked
2      Not Liked
3          Liked
4          Liked
         ...    
161        Liked
162        Liked
163        Liked
164        Liked
165        Liked
Name: Liked, Length: 166, dtype: object

In [34]:
corpus = st.CorpusFromPandas(
    reviews_sent,
    category_col = 'Liked',
    text_col = 'reviewText',
    nlp=st.whitespace_nlp_with_sentences
).build()

In [35]:
html = st.produce_scattertext_explorer(
        corpus,
        category="Liked",
        category_name='Liked',
        not_category_name='Not Liked',
        minimum_term_frequency=10,
        pmi_threshold_coefficient=5,
        width_in_pixels=1000,
        metadata=reviews_sent['productID']
        )

In [36]:
open('reviews_scatter_vader.html', 'wb').write(html.encode('utf-8'));

In [39]:
reviews_final = reviews_sent[['productID','reviewerID','reviewerName','rating','Liked']]

In [45]:
liked_5 = reviews_final.query('rating == 5')

In [47]:
liked_5.Liked.value_counts(normalize=True)

Liked        0.870504
Not Liked    0.129496
Name: Liked, dtype: float64