# Analyze Product Sentiment

In [None]:
import turicreate
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Read product review data

In [38]:
products = pd.read_csv('data/amazon_baby.csv')

In [39]:
products.head()

Unnamed: 0,name,review,rating
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5


In [40]:
products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183531 entries, 0 to 183530
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   name    183213 non-null  object
 1   review  182702 non-null  object
 2   rating  183531 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 4.2+ MB


# Count number of occurrences of key words

In [41]:
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [42]:
from collections import Counter
import re

def count_words(row):
    review = str(row[1])
    words = re.findall(r'\b\w+\b', review.lower())
    count = Counter(words)
    return {word: count.get(word, 0) for word in selected_words}


df_counts = pd.DataFrame(list(map(count_words, products.itertuples())))

In [43]:
df_counts

Unnamed: 0,awesome,great,fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
183526,0,0,0,0,0,0,0,0,0,0,0
183527,0,0,0,0,0,0,0,0,0,0,0
183528,0,0,0,0,0,0,0,0,0,0,0
183529,0,0,0,0,0,0,0,0,0,0,0


In [44]:
new_columns = df_counts.columns.values

products[new_columns] = df_counts

In [45]:
products.head()

Unnamed: 0,name,review,rating,awesome,great,fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3,0,0,0,0,0,0,0,0,0,0,0
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5,0,0,0,0,0,0,0,0,0,0,0
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5,0,0,0,0,0,0,0,0,0,0,0
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5,0,0,0,0,1,0,0,0,0,0,0
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5,0,0,0,0,1,0,0,0,0,0,0


In [46]:
products.sum().sort_values()

  products.sum().sort_values()


horrible          0
terrible          0
awful             0
hate              0
fantastic         4
awesome           5
bad               9
wow              46
amazing          83
great           332
love           1480
rating       756230
dtype: int64

In [47]:
products = products.query('rating != 3')

In [48]:
products['sentiment'] = products['rating'] >= 4

In [53]:
products.head()

Unnamed: 0,name,review,rating,awesome,great,fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5,0,0,0,0,0,0,0,0,0,0,0,True
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5,0,0,0,0,0,0,0,0,0,0,0,True
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5,0,0,0,0,1,0,0,0,0,0,0,True
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5,0,0,0,0,1,0,0,0,0,0,0,True
5,Stop Pacifier Sucking without tears with Thumb...,"When the Binky Fairy came to our house, we did...",5,0,0,0,0,1,0,0,0,0,0,0,True


In [57]:
from sklearn.model_selection import train_test_split

# X = products.drop(columns=['rating', 'sentiment'])
# y = products[['sentiment']]

# (X_train, y_train), (X_testm, y_test) = train_test_split(products, test_size=0.2, random_state=0)

train, test = train_test_split(products, test_size=0.2, random_state=0)

X_train, y_train = train.drop(columns=['rating', 'sentiment']), train['sentiment']
X_test, y_test = test.drop(columns=['rating', 'sentiment']), test['sentiment']

In [59]:
selected_words

['awesome',
 'great',
 'fantastic',
 'amazing',
 'love',
 'horrible',
 'bad',
 'terrible',
 'awful',
 'wow',
 'hate']

In [60]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
features = selected_words
model.fit(X_train[features], y_train)

In [80]:
from sklearn.metrics import accuracy_score

y_test_hat = model.predict(X_test[features])

accuracy_score(y_test, y_test_hat)

0.8392252106383616

In [78]:
model.coef_

array([[-0.97788577,  0.41103388,  0.40325099,  0.12695896,  0.30058303,
         0.        ,  0.64110553,  0.        ,  0.        , -0.45767329,
         0.        ]])

In [79]:
for element, coef in zip(features, list(model.coef_[0])):
    print(element, coef)

awesome -0.9778857655780836
great 0.411033881744055
fantastic 0.4032509909004488
amazing 0.12695896103213067
love 0.30058302744836685
horrible 0.0
bad 0.6411055326470896
terrible 0.0
awful 0.0
wow -0.45767328737898605
hate 0.0


# Load data and create new features

In [89]:
products = turicreate.SFrame('data/amazon_baby.sframe')
products.head()

name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0


# Create sentiment analysis classifier

## Build word count vectors

In [90]:
products['word_count'] = turicreate.text_analytics.count_words(products['review'])

In [91]:
products

name,review,rating,word_count
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'handles': 1.0, 'stripping': 1.0, ..."
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'moist': 1.0, 'osocozy': ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'comfortable': 1.0, ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'little': 1.0, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ..."
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ..."
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ..."


# Define what is positive and negative sentiment

In [92]:
products['rating'].show()

In [93]:
#ignore all 3*  reviews
products = products[products['rating']!= 3]

In [94]:
#positive sentiment = 4-star or 5-star reviews
products['sentiment'] = products['rating'] >= 4

In [95]:
products

name,review,rating,word_count,sentiment
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'moist': 1.0, 'osocozy': ...",1
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'comfortable': 1.0, ...",1
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'little': 1.0, ...",1
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ...",1
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ...",1
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'tracker': 1.0, 'recommend': 1.0, ...",1


In [None]:
products['sentiment'].show()

# Train our sentiment classifier

In [96]:
train_data,test_data = products.random_split(.8,seed=0)

In [97]:
sentiment_model = turicreate.logistic_classifier.create(train_data,target='sentiment', features=['word_count'], validation_set=test_data)

In [98]:
y_hat_test = sentiment_model.predict(test_data)

In [99]:
sentiment_model.evaluate(test_data)

{'accuracy': 0.9176975738650012,
 'auc': 0.9258242975424673,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        1        |  1397 |
 |      0       |        0        |  3931 |
 |      1       |        1        | 26632 |
 |      1       |        0        |  1344 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.951057941255245,
 'log_loss': 0.33047871872409,
 'precision': 0.9501587641371436,
 'recall': 0.9519588218472976,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+--------------------+-------+------+
 | threshold |        fpr         |        tpr         |   p   |  n   |
 +-----------+--------------------+--------------------+-------+------+
 |    0.0    

In [103]:
sentiment_model.coefficients.sort('value')

name,index,class,value,stderr
word_count,transpired,1,-30.432202747925214,
word_count,themobi,1,-29.79241829803961,
word_count,cobbbler,1,-28.87115735702563,
word_count,hahaaaa,1,-28.3709845874117,
word_count,wheelbarrow,1,-28.291157373139814,
word_count,athlectic,1,-28.0682080953346,
word_count,than1,1,-26.79928504455487,
word_count,resewing,1,-26.69473876621892,
word_count,anyow,1,-26.627965022736618,
word_count,swadddlers,1,-26.59180003182813,


In [104]:
sentiment_model.evaluate(test_data)

{'accuracy': 0.9176975738650012,
 'auc': 0.9258242975424673,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        1        |  1397 |
 |      0       |        0        |  3931 |
 |      1       |        1        | 26632 |
 |      1       |        0        |  1344 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.951057941255245,
 'log_loss': 0.33047871872409,
 'precision': 0.9501587641371436,
 'recall': 0.9519588218472976,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+--------------------+-------+------+
 | threshold |        fpr         |        tpr         |   p   |  n   |
 +-----------+--------------------+--------------------+-------+------+
 |    0.0    

In [110]:
test_data[test_data['name'] == 'Baby Trend Diaper Champ']

name,review,rating,word_count,sentiment
Baby Trend Diaper Champ,I have had a diaper genie for almost 4 years since ...,1.0,"{'clean': 1.0, 'it': 8.0, 'thing': 1.0, 'years': ...",0
Baby Trend Diaper Champ,I am so glad I got the Diaper Champ instead of ...,5.0,"{'best': 1.0, 'that': 1.0, 'handle': 1.0, ...",1
Baby Trend Diaper Champ,We loved this pail at first. The mechanism ...,1.0,"{'retire': 1.0, 'd': 1.0, 'have': 1.0, 'pronto': ...",0
Baby Trend Diaper Champ,I bought this for my church nursery room. At ...,5.0,"{'recommend': 1.0, 'highly': 1.0, 'well': ...",1
Baby Trend Diaper Champ,Bad construction is my main issue. My husband ...,1.0,"{'quickly': 1.0, 'gets': 1.0, 'everyone': 2.0, ...",0
Baby Trend Diaper Champ,This is on my list of must haves. The thing ...,5.0,"{'world': 1.0, 'trade': 1.0, 'hours': 1.0, ...",1
Baby Trend Diaper Champ,It suprises me to read all of these reviews ...,5.0,"{'champ': 1.0, 'get': 1.0, 'are': 1.0, 'here': ...",1
Baby Trend Diaper Champ,You really can not appreciate this device ...,5.0,"{'reviews': 1.0, 'negative': 1.0, ...",1
Baby Trend Diaper Champ,I am glad my friend told me about the diaper ...,5.0,"{'be': 1.0, 'parents': 1.0, 'recommend': 1.0, ...",1
Baby Trend Diaper Champ,I Loved this product. It kept smell to a minimum ...,5.0,"{'recommend': 1.0, 'size': 1.0, 'regular': ...",1


In [111]:
diper_champ = products[products['name'] == 'Baby Trend Diaper Champ']

In [None]:
model.predict()

In [106]:
test_data['sentiment'].mean()

0.8400192169108813

# Apply the sentiment classifier to better understand the Giraffe reviews

In [None]:
products['predicted_sentiment'] = sentiment_model.predict(products, output_type = 'probability')

In [None]:
products

In [None]:
giraffe_reviews = products[products['name']== 'Vulli Sophie the Giraffe Teether']

In [None]:
giraffe_reviews

# Sort the Giraffe reviews according to predicted sentiment

In [None]:
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)

In [None]:
giraffe_reviews

In [None]:
giraffe_reviews.tail()

## Show the most positive reviews

In [None]:
giraffe_reviews[0]['review']

In [None]:
giraffe_reviews[1]['review']

# Most negative reivews

In [None]:
giraffe_reviews[-1]['review']

In [None]:
giraffe_reviews[-2]['review']