# Analyze Product Sentiment

In [1]:
import turicreate

# Read product review data

In [2]:
# Load the review data from 'amazon_baby.sframe' into an SFrame. The path is
# relative, so it is resolved against the notebook's working directory.
products = turicreate.SFrame('amazon_baby.sframe')

# Explore data

In [3]:
products

name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0


In [4]:
# Count the rows for each product name, then show the names with the most rows first.
(
    products
    .groupby('name', operations={'count': turicreate.aggregate.COUNT()})
    .sort('count', ascending=False)
)

name,count
Vulli Sophie the Giraffe Teether ...,785
"Simple Wishes Hands-Free Breastpump Bra, Pink, ...",562
Infant Optics DXR-5 2.4 GHz Digital Video Baby ...,561
Baby Einstein Take Along Tunes ...,547
Cloud b Twilight Constellation Night ...,520
"Fisher-Price Booster Seat, Blue/Green/Gray ...",489
Fisher-Price Rainforest Jumperoo ...,450
"Graco Nautilus 3-in-1 Car Seat, Matrix ...",419
Leachco Snoogle Total Body Pillow ...,388
"Regalo Easy Step Walk Thru Gate, White ...",374


# Building a sentiment classifier

## Build word count vectors

In [5]:
# Run count_words over the 'review' column and keep the result in a new
# 'word_count' column (shown in the outputs below as a word -> count dict per row).
products['word_count'] = turicreate.text_analytics.count_words(products['review'])

In [6]:
# Hand-picked sentiment words; only these are used as features by the
# selected-words model.
selected_words = [
    'awesome',
    'great',
    'fantastic',
    'amazing',
    'love',
    'horrible',
    'bad',
    'terrible',
    'awful',
    'wow',
    'hate',
]



### Create a column `products['awesome']`

In [7]:
def awesome_count(word_counts):
    """Return the number of times 'awesome' occurs in one review.

    Args:
        word_counts: mapping of word -> count for a single review.

    Returns:
        The count stored under 'awesome', converted to int, or 0 when the
        word is not a key of ``word_counts``.
    """
    return int(word_counts['awesome']) if 'awesome' in word_counts else 0

# Apply awesome_count to every row's word counts and store the result in a
# new 'awesome' column.
products['awesome'] = products['word_count'].apply(awesome_count)

### Create a new field with the selected words

In [8]:
def selected_word_count(word_counts, words=None):
    """Keep only the counts of the words of interest from one review.

    Args:
        word_counts: mapping of word -> count for a single review.
        words: iterable of words to keep. Defaults to the notebook-level
            ``selected_words`` list, so existing single-argument calls
            (e.g. via ``.apply``) behave as before.

    Returns:
        A new dict with an entry for each word in ``words`` that is also a
        key of ``word_counts``, in the order the words are listed. Words
        that do not occur in the review are omitted.
    """
    if words is None:
        # Resolved at call time so later edits to selected_words are picked up.
        words = selected_words
    return {word: word_counts[word] for word in words if word in word_counts}

In [9]:
# Store, for every review, the counts of just the selected words.
products['selected_word_count'] = products['word_count'].apply(selected_word_count)
# The single-word 'awesome' column is no longer needed once the dict column exists.
# NOTE(review): re-running this cell after 'awesome' has already been removed
# will presumably raise, because the column is gone -- confirm.
products = products.remove_column('awesome')
products.head(5)


name,review,rating,word_count
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'handles': 1.0, 'stripping': 1.0, ..."
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'moist': 1.0, 'osocozy': ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'comfortable': 1.0, ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'little': 1.0, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ..."

selected_word_count
{}
{'love': 1.0}
{}
{'love': 2.0}
"{'great': 1.0, 'love': 1.0} ..."


## How to count each selected word in an efficient way

In [10]:
from collections import Counter
from functools import reduce

d1 = {"k1": 1, "k2": 2}
d2 = {"k1": 4, "k3": 3}

def sum_dict(d1, d2):
    """Add two count dictionaries key by key.

    Args:
        d1: mapping of key -> number.
        d2: mapping of key -> number.

    Returns:
        A new plain dict containing every key of ``d1`` and ``d2``; a key
        present in both maps to the sum of its two values. Neither input is
        modified.
    """
    # Counter.update adds counts and keeps every key, whereas `Counter + Counter`
    # silently discards keys whose total is zero or negative.
    totals = Counter(d1)
    totals.update(d2)
    return dict(totals)

sum_dict(d1, d2)

{'k1': 5, 'k2': 2, 'k3': 3}

In [11]:
# Toy example of reduce: the list is folded from left to right with the given
# two-argument function. Here the values are multiplied together, so the
# trailing zero makes the product 0.
numbers = [1, 2, 3, 0]
reduce(lambda product, factor: product * factor, numbers)

0

In [12]:
# Fold a list of dicts into one dict of per-key totals; sum_dict already takes
# two arguments, so it can be handed to reduce directly.
l1 = [d1, d2]
reduce(sum_dict, l1)

{'k1': 5, 'k2': 2, 'k3': 3}

In [13]:
# Total occurrences of every selected word across all reviews: the per-review
# dicts in the 'selected_word_count' column are summed pairwise with sum_dict.
reduce(sum_dict, products['selected_word_count'])

{'love': 43867.0,
 'great': 59536.0,
 'fantastic': 1765.0,
 'amazing': 2726.0,
 'bad': 4950.0,
 'awesome': 4075.0,
 'terrible': 1282.0,
 'horrible': 1245.0,
 'wow': 461.0,
 'hate': 1285.0,
 'awful': 753.0}

### Or count the most popular word among the selected words the slow way

In [14]:
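# Slow alternative: sum a dedicated per-word column. Left disabled because the
# 'awesome' column is removed from `products` earlier in this notebook.
# products['awesome'].sum()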

# Define what is positive and negative sentiment

In [15]:
products['rating'].show()

In [16]:
# Ignore all 3-star reviews: keep only the rows whose rating is not 3.
# Note that this rebinds `products`, so later cells see the filtered frame.
products = products[products['rating']!= 3]

In [17]:
# Positive sentiment = 4-star or 5-star reviews. The result of the comparison
# is stored in a new 'sentiment' column (displayed as 1 for positive rows in
# the output below) and is used as the classification target.
products['sentiment'] = products['rating'] >= 4

In [18]:
products

name,review,rating,word_count
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'moist': 1.0, 'osocozy': ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'comfortable': 1.0, ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'little': 1.0, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ..."
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ..."
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'tracker': 1.0, 'recommend': 1.0, ..."

selected_word_count,sentiment
{'love': 1.0},1
{},1
{'love': 2.0},1
"{'great': 1.0, 'love': 1.0} ...",1
{'great': 1.0},1
{},1
{},1
{'fantastic': 1.0},1
{},1
{'love': 2.0},1


In [19]:
products['sentiment'].show()

# Train our sentiment classifier

In [20]:
# Randomly split the reviews into a training and a test set using a fraction
# of .8; the seed is fixed so the same split is produced on every run.
train_data,test_data = products.random_split(.8,seed=0)

### selected_word_model

In [21]:
# Logistic classifier whose only feature is the dict of selected-word counts.
# NOTE(review): the test split is also passed as validation_set and is reused
# for evaluation further down -- confirm this is intended.
selected_word_model = turicreate.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['selected_word_count'],
    validation_set=test_data,
)




### sentiment model

In [22]:
# Logistic classifier that uses the full per-review word counts as its feature.
# NOTE(review): the test split is also passed as validation_set and is reused
# for evaluation further down -- confirm this is intended.
sentiment_model = turicreate.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)


### quiz 2

In [23]:
# Rank the learned weights of the selected-words model from most positive to
# most negative.
coefficients = selected_word_model.coefficients.sort('value', ascending=False)
# Print up to 13 rows -- presumably chosen to be enough for the 11 selected
# words plus the intercept; TODO confirm.
coefficients.print_rows(num_rows = 13)

+---------------------+-----------+-------+-----------------------+
|         name        |   index   | class |         value         |
+---------------------+-----------+-------+-----------------------+
| selected_word_count |    love   |   1   |   1.3592688669225153  |
|     (intercept)     |    None   |   1   |   1.3365913848877569  |
| selected_word_count |  awesome  |   1   |   1.1335346660341437  |
| selected_word_count |  amazing  |   1   |   1.1000933113660283  |
| selected_word_count | fantastic |   1   |   0.8858047568814288  |
| selected_word_count |   great   |   1   |   0.8630655001196608  |
| selected_word_count |    wow    |   1   | -0.009538236067679493 |
| selected_word_count |    bad    |   1   |  -0.9914778800650572  |
| selected_word_count |    hate   |   1   |  -1.3484407222463128  |
| selected_word_count |   awful   |   1   |  -2.0529082040313513  |
| selected_word_count |  terrible |   1   |   -2.223661436085128  |
| selected_word_count |  horrible |   1   |  -2.

### quiz 3

In [24]:
selected_word_model.evaluate(test_data)

{'accuracy': 0.8463848186404036,
 'auc': 0.6935096220934976,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        0        |  371  |
 |      0       |        1        |  4957 |
 |      1       |        1        | 27817 |
 |      1       |        0        |  159  |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.9157860082304526,
 'log_loss': 0.3962265467087378,
 'precision': 0.8487520595594068,
 'recall': 0.9943165570488991,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+-----+-------+------+
 | threshold |        fpr         | tpr |   p   |  n   |
 +-----------+--------------------+-----+-------+------+
 |    0.0    |        1.0         | 1.0 | 27976 | 5328 

In [25]:
sentiment_model.evaluate(test_data)

{'accuracy': 0.9176975738650012,
 'auc': 0.9258242975424673,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        1        |  1397 |
 |      0       |        0        |  3931 |
 |      1       |        1        | 26632 |
 |      1       |        0        |  1344 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.951057941255245,
 'log_loss': 0.33047871872412343,
 'precision': 0.9501587641371436,
 'recall': 0.9519588218472976,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+--------------------+-------+------+
 | threshold |        fpr         |        tpr         |   p   |  n   |
 +-----------+--------------------+--------------------+-------+------+
 |    0.0 

# Predictions

## Predict with selected_word

In [26]:
# Score every review with the selected-words model and store the predicted
# probability in a new 'predicted_sentiment' column.
products['predicted_sentiment'] = selected_word_model.predict(products, output_type = 'probability')

#products

## Set baseline for majority class classifier

Follow this link: https://github.com/ugaliguy/Machine-Learning-Univ-Washington/blob/master/Course-3-Classification/module-2-linear-classifier-assignment-blank.py

And check out this thread on Coursera: https://www.coursera.org/learn/ml-foundations/discussions/all/threads/sIwtjKudEeWjcBKYJq1ZMQ

In [28]:
# Majority-class baseline: count the positive (1) and negative (0) reviews in
# the test set, then print both counts followed by each class's share of the
# test set, one value per line.
num_positive = (test_data['sentiment'] == 1).sum()
num_negative = (test_data['sentiment'] == 0).sum()
print(
    num_positive,
    num_negative,
    num_positive / len(test_data),
    num_negative / len(test_data),
    sep='\n',
)

27976
5328
0.8400192169108815
0.15998078308911842


# Apply the sentiment classifier to better understand the 'Baby Trend Diaper Champ' reviews

In [28]:
# Select the test-set reviews whose product name is exactly
# 'Baby Trend Diaper Champ', then preview the first four of them.
diaper_champ_reviews = test_data[test_data['name']== 'Baby Trend Diaper Champ']
diaper_champ_reviews.head(4)

name,review,rating,word_count,selected_word_count,sentiment
Baby Trend Diaper Champ,I have had a diaper genie for almost 4 years since ...,1.0,"{'clean': 1.0, 'it': 8.0, 'thing': 1.0, 'years': ...",{},0
Baby Trend Diaper Champ,I am so glad I got the Diaper Champ instead of ...,5.0,"{'best': 1.0, 'that': 1.0, 'handle': 1.0, ...",{},1
Baby Trend Diaper Champ,We loved this pail at first. The mechanism ...,1.0,"{'retire': 1.0, 'd': 1.0, 'have': 1.0, 'pronto': ...",{'great': 1.0},0
Baby Trend Diaper Champ,I bought this for my church nursery room. At ...,5.0,"{'recommend': 1.0, 'highly': 1.0, 'well': ...",{'bad': 1.0},1


In [29]:
sentiment_model.evaluate(diaper_champ_reviews)

{'accuracy': 0.8157894736842105,
 'auc': 0.76875,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        1        |   7   |
 |      0       |        0        |   9   |
 |      1       |        1        |   53  |
 |      1       |        0        |   7   |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.8833333333333333,
 'log_loss': 0.9526943486578525,
 'precision': 0.8833333333333333,
 'recall': 0.8833333333333333,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------+--------------------+----+----+
 | threshold |  fpr   |        tpr         | p  | n  |
 +-----------+--------+--------------------+----+----+
 |    0.0    |  1.0   |        1.0         | 60 | 16 |
 |   0.001   |  0

### Predict the 'Diaper Champ' reviews with the sentiment model

### quiz 4

In [30]:
# Score the Diaper Champ reviews with sentiment_model (the model trained on the
# full 'word_count' feature) and store the probability in 'pred_sentiment_by_ww'.
# NOTE(review): the 'ww' suffix presumably refers to the whole-word-count model -- confirm.
diaper_champ_reviews['pred_sentiment_by_ww'] = sentiment_model.predict(diaper_champ_reviews, output_type = 'probability')
# Sorted copy with the highest predicted probability first; diaper_champ_reviews
# itself keeps its original row order.
dcr_ww = diaper_champ_reviews.sort('pred_sentiment_by_ww', ascending=False)
dcr_ww.head(4)

name,review,rating,word_count
Baby Trend Diaper Champ,I read a review below that can explain exactly ...,4.0,"{'key': 1.0, 'have': 1.0, 'pieces': 1.0, 'betwe ..."
Baby Trend Diaper Champ,I have never written a review for Amazon but I ...,5.0,"{'priceless': 1.0, 'parent': 1.0, 'makes': ..."
Baby Trend Diaper Champ,"Let me just say, I LOVE THIS PRODUCT!! I used ...",5.0,"{'fantastic': 1.0, 'over': 1.0, ..."
Baby Trend Diaper Champ,I love my Diaper Champ. My son is 3 months old ...,4.0,"{'clean': 1.0, 'recommed': 1.0, ..."

selected_word_count,sentiment,pred_sentiment_by_ww
{},1,0.999999999989594
{'love': 1.0},1,0.9999999999868132
"{'great': 1.0, 'fantastic': 1.0, 'lo ...",1,0.9999999944395654
"{'great': 1.0, 'love': 1.0} ...",1,0.9999999915488168


### Use the selected_word_model to score the review that sentiment_model ranks as most positive

### quiz 5

In [33]:
# Probability that sentiment_model assigns to its top-ranked Diaper Champ
# review. The one-row slice dcr_ww[0:1] passes an SFrame rather than a single
# row dict, matching how selected_word_model is called on the same review.
sentiment_model.predict(dcr_ww[0:1], output_type='probability')

dtype: float
Rows: 1
[0.9999999999895941]

In [34]:
# Score the Diaper Champ reviews with the selected-words model and store the
# probability in 'pred_sentiment_by_sw'. dcr_ww was created before this column
# was added, so it does not contain it.
diaper_champ_reviews['pred_sentiment_by_sw'] = selected_word_model.predict(diaper_champ_reviews, output_type = 'probability')

# Probability the selected-words model gives to the review that sentiment_model
# ranked first (the first row of dcr_ww).
selected_word_model.predict(dcr_ww[0:1], output_type='probability')

dtype: float
Rows: 1
[0.7919288370624455]

## Show the most positive reviews

In [None]:
# dcr_ww is diaper_champ_reviews sorted by sentiment_model's predicted
# probability, highest first, so row 0 is the most positive review.
# (diaper_champ_reviews itself is unsorted; its row 0 is just the first match.)
dcr_ww[0]['review']

In [None]:
# Word counts of the most positive review: row 0 of dcr_ww, the frame sorted by
# sentiment_model's predicted probability in descending order.
dcr_ww[0]['word_count']


In [None]:
# Selected-word counts of the most positive review: row 0 of dcr_ww, the frame
# sorted by sentiment_model's predicted probability in descending order.
dcr_ww[0]['selected_word_count']

# Most negative reviews

In [None]:
# `giraffe_reviews` is never defined in this notebook. dcr_ww holds the Diaper
# Champ reviews sorted by predicted probability in descending order, so its
# last row is the review sentiment_model scored lowest.
dcr_ww[-1]['review']

In [None]:
# `giraffe_reviews` is never defined in this notebook. dcr_ww holds the Diaper
# Champ reviews sorted by predicted probability in descending order, so its
# second-to-last row is the review with the second-lowest score.
dcr_ww[-2]['review']