# Analyze Product Sentiment

In [10]:
import turicreate

In [11]:
products = turicreate.SFrame('../../data/amazon_baby.sframe')

# Build the word_count column on which we will run the sentiment analysis

In [12]:
products['word_count'] = turicreate.text_analytics.count_words(products['review'])

# Build a function which will return the word count for a given column

In [13]:
def word_count(word_counts, column_name):
    if(column_name not in word_counts):
        return 0
    return word_counts[column_name]

# Define the subset of words we will base our Sentiment Analysis on

In [14]:
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

# Use .apply() to build a new feature with the counts for each of the selected_words

In [16]:
for word in selected_words:
    products[word] = products['word_count'].apply(lambda x : word_count(x, word))

In [17]:
products

name,review,rating,word_count,awesome,great
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'handles': 1.0, 'stripping': 1.0, ...",0.0,0.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'highly': 1.0, ...",0.0,0.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'of': 1.0, 'the': 1.0, 'than': 1.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'approach': 2.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ...",0.0,1.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ...",0.0,1.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ...",0.0,0.0

fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
0.0,0.0,0.0,0,0.0,0,0,0,0
0.0,0.0,1.0,0,0.0,0,0,0,0
0.0,0.0,0.0,0,0.0,0,0,0,0
0.0,0.0,2.0,0,0.0,0,0,0,0
0.0,0.0,1.0,0,0.0,0,0,0,0
0.0,0.0,0.0,0,0.0,0,0,0,0
0.0,0.0,0.0,0,0.0,0,0,0,0
0.0,0.0,0.0,0,0.0,0,0,0,0
1.0,0.0,0.0,0,0.0,0,0,0,0
0.0,0.0,0.0,0,0.0,0,0,0,0


# Out of the selected_words, which one is most used in the dataset? Which one is least used?

In [18]:
for word in selected_words:
    print(word + ':\t' + str(products[word].sum()))

awesome:	4075.0
great:	59536.0
fantastic:	1765.0
amazing:	2726.0
love:	43867.0
horrible:	1245
bad:	4950.0
terrible:	1282
awful:	753
wow:	461
hate:	1285


In [19]:
#ignore all 3*  reviews
products = products[products['rating']!= 3]

In [20]:
#positive sentiment = 4-star or 5-star reviews
products['sentiment'] = products['rating'] >= 4

In [21]:
train_data,test_data = products.random_split(.8,seed=0)

# Create a new sentiment analysis model using only the selected_words as features

In [22]:
selected_words_model = turicreate.logistic_classifier.create(train_data,
                                                     target='sentiment',
                                                     features=selected_words,
                                                     validation_set=test_data)

In [23]:
products['predicted_sentiment'] = selected_words_model.predict(products, output_type = 'probability')

In [25]:
selected_words_model.evaluate(test_data)

{'accuracy': 0.8463848186404036,
 'auc': 0.6936022046674926,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        0        |  159  |
 |      0       |        0        |  371  |
 |      0       |        1        |  4957 |
 |      1       |        1        | 27817 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.9157860082304526,
 'log_loss': 0.3962265467087378,
 'precision': 0.8487520595594068,
 'recall': 0.9943165570488991,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+--------------------+-----+-------+------+
 | threshold |        fpr         | tpr |   p   |  n   |
 +-----------+--------------------+-----+-------+------+
 |    0.0    |        1.0         | 1.0 | 27976 | 532

# Sort the learned coefficients according to the ‘value’ column

In [26]:

selected_words_model = selected_words_model['coefficients'].sort('value')

TypeError: 'LogisticClassifier' object is not subscriptable

# Comparing the accuracy of different sentiment analysis model

In [21]:
selected_words_model.evaluate(test_data, metric='roc_curve')

{'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+--------------------+-----+-------+------+
 | threshold |        fpr         | tpr |   p   |  n   |
 +-----------+--------------------+-----+-------+------+
 |    0.0    |        1.0         | 1.0 | 27976 | 5328 |
 |   1e-05   |        1.0         | 1.0 | 27976 | 5328 |
 |   2e-05   |        1.0         | 1.0 | 27976 | 5328 |
 |   3e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 |   4e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 |   5e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 |   6e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 |   7e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 |   8e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 |   9e-05   | 0.9998123123123123 | 1.0 | 27976 | 5328 |
 +-----------+--------------------+-----+-------+------+
 [100001 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_row

# We will investigate a product named ‘Baby Trend Diaper Champ’

In [28]:
diaper_champ_reviews = products[products['name']== 'Baby Trend Diaper Champ']

In [29]:
diaper_champ_reviews

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,Ok - newsflash. Diapers are just smelly. We've ...,4.0,"{'convenient': 1.0, 'more': 1.0, 'trash': ...",0.0,0.0,0.0
Baby Trend Diaper Champ,"My husband and I selected the Diaper ""Champ"" ma ...",1.0,"{'system': 1.0, 'try': 1.0, 're': 1.0, 'still': ...",0.0,0.0,0.0
Baby Trend Diaper Champ,Excellent diaper disposal unit. I used it in ...,5.0,"{'nose': 1.0, 'for': 2.0, 'investment': 1.0, ...",0.0,0.0,0.0
Baby Trend Diaper Champ,We love our diaper champ. It is very easy to use ...,5.0,"{'out': 1.0, 'pull': 1.0, 'open': 1.0, 'pail': ...",0.0,0.0,0.0
Baby Trend Diaper Champ,Two girlfriends and two family members put me ...,5.0,"{'winter': 1.0, 'outside': 1.0, 'day': ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I waited to review this until I saw how it ...,4.0,"{'mom': 1.0, 'my': 1.0, 'empty': 2.0, 'poop': ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I have had a diaper genie for almost 4 years since ...,1.0,"{'yuck': 1.0, 'clean': 1.0, 'all': 1.0, 'tra ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I originally put this item on my baby registry ...,5.0,"{'price': 1.0, 'suggestions': 1.0, ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I am so glad I got the Diaper Champ instead of ...,5.0,"{'best': 1.0, 'that': 1.0, 'handle': 1.0, ' ...",0.0,0.0,0.0
Baby Trend Diaper Champ,We had 2 diaper Genie's both given to us as a ...,4.0,"{'no': 1.0, 'regular': 1.0, 'part': 1.0, ...",0.0,0.0,0.0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment
0.0,0.0,0,0.0,0,0,0,0,1,0.7919288370624453
0.0,0.0,0,0.0,0,0,0,0,0,0.7919288370624453
0.0,0.0,0,0.0,0,0,0,0,1,0.7919288370624453
0.0,1.0,0,0.0,0,0,0,0,1,0.936781924479937
1.0,0.0,1,0.0,0,0,1,0,1,0.5438399411170777
0.0,0.0,0,1.0,0,0,0,0,1,0.5854321171706491
0.0,0.0,0,0.0,0,0,0,0,0,0.7919288370624453
0.0,0.0,0,0.0,0,0,0,0,1,0.7919288370624453
0.0,0.0,0,0.0,0,0,0,0,1,0.7919288370624453
0.0,2.0,0,0.0,0,0,0,0,1,0.9829620803909052


# Use the sentiment_model to predict the sentiment of each review in diaper_champ_reviews and sort the results according to their ‘predicted_sentiment’

In [30]:
diaper_champ_reviews['predicted_sentiment'] = selected_words_model.predict(diaper_champ_reviews, output_type = 'probability')

# What is the ‘predicted_sentiment’ for the most negative review for ‘Baby Trend Diaper Champ’?

In [31]:
diaper_champ_reviews = diaper_champ_reviews.sort('predicted_sentiment', ascending=False)

In [34]:
diaper_champ_reviews.tail(1)

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,......all I can say is the smell is ...,1.0,"{'buy': 1.0, 'this': 1.0, 't': 1.0, 'don': 1.0, ...",0.0,0.0,0.0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment
0.0,0.0,1,0.0,0,0,0,0,0,0.2860300801255359


# # What is the ‘predicted_sentiment’ for the most positive review for ‘Baby Trend Diaper Champ’?

In [35]:
diaper_champ_reviews.head(1)

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,I LOVE LOVE LOVE this product! It is SO much ...,4.0,"{'sorry': 1.0, 'be': 1.0, 'will': 1.0, ...",0.0,1.0,0.0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment
0.0,3.0,0,0.0,0,0,0,0,1,0.9981253623335122


In [36]:
diaper_champ_reviews[0]['review']

'I LOVE LOVE LOVE this product! It is SO much easier to use than the Diaper Genie, (you need a PHD in poopy to figure out how to use the darn thing!) and it even takes the same bags as my kitchen trash can, shich is super convenient, and cost efficient as I can buy them in bulk.The only reason for not rating it a 5 star was that I did have one small problem with it. The foam gasket in the barrell which keeps the poopy smell inside the unit ripped somehow, and it got VERY stinky. HOWEVER, I contacted the manufacturer though their website, and received an email back the same day stating that this was unusual, and that replacement gaskets were on their way to me. They arrived inside of a week and after replacing, it works great again! (They even sent me extras should it happen again)I HIGHLY reccomend this diaper pail over ANY competitors, you will not be sorry!'

In [37]:
selected_words_model.predict(diaper_champ_reviews[0:1], output_type='probability')

dtype: float
Rows: 1
[0.9981253623335122]

In [38]:
selected_words_model.show(view='Evaluation')

AttributeError: 'LogisticClassifier' object has no attribute 'show'