In [118]:
import turicreate as tc
import matplotlib.pyplot as plt
%matplotlib inline

# Read the Amazon baby product review data

In [119]:
# Load the review data into an SFrame from the relative path "amazon_baby".
products = tc.SFrame(r"amazon_baby")

In [120]:
# Preview the first rows to check the available columns (name, review, rating).
products.head()

name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0


## Build word count vectors

In [121]:
# Add a 'word_count' column: for each review, a dict mapping every word to its number of occurrences.
products['word_count'] = tc.text_analytics.count_words(products['review'])

## Remove all the unnecessary words and build new columns with the word counts of only the selected words

In [122]:
# Hand-picked sentiment words; the order of this list is the order in which
# the per-word count columns are created further down.
selected_words = [
    'awesome', 'great', 'fantastic', 'amazing', 'love',
    'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate',
]

In [123]:
def word_count(dict_x, word):
    """Return the count stored for `word` in `dict_x`, or 0.0 if the word is absent.

    dict_x: mapping from a word to its count.
    word: the word to look up.
    """
    return dict_x.get(word, 0.0)

In [124]:
# Add one column per selected word holding how many times that word occurs in each review.
for word in selected_words:
    # Bind the current word as a default argument so each function keeps its own
    # word instead of looking up the loop variable at the moment it is called.
    products[word] = products['word_count'].apply(
        lambda counts, word=word: word_count(counts, word)
    )

In [125]:
# Display the frame to verify that one count column was added per selected word.
products

name,review,rating,word_count,awesome,great
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'handles': 1.0, 'stripping': 1.0, ...",0.0,0.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'highly': 1.0, ...",0.0,0.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'of': 1.0, 'the': 1.0, 'than': 1.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'approach': 2.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ...",0.0,1.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ...",0.0,1.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ...",0.0,0.0

fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Question 1

In [126]:
# Report, for every selected word, its count summed over all reviews.
for word in selected_words:
    occurrences = products[word].sum()
    print("The total number of times the word {} appeared is {}".format(word, occurrences))

The total number of times the word awesome appeared is 4075.0
The total number of times the word great appeared is 59536.0
The total number of times the word fantastic appeared is 1765.0
The total number of times the word amazing appeared is 2726.0
The total number of times the word love appeared is 43867.0
The total number of times the word horrible appeared is 1245.0
The total number of times the word bad appeared is 4950.0
The total number of times the word terrible appeared is 1282.0
The total number of times the word awful appeared is 753.0
The total number of times the word wow appeared is 461.0
The total number of times the word hate appeared is 1285.0


# Training the model with only selected words.

# Question 2

In [127]:
# Ignore all 3-star reviews: keep only the rows whose rating is not exactly 3.
products = products[products['rating']!= 3]

In [128]:
# Positive sentiment = 4-star or 5-star reviews; the comparison result is stored
# as the 'sentiment' label column.
products['sentiment'] = products['rating'] >= 4

In [129]:
# Visualize the 'sentiment' column (e.g. to check the balance between the two labels).
products['sentiment'].show()

In [130]:
# Split roughly 80% / 20% into train and test; the fixed seed keeps the split reproducible.
train_data, test_data = products.random_split(0.8, seed=0)

In [131]:
# Preview the filtered frame, which now also carries the 'sentiment' column.
products.head()

name,review,rating,word_count,awesome,great
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'highly': 1.0, ...",0.0,0.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'of': 1.0, 'the': 1.0, 'than': 1.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'approach': 2.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ...",0.0,1.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ...",0.0,1.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'tracker': 1.0, 'now': 1.0, 'its': 1.0, 'sti ...",0.0,0.0

fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment
0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [132]:
# Train a logistic classifier for 'sentiment' that only sees the selected-word count columns.
# NOTE(review): the test split is also passed as the validation set — confirm this is intended.
selected_words_model = tc.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=selected_words,
    validation_set=test_data,
)

In [133]:
selected_words_model.coefficients   # Examine the weights the classifier learned for the 11 words in selected_words.

name,index,class,value,stderr
(intercept),,1,1.3365913848877558,0.0089299697876567
awesome,,1,1.133534666034145,0.0839964398318752
great,,1,0.8630655001196618,0.0189550524443773
fantastic,,1,0.8858047568814295,0.1116759129339965
amazing,,1,1.1000933113660285,0.0995477626046598
love,,1,1.3592688669225153,0.0280683001520994
horrible,,1,-2.251335236759093,0.0802024938878844
bad,,1,-0.9914778800650564,0.0384842866469906
terrible,,1,-2.223661436085127,0.0773173620378575
awful,,1,-2.0529082040313518,0.1009973543525925


In [134]:
# Sort the coefficients by value in ascending order: most negative weights first.
selected_words_model.coefficients.sort('value')

name,index,class,value,stderr
horrible,,1,-2.251335236759093,0.0802024938878844
terrible,,1,-2.223661436085127,0.0773173620378575
awful,,1,-2.0529082040313518,0.1009973543525925
hate,,1,-1.3484407222463124,0.0771569860429733
bad,,1,-0.9914778800650564,0.0384842866469906
wow,,1,-0.0095382360676788,0.1604641122471166
great,,1,0.8630655001196618,0.0189550524443773
fantastic,,1,0.8858047568814295,0.1116759129339965
amazing,,1,1.1000933113660285,0.0995477626046598
awesome,,1,1.133534666034145,0.0839964398318752


# Question 3

In [135]:
# Train the comparison classifier on the full per-review word-count dictionary.
sentiment_model = tc.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)

In [136]:
# Evaluate the all-words model on the test split.
sentiment_model.evaluate(test_data)

{'accuracy': 0.9176975738650012,
 'auc': 0.9258242975424673,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        1        |  1397 |
 |      1       |        0        |  1344 |
 |      0       |        0        |  3931 |
 |      1       |        1        | 26632 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.951057941255245,
 'log_loss': 0.3304787187241175,
 'precision': 0.9501587641371436,
 'recall': 0.9519588218472976,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+--------------------+-------+------+
 | threshold |        fpr         |        tpr         |   p   |  n   |
 +-----------+--------------------+--------------------+-------+------+
 |    0.0  

In [137]:
# Evaluate the selected-words model on the same test split for comparison.
selected_words_model.evaluate(test_data)

{'accuracy': 0.8463848186404036,
 'auc': 0.6935096220934976,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        0        |  159  |
 |      0       |        0        |  371  |
 |      0       |        1        |  4957 |
 |      1       |        1        | 27817 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.9157860082304526,
 'log_loss': 0.3962265467087378,
 'precision': 0.8487520595594068,
 'recall': 0.9943165570488991,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+-----+-------+------+
 | threshold |        fpr         | tpr |   p   |  n   |
 +-----------+--------------------+-----+-------+------+
 |    0.0    |        1.0         | 1.0 | 27976 | 5328 

In [138]:
# Keep only the reviews of the product named 'Baby Trend Diaper Champ'.
diaper_champ_reviews = products[products['name'] == 'Baby Trend Diaper Champ']

# Find the review with the most positive predicted sentiment for the product 'Baby Trend Diaper Champ'

In [139]:
# Store the all-words model's predicted probability for every Diaper Champ review.
diaper_champ_reviews['predicted_sentiment_all_words'] = sentiment_model.predict(
    diaper_champ_reviews,
    output_type='probability',
)

In [140]:
# Order the reviews from highest to lowest predicted probability, so row 0 is the top-scored review.
diaper_champ_reviews = diaper_champ_reviews.sort('predicted_sentiment_all_words', ascending=False)

In [141]:
# Display the sorted reviews together with their predicted probabilities.
diaper_champ_reviews

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,I read a review below that can explain exactly ...,4.0,"{'key': 1.0, 'have': 1.0, 'pieces': 1.0, 'betwe ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I have never written a review for Amazon but I ...,5.0,"{'priceless': 1.0, 'knows': 1.0, 'parent': ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I originally put this item on my baby registry ...,5.0,"{'price': 1.0, 'suggestions': 1.0, ...",0.0,0.0,0.0
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,5.0,"{'around': 1.0, 'any': 1.0, 't': 1.0, 'isn': ...",0.0,1.0,0.0
Baby Trend Diaper Champ,Diaper Champ or Diaper Genie? That was my ...,5.0,"{'either': 1.0, 'be': 1.0, 't': 1.0, 'not': ...",0.0,1.0,0.0
Baby Trend Diaper Champ,I am one of those super- critical shoppers who ...,5.0,"{'hope': 1.0, 'make': 1.0, 'slower': 1.0, ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I LOOOVE this diaper pail! Its the easies ...,5.0,"{'buy': 1.0, 'product': 1.0, 'recommend': 1.0, ...",0.0,0.0,0.0
Baby Trend Diaper Champ,"As a first time mother, I wanted to get the best ...",5.0,"{'ll': 1.0, 'baby': 1.0, 'recommended': 1.0, ' ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I see that there are complaints of stinkiness ...,5.0,"{'very': 1.0, 'told': 1.0, 'all': 1.0, ...",0.0,0.0,0.0
Baby Trend Diaper Champ,I have a 10 year old daughter and an 8 month ...,5.0,"{'sorry': 1.0, 'be': 1.0, 'you': 2.0, 'sell': 1.0, ...",0.0,0.0,0.0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment_all_w ords ...
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.999999999989594
0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999999868132
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999999465672
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999999302822
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999999174132
0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999998430964
0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999997360196
0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999995664316
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.9999999985015902
0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.999999998056851


In [142]:
# Show the full text of the first (highest-scored) review after sorting.
diaper_champ_reviews[0]['review']

"I read a review below that can explain exactly what we experienced. We've had it for 16 months and it has worked wonderful for us. No smells, change it out once a week, easy to clean. Then a diaper snagged this foam material in the head part, so I pulled the rest of the foam out. Big mistake!!! Now it can no loner retain the stinkiness and we're looking for a replacement. Be careful of overloading and never take out that foam piece that is cushioned between pieces. I have figured out that it is key to keeping the stink out."

In [143]:
# Score that same first review with the selected-words model; the 0:1 slice keeps it as a one-row frame.
selected_words_model.predict(diaper_champ_reviews[0:1], output_type='probability')

dtype: float
Rows: 1
[0.7919288370624453]