In [65]:
import graphlab as gl

products = gl.SFrame('data/amazon_baby.gl/')

In [74]:
selected_words = [
    'awesome', 'great', 'fantastic', 'amazing', 'love',
    'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate'
]

products = products[products['rating'] != 3]
products['sentiment'] = products['rating'] >= 4

def get_word_count_dict(review):
    words = review.split(' ')
    words = filter(lambda word: word != "", words)
    word_count = dict((word, 0) for word in words)
    for word in words:
        word_count.update({word: word_count[word] + 1})

    return word_count

products['word_count'] = products.apply(
    lambda product: get_word_count_dict(product['review'])
)

for word in selected_words:
    products[word] = products.apply(
        lambda product: product['word_count'][word] if word in product['word_count'] else 0
    )
    print('{word}: {_sum}'.format(word=word, _sum=products[word].sum()))

awesome: 1606
great: 34509
fantastic: 754
amazing: 1114
love: 32129
horrible: 567
bad: 3093
terrible: 587
awful: 303
wow: 48
hate: 945


In [75]:
products.show()

Canvas is accessible via web browser at the URL: http://localhost:40561/index.html
Opening Canvas in default web browser.


In [76]:
train_data, test_data = products.random_split(.8, seed=0)
train_data.show()
test_data.show()

Canvas is updated and available in a tab in the default browser.
Canvas is accessible via web browser at the URL: http://localhost:40561/index.html
Opening Canvas in default web browser.


In [77]:
sentiment_model = gl.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data
)

sentiment_model.evaluate(test_data)
sentiment_model.show(view='Evaluation')

Canvas is accessible via web browser at the URL: http://localhost:40561/index.html
Opening Canvas in default web browser.




In [78]:
selected_words_model = gl.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=selected_words,
    validation_set=test_data
)

selected_words_model.evaluate(test_data)
selected_words_model.show(view='Evaluation')

Canvas is updated and available in a tab in the default browser.


In [79]:
target = 'Baby Trend Diaper Champ'

target_products = products[products['name'] == target]
target_products['predicted_sentiment'] = sentiment_model.predict(target_products, output_type='probability')
target_products = target_products.sort('predicted_sentiment', ascending=False)
target_products.select_columns(['name', 'review', 'predicted_sentiment']).head()

name,review,predicted_sentiment
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,0.999999998183
Baby Trend Diaper Champ,I LOOOVE this diaper pail! Its the easies ...,0.999999989189
Baby Trend Diaper Champ,My baby is now 8 months and the can has been ...,0.999999931519
Baby Trend Diaper Champ,I don't know how we survived the first year ...,0.999999926315
Baby Trend Diaper Champ,Diaper Champ or Diaper Genie? That was my ...,0.999999744483
Baby Trend Diaper Champ,I wanted this diaper pail over the Genie b/c I ...,0.99999973989
Baby Trend Diaper Champ,I originally put this item on my baby registry ...,0.99999969306
Baby Trend Diaper Champ,"I received the Champ at my baby shower, and ...",0.999999592081
Baby Trend Diaper Champ,This diaper pail is great. It is the third ...,0.999999268085
Baby Trend Diaper Champ,"As a first time mother, I wanted to get the best ...",0.999999011191


In [80]:
print("Most positive review: {0}".format(target_products[0]['review']))

Most positive review: Baby Luke can turn a clean diaper to a dirty diaper in 3 seconds flat. The diaper champ turns the smelly diaper into "what diaper smell" in less time than that. I hesitated and wondered what I REALLY needed for the nursery. This is one of the best purchases we made. The champ, the baby bjorn, fluerville diaper bag, and graco pack and play bassinet all vie for the best baby purchase.Great product, easy to use, economical, effective, absolutly fabulous.UpdateI knew that I loved the champ, and useing the diaper genie at a friend's house REALLY reinforced that!! There is no comparison, the chanp is easy and smell free, the genie was difficult to use one handed (which is absolutly vital if you have a little one on a changing pad) and there was a deffinite odor eminating from the genieplus we found that the quick tie garbage bags where the ties are integrated into the bag work really well because there isn't any added bulk around the sealing edge of the champ.


In [81]:
target_products['predicted_sentiment'] = selected_words_model.predict(target_products, output_type='probability')
target_products.select_columns(['name', 'review', 'predicted_sentiment']).head()

name,review,predicted_sentiment
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,0.810132599629
Baby Trend Diaper Champ,I LOOOVE this diaper pail! Its the easies ...,0.940854195421
Baby Trend Diaper Champ,My baby is now 8 months and the can has been ...,0.879880933336
Baby Trend Diaper Champ,I don't know how we survived the first year ...,0.810132599629
Baby Trend Diaper Champ,Diaper Champ or Diaper Genie? That was my ...,0.810132599629
Baby Trend Diaper Champ,I wanted this diaper pail over the Genie b/c I ...,0.810132599629
Baby Trend Diaper Champ,I originally put this item on my baby registry ...,0.810132599629
Baby Trend Diaper Champ,"I received the Champ at my baby shower, and ...",0.810132599629
Baby Trend Diaper Champ,This diaper pail is great. It is the third ...,0.810132599629
Baby Trend Diaper Champ,"As a first time mother, I wanted to get the best ...",0.940854195421


In [82]:
positive_review = target_products[0]['review']
for word in selected_words:
    positive_review = positive_review.replace(word, '\x1b[31m\"{word}\"\x1b[0m'.format(word=word))

print("Most positive review: {0}".format(positive_review))

Most positive review: Baby Luke can turn a clean diaper to a dirty diaper in 3 seconds flat. The diaper champ turns the smelly diaper into "what diaper smell" in less time than that. I hesitated and wondered what I REALLY needed for the nursery. This is one of the best purchases we made. The champ, the baby bjorn, fluerville diaper bag, and graco pack and play bassinet all vie for the best baby purchase.Great product, easy to use, economical, effective, absolutly fabulous.UpdateI knew that I [31m"love"[0md the champ, and useing the diaper genie at a friend's house REALLY reinforced that!! There is no comparison, the chanp is easy and smell free, the genie was difficult to use one handed (which is absolutly vital if you have a little one on a changing pad) and there was a deffinite odor eminating from the genieplus we found that the quick tie garbage bags where the ties are integrated into the bag work really well because there isn't any added bulk around the sealing edge of the champ

