# Predicting sentiment from product reviews

# Fire up GraphLab Create

In [1]:
import graphlab

# Read some product review data

Loading reviews for a set of baby products. 

In [2]:
products = graphlab.SFrame('amazon_baby.gl/')

[INFO] GraphLab Create v1.8.2 started. Logging: C:\Users\PROID_~1\AppData\Local\Temp\graphlab_server_1456297798.log.0


# Let's explore this data together

The data includes the product name, the review text, and the rating of each review.

In [None]:
products.head()

# Build the word count vector for each review

In [3]:
products['word_count'] = graphlab.text_analytics.count_words(products['review'])

In [None]:
products

In [4]:
graphlab.canvas.set_target('ipynb')

In [None]:
products['name'].show()

# Examining the reviews for the most-sold product: 'Vulli Sophie the Giraffe Teether'

In [None]:
giraffe_reviews = products[products['name'] == 'Vulli Sophie the Giraffe Teether']

In [None]:
len(giraffe_reviews)


In [None]:
giraffe_reviews['rating'].show(view='Categorical')

In [None]:
giraffe_reviews['rating'].show()

# Build a sentiment classifier

In [None]:
products['rating'].show(view='Categorical')

## Define what counts as a positive and a negative sentiment

We will ignore all reviews with rating = 3, since they tend to have a neutral sentiment. Reviews with a rating of 4 or higher will be considered positive, while those with a rating of 2 or lower will be considered negative.

In [5]:
#ignore all 3* reviews
products = products[products['rating'] != 3]

In [6]:
#positive sentiment = 4* or 5* reviews
products['sentiment'] = products['rating'] >=4

In [None]:
products.head()

In [None]:
graphlab.SFrame(products['name','sentiment'])

## Let's train the sentiment classifier

In [7]:
train_data,test_data = products.random_split(.8, seed=0)

In [None]:
# Keep the split seeded so that re-running the notebook reproduces the same
# train/test partition instead of replacing it with a random one.
train_data,test_data = products.random_split(.8, seed=0)

In [8]:
# Fit a logistic classifier that predicts 'sentiment' from the bag-of-words
# counts in 'word_count'; test_data is passed as the validation set.
sentiment_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)

# Evaluate the sentiment model

In [None]:
test_data['sentiment'].show(view='Categorical')

In [None]:
sentiment_model['coefficients'].print_rows(15)

In [None]:
sentiment_model.evaluate(test_data)

In [None]:
sentiment_model.evaluate(test_data, metric='roc_curve')

In [None]:
sentiment_model.show(view='Evaluation')

# Applying the learned model to understand sentiment for the giraffe

In [None]:
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(giraffe_reviews, output_type='probability')

In [None]:
# Store the hard class labels in their own column. Writing them into
# 'predicted_sentiment' would overwrite the probability scores, and the
# later sort on that column would no longer rank reviews by confidence.
giraffe_reviews['predicted_class'] = sentiment_model.predict(giraffe_reviews)

In [None]:
giraffe_reviews.head()

## Sort the reviews based on the predicted sentiment and explore

In [None]:
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)

In [None]:
giraffe_reviews.head()

## Most positive reviews for the giraffe

In [None]:
giraffe_reviews[0]['review']

In [None]:
giraffe_reviews[1]['review']

## Show the most negative reviews for the giraffe

In [None]:
giraffe_reviews[-1]['review']

In [None]:
giraffe_reviews[-2]['review']

In [14]:
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [None]:
def awesome( dic ):
    """Return how often the word 'awesome' occurs in a word-count dictionary.

    Parameters
    ----------
    dic : dict
        Mapping of word -> number of occurrences for a single review.

    Returns
    -------
    The count stored under 'awesome', or 0 when the word is absent.
    """
    # dict.get covers both the present and the missing case in one lookup.
    return dic.get('awesome', 0)

In [None]:
# Materialize the per-review 'awesome' count before plotting it; without this
# assignment the column does not exist yet when the notebook runs top to bottom.
products['awesome'] = products['word_count'].apply(awesome)
products['awesome'].show(view='Categorical')

In [None]:
products[products['awesome']!=0].sort('awesome',ascending =False)

In [15]:
def word( dic, key=None ):
    """Return how often a word occurs in a word-count dictionary.

    Parameters
    ----------
    dic : dict
        Mapping of word -> number of occurrences for a single review.
    key : str, optional
        Word to look up. When omitted, the module-level variable ``w`` is
        used, so existing ``apply(word)`` calls made inside a
        ``for w in ...`` loop keep working unchanged.

    Returns
    -------
    The count stored for the word, or 0 when the word is absent.
    """
    if key is None:
        # Backward-compatible fallback: read the global loop variable ``w``.
        key = w
    return dic.get(key, 0)

In [None]:
products

In [16]:
# Add one count column per selected word. `word` looks up the current value
# of the loop variable `w`, so the loop variable must keep this name.
for w in selected_words:
    products[w] = products['word_count'].apply(word)


In [None]:
for w in selected_words:
    print w,': ',products[w].sum()

In [17]:
train_data,test_data = products.random_split(.8, seed=0)

In [None]:
products

In [18]:
# Fit a second logistic classifier that only uses the per-word count columns
# named in selected_words; test_data is passed as the validation set.
selected_words_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=selected_words,
    validation_set=test_data,
)

In [None]:
selected_words_model['coefficients'].print_rows(12)

In [None]:
# Rank the learned weights: largest values first, then smallest values first.
# Each sorted view is printed explicitly because a notebook cell only
# displays the value of its final expression.
coefficients = selected_words_model['coefficients']
coefficients.sort('value', ascending=False).print_rows(12)
coefficients.sort('value', ascending=True).print_rows(12)

In [None]:
selected_words_model.evaluate(test_data)

In [None]:
test_data.show()

In [9]:
diaper_champ_reviews=products[products['name']=='Baby Trend Diaper Champ']

In [12]:
diaper_champ_reviews[0:1]

name,review,rating,word_count,sentiment
Baby Trend Diaper Champ,Ok - newsflash. Diapers are just smelly. We've ...,4.0,"{'just': 2L, 'less': 1L, '-': 3L, 'smell- ...",1


In [25]:
# The prediction result is a single unnamed column of probabilities, so
# sort() takes no column name here -- only the ordering flag. Passing
# 'sentiment' positionally collided with the `ascending` keyword.
sentiment_model.predict(diaper_champ_reviews[0:1], output_type='probability').sort(ascending=False)

In [19]:
selected_words_model.predict(diaper_champ_reviews[0:1], output_type='probability')

dtype: float
Rows: 1
[0.8706971470401038]

In [22]:
diaper_champ_reviews[0]['review']

"Ok - newsflash.  Diapers are just smelly.  We've had this pail for 2.5 years now.  It was our first and primary one.  There were no major smell problems until after one year, when our son started eating solids.  Also, we change the bag twice weekly as 3 days is about the max for smell-containment.  Around 20-22 months we started shopping for a container that would be less smelly and didn't find one as good.  (We have a cheaper one upstairs which broke immediately and always stunk!)  We finally just put the Diaper Champ in the attic a few months ago and use the cheap one with the flip-up lid - mainly since the cheapo fits inside the cabinet and we didn't notice a big difference in smell-control.  (The most helpful action is to tie the dirty diapers inside a small plastic bag before putting them in the pail.)A couple of our friends have this pail and were pleased until the children started eating solid food and things got stinkier - but that's pretty much the consensus according to many