This code is intended to analyze the sentiment of the customer reviews for Amazon Products.

In [238]:
import spacy
import pandas as pd
from textblob import TextBlob

In [224]:
# Loaded en_core_web_md as sm is not supporting .similarity()
nlp = spacy.load('en_core_web_md')

In [225]:
# Read the amazon_product_reviews.csv file into a dataframe.
dataframe = pd.read_csv('amazon_product_reviews.csv', low_memory=False)

In [226]:
# Display the columns of the dataframe.
dataframe.columns

Index(['id', 'name', 'asins', 'brand', 'categories', 'keys', 'manufacturer',
       'reviews.date', 'reviews.dateAdded', 'reviews.dateSeen',
       'reviews.didPurchase', 'reviews.doRecommend', 'reviews.id',
       'reviews.numHelpful', 'reviews.rating', 'reviews.sourceURLs',
       'reviews.text', 'reviews.title', 'reviews.userCity',
       'reviews.userProvince', 'reviews.username'],
      dtype='object')

In [227]:
# Create another dataframe only with the reviews column.
reviews_data = dataframe[['reviews.text']]
reviews_data

Unnamed: 0,reviews.text
0,This product so far has not disappointed. My c...
1,great for beginner or experienced person. Boug...
2,Inexpensive tablet for him to use and learn on...
3,I've had my Fire HD 8 two weeks now and I love...
4,I bought this for my grand daughter when she c...
...,...
34655,This is not appreciably faster than any other ...
34656,Amazon should include this charger with the Ki...
34657,Love my Kindle Fire but I am really disappoint...
34658,I was surprised to find it did not come with a...


In [228]:
# Check the NAN values.
reviews_data.isnull().sum()

reviews.text    1
dtype: int64

In [229]:
# Drop all the rows with NAN values from the dataframe.
reviews_data = reviews_data.dropna()
reviews_data

Unnamed: 0,reviews.text
0,This product so far has not disappointed. My c...
1,great for beginner or experienced person. Boug...
2,Inexpensive tablet for him to use and learn on...
3,I've had my Fire HD 8 two weeks now and I love...
4,I bought this for my grand daughter when she c...
...,...
34655,This is not appreciably faster than any other ...
34656,Amazon should include this charger with the Ki...
34657,Love my Kindle Fire but I am really disappoint...
34658,I was surprised to find it did not come with a...


In [230]:
# Create a function to preprocess/clean the data.
def preprocess(text):
    text.strip()
    doc = nlp(text)
    return ' '.join([token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct])

In [231]:
# Apply the preprocess function to review_text column 
# and create another column with processed data.
reviews_data['processed_review'] = reviews_data['reviews.text'].apply(preprocess)
reviews_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reviews_data['processed_review'] = reviews_data['reviews.text'].apply(preprocess)


Unnamed: 0,reviews.text,processed_review
0,This product so far has not disappointed. My c...,product far disappoint child love use like abi...
1,great for beginner or experienced person. Boug...,great beginner experienced person buy gift love
2,Inexpensive tablet for him to use and learn on...,inexpensive tablet use learn step nabi thrille...
3,I've had my Fire HD 8 two weeks now and I love...,fire hd 8 week love tablet great value prime m...
4,I bought this for my grand daughter when she c...,buy grand daughter come visit set user enter a...
...,...,...
34655,This is not appreciably faster than any other ...,appreciably fast 1.8 high amp charger kindle k...
34656,Amazon should include this charger with the Ki...,amazon include charger kindle fact charge extr...
34657,Love my Kindle Fire but I am really disappoint...,love kindle fire disappointed kindle power fas...
34658,I was surprised to find it did not come with a...,surprised find come type charge cord purchase ...


In [234]:
# Create a function to analyze the sentiment of the reviews.
def analyze_sentiment(text):

    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    if polarity < 0:
        sentiment = 'Negative'
    elif polarity > 0:
        sentiment = 'Positive'
    else:
        sentiment = 'Neutral'

    return sentiment



In [235]:
# Creata  a sample of data to test the function's efficiency.
reviews_data = reviews_data.sample(4000, random_state=23)
reviews_data.head(10)

Unnamed: 0,reviews.text,processed_review
15607,This tablet is good for kids but the battery l...,tablet good kid battery life long
29152,Fun gadget for adults and kids. Great range fo...,fun gadget adult kid great range ask question
32860,Easy to install and use! Love it. I am tired o...,easy install use love tired gouge cable
6074,My kids really like this tablet. It's easy to ...,kid like tablet easy use
18928,You really can't go wrong here its exactly wha...,wrong exactly want e reader
20753,Fantastic E-Reader. Battery lasts a few weeks ...,fantastic e reader battery last week single ch...
32045,Great little device it reacts fast to the remo...,great little device react fast remote fast fir...
8158,I purchased this item on sale as an alternativ...,purchase item sale alternative tab android rc ...
31332,Good value for the money...easy to set up and ...,good value money easy set use recommend
32190,I have no idea why I did not buy this any soon...,idea buy soon work awesome


In [214]:
# Apply the function to the sample to analyse the sentiment.
reviews_data['sentiment'] = reviews_data['processed_review'].apply(analyze_sentiment)
reviews_data

Unnamed: 0,reviews.text,processed_review,sentiment
25377,always learning new things that it can do to m...,learn new thing life easier speaker little lou...,Positive
21930,This is the ideal tablet for my 8 year old son...,ideal tablet 8 year old son 1,Positive
32002,"Overall, very easy to install. I use stick on ...",overall easy install use stick velcro attach t...,Positive
16739,So far so go it's been hard to find a good tab...,far hard find good tablet 6yr old believe find,Positive
9017,The screen is beautiful. It does everything th...,screen beautiful ipad definitely settle keep m...,Positive
...,...,...,...
2935,I purchased this product with an eye on simply...,purchase product eye simply try tablet product...,Positive
15242,Worst product ever. Constantly screwing up and...,bad product constantly screw fill ad pay remov...,Positive
23937,"Compact, great playback quality. Heard about t...",compact great playback quality hear device fri...,Positive
22393,Great sound from a small speaker. Responds wel...,great sound small speaker respond verbal command,Positive


In [237]:
# Take 2 reviews of choice and compare their similarity.
my_review_of_choice1 = dataframe['reviews.text'][1]
print(f"review 1: {my_review_of_choice1}")
my_review_of_choice1 = nlp(my_review_of_choice1)
my_review_of_choice2 = dataframe['reviews.text'][2]
print(f"review 2: {my_review_of_choice2}")
my_review_of_choice2 = nlp(my_review_of_choice2)
print(f"The similarity is {my_review_of_choice1.similarity(my_review_of_choice2)}")

review 1: great for beginner or experienced person. Bought as a gift and she loves it
review 2: Inexpensive tablet for him to use and learn on, step up from the NABI. He was thrilled with it, learn how to Skype on it already...
The similarity is 0.7935413122974446
