In [1]:
import numpy as np
import pandas as pd
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
from textblob import TextBlob

In [2]:
#Loading the dataset
dataframe=pd.read_csv('amazon_product_reviews.csv')
dataframe.head()

Unnamed: 0,id,dateAdded,dateUpdated,name,asins,brand,categories,primaryCategories,imageURLs,keys,...,reviews.dateSeen,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.username,sourceURLs
0,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,"2018-05-27T00:00:00Z,2017-09-18T00:00:00Z,2017...",False,,0,3,http://reviews.bestbuy.com/3545/5442403/review...,I thought it would be as big as small paper bu...,Too small,llyyue,https://www.newegg.com/Product/Product.aspx%25...
1,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,"2018-05-27T00:00:00Z,2017-07-07T00:00:00Z,2017...",True,,0,5,http://reviews.bestbuy.com/3545/5442403/review...,This kindle is light and easy to use especiall...,Great light reader. Easy to use at the beach,Charmi,https://www.newegg.com/Product/Product.aspx%25...
2,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,2018-05-27T00:00:00Z,True,,0,4,https://reviews.bestbuy.com/3545/5442403/revie...,Didnt know how much i'd use a kindle so went f...,Great for the price,johnnyjojojo,https://www.newegg.com/Product/Product.aspx%25...
3,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,2018-10-09T00:00:00Z,True,177283626.0,3,5,https://redsky.target.com/groot-domain-api/v1/...,I am 100 happy with my purchase. I caught it o...,A Great Buy,Kdperry,https://www.newegg.com/Product/Product.aspx%25...
4,AVqVGZNvQMlgsOJE6eUY,2017-03-03T16:56:05Z,2018-10-25T16:36:31Z,"Amazon Kindle E-Reader 6"" Wifi (8th Generation...",B00ZV9PXP2,Amazon,"Computers,Electronics Features,Tablets,Electro...",Electronics,https://pisces.bbystatic.com/image2/BestBuy_US...,allnewkindleereaderblack6glarefreetouchscreend...,...,2018-05-27T00:00:00Z,True,,0,5,https://reviews.bestbuy.com/3545/5442403/revie...,Solid entry level Kindle. Great for kids. Gift...,Solid entry-level Kindle. Great for kids,Johnnyblack,https://www.newegg.com/Product/Product.aspx%25...


In [3]:
dataframe.shape

(5000, 24)

In [4]:
# Remove missing values from the 'reviews.text' column
clean_data = dataframe['reviews.text'].dropna()
print(clean_data.isnull().sum()) 
clean_data



0


0       I thought it would be as big as small paper bu...
1       This kindle is light and easy to use especiall...
2       Didnt know how much i'd use a kindle so went f...
3       I am 100 happy with my purchase. I caught it o...
4       Solid entry level Kindle. Great for kids. Gift...
                              ...                        
4995    This is a great tablet for the price. Amazon i...
4996    This tablet is the perfect size and so easy to...
4997    Purchased this for my son. Has room to upgrade...
4998    I had some thoughts about getting this for a 5...
4999    this is a steal, have 8 gb model as well.This ...
Name: reviews.text, Length: 5000, dtype: object

In [5]:
# Load the spaCy model and add the SpacyTextBlob component
nlp=spacy.load('en_core_web_sm')
nlp.add_pipe('spacytextblob')

#defining a function to preprocess the data
def preprocess_text(review):
    doc=nlp(review.lower().strip())
    processed = [token.lemma_.lower() for token in doc if not token.is_stop and not token.is_punct]
    processed_text=' '.join(processed)
    return processed_text
    


In [6]:
processed_data=clean_data.apply(preprocess_text)
processed_data

0       think big small paper turn like palm think sma...
1                  kindle light easy use especially beach
2       not know use kindle go low end m happy little ...
3       100 happy purchase catch sale good price norma...
4       solid entry level kindle great kid gift kid fr...
                              ...                        
4995                   great tablet price amazon good job
4996    tablet perfect size easy use read play game pu...
4997    purchase son room upgrade memory allow book ga...
4998    thought get 5 year old screen protector case f...
4999                     steal 8 gb model well.this punch
Name: reviews.text, Length: 5000, dtype: object

In [7]:

# Define a function for sentiment analysis using spaCy's sentiment attribute
def analyze_sentiment(review):
    # Process the review text with spaCy
    doc = nlp(review)
    
    # Retrieve the sentiment score from spaCy's sentiment attribute
    sentiment = doc._.blob.sentiment

    # Determine the sentiment based on the polarity score
    return sentiment



# Define a function for sentiment analysis using TextBlob's polarity attribute
def analyze_sentiment_2(review):
    # Process the review text with spaCy
    doc = nlp(review)
    # Convert the spaCy Doc object to a TextBlob object
    blob = TextBlob(doc.text)
    # Access the polarity attribute
    polarity_status = blob.sentiment.polarity  
    # Determine the sentiment based on the polarity score
    
    if polarity_status > 0:
        return "Positive"
    elif polarity_status < 0:
        return "Negative"
    else:
        return "Neutral"

In [8]:
# Define a function to calculate sentiment polarity
def calculate_polarity(review):
    # Process the review text with spaCy
    doc = nlp(review)
    
    # Access the sentiment polarity attribute
    polarity = doc._.blob.polarity
    return polarity

In [9]:
# Choose a sample of reviews
sample_reviews = processed_data.iloc[2:10]
print(sample_reviews) 
print('\n\n')


# Analyze sentiment and polarity for each review
review_count = 1
for review in sample_reviews:
    sentiment = analyze_sentiment(review)
    print("Review:", f"Review {review_count}")
    print("Sentiment analysis is:", sentiment)
    
    # Calculate polarity for each review
    polarity = calculate_polarity(review)
    polarity_status = analyze_sentiment_2(review)
   
    print(f'Polarity is: {polarity}')
    print(f'Polarity status is: {polarity_status}')
    print('\n\n\n')
    review_count += 1

    print(f'polarity is: {polarity}')
    print(f'polarity status is: {polarity_status}')
    print('\n\n\n')

2    not know use kindle go low end m happy little ...
3    100 happy purchase catch sale good price norma...
4    solid entry level kindle great kid gift kid fr...
5    excellent ebook reader expect device read basi...
6    order daughter black paperwhite love read bit ...
7           buy kindle 2 month ago battery dead charge
8           amazon kindle good ebook upgrade new model
9                    expectation music score fast turn
Name: reviews.text, dtype: object



Review: Review 1
Sentiment analysis is: Sentiment(polarity=0.115625, subjectivity=0.55)
Polarity is: 0.115625
Polarity status is: Positive




polarity is: 0.115625
polarity status is: Positive




Review: Review 2
Sentiment analysis is: Sentiment(polarity=0.2767857142857143, subjectivity=0.5755102040816327)
Polarity is: 0.2767857142857143
Polarity status is: Positive




polarity is: 0.2767857142857143
polarity status is: Positive




Review: Review 3
Sentiment analysis is: Sentiment(polarity=0.43200000000000005, sub

In [10]:
# Choose two product reviews from the 'review.text' column
review1 = processed_data[0]  # Selecting review at index 2
review2 = processed_data[1]  # Selecting review at index 3

# Function to compare similarity between two reviews
def compare_similarity(review1, review2):
    # Process the reviews with spaCy
    doc1 = nlp(review1)
    doc2 = nlp(review2)
    
    # Calculate similarity score
    similarity_score = doc1.similarity(doc2)
    return similarity_score

# Compare similarity between the selected reviews
similarity_score = compare_similarity(review1, review2)


# Print the similarity score
print(f'Similarity score between the two reviews: "{review1}" AND "{review2}":, {similarity_score}')

Similarity score between the two reviews: "think big small paper turn like palm think small read comfortable regular kindle definitely recommend paperwhite instead" AND "kindle light easy use especially beach":, 0.7125896004164829


  similarity_score = doc1.similarity(doc2)
