In [81]:
import pandas as pd
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob

#Load the English language model for SpaCy
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe('spacytextblob')

#Load the dataset
df = pd.read_csv('Consumer_Reviews_Amazon.csv', encoding='unicode_escape')
df.head()

#column names
df.columns




Index(['id', 'dateAdded', 'dateUpdated', 'name', 'asins', 'brand',
       'categories', 'primaryCategories', 'imageURLs', 'keys', 'manufacturer',
       'manufacturerNumber', 'reviews.date', 'reviews.dateAdded',
       'reviews.dateSeen', 'reviews.doRecommend', 'reviews.id',
       'reviews.numHelpful', 'reviews.rating', 'reviews.sourceURLs',
       'reviews.text', 'reviews.title', 'reviews.username', 'sourceURLs'],
      dtype='object')

In [82]:
#select 'reviews.text' column
#dropna() method removes the rows that contains NULL values.
reviews_data=df[['reviews.text']].dropna()
reviews_data.head()


Unnamed: 0,reviews.text
0,I thought it would be as big as small paper bu...
1,This kindle is light and easy to use especiall...
2,Didnt know how much i'd use a kindle so went f...
3,I am 100 happy with my purchase. I caught it o...
4,Solid entry level Kindle. Great for kids. Gift...


In [83]:
# Function to preprocess text data
def preprocess_text (text):
     # Remove stopwords and perform any additional text cleaning
    doc = nlp(text)
    processed_text = ' '.join([token.text for token in doc if not token.is_stop])
    return processed_text

In [84]:
# Function for sentiment analysis
def analyze_sentiment(review):
    # Preprocess the review text
    processed_review = preprocess_text(review)
    
    # Perform sentiment analysis using spaCy
    doc = nlp(processed_review)
    
    # Determine sentiment (for simplicity, just using polarity)
    polarity = doc._.polarity
    
    # Return sentiment polarity
    return polarity

In [85]:
# Test the sentiment analysis function
sample_reviews = [
    "This product is wonderful! I like it.",
    "I could not decide either I am happy or I am disappointed",
    "The product didn't meet my expectations.",
    "This product is definitely useless",
    "I'm very happy with this purchase.",
    #it is sarcastic review
    "Wow, where do I even begin with this AMAZING product from Amazon? Let's start with the fact that it arrived a whole week later than promised, thanks for the speedy delivery, Amazon! And when I finally got my hands on it, surprise surprise, it was broken! But hey, who needs a functioning product anyway, right? And let's not forget about the wonderful customer service experience I had when I tried to get a refund - spent hours on the phone with a robot just to be told I'm out of luck. Thanks for nothing, Amazon! Definitely won't be buying from you again... unless I need another broken item and terrible customer service, of course!"
]

for review in sample_reviews:
    polarity = analyze_sentiment(review)
    print(f"Review: {review} | Polarity: {polarity}")

Review: This product is wonderful! I like it. | Polarity: 1.0
Review: I could not decide either I am happy or I am disappointed | Polarity: 0.025000000000000022
Review: The product didn't meet my expectations. | Polarity: 0.0
Review: This product is definitely useless | Polarity: -0.5
Review: I'm very happy with this purchase. | Polarity: 0.8
Review: Wow, where do I even begin with this AMAZING product from Amazon? Let's start with the fact that it arrived a whole week later than promised, thanks for the speedy delivery, Amazon! And when I finally got my hands on it, surprise surprise, it was broken! But hey, who needs a functioning product anyway, right? And let's not forget about the wonderful customer service experience I had when I tried to get a refund - spent hours on the phone with a robot just to be told I'm out of luck. Thanks for nothing, Amazon! Definitely won't be buying from you again... unless I need another broken item and terrible customer service, of course! | Polarity

In [86]:
#Chose two product reviews 
#I have chosen first and second rows reviews
doc1 = nlp(df['reviews.text'][0])
doc2 = nlp(df['reviews.text'][1])


# Compute cosine similarity
similarity = doc1.similarity(doc2)

print(f"Cosine Similarity: {similarity}")

Cosine Similarity: 0.5644873151556841


  similarity = doc1.similarity(doc2)
