In [1]:
# Import packages to be used

import pandas as pd
import spacy
from textblob import TextBlob
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the spacy model for natural language processing

# `nlp` is the shared spaCy pipeline used by the text-processing helpers below.
# NOTE(review): 'en_core_web_sm' is the small English package; per spaCy's
# documentation the "sm" packages ship without static word vectors, so the
# Doc.vector values fed to cosine similarity further down may be of limited
# quality — confirm, and consider 'en_core_web_md' if the scores matter.
nlp = spacy.load('en_core_web_sm')

In [None]:
# Read the product review CSV into a DataFrame.
data = pd.read_csv('amazon_product_reviews.csv')

# Keep only the free-text review column and drop rows where it is missing.
review_column = ['reviews.text']
products_reviews = data.loc[:, review_column].dropna()
products_reviews.head()

In [7]:
# Create function to preprocess the text and clean the data

def preprocess(text):
    """Normalise a review into a space-separated string of lowercase lemmas.

    The text is run through the module-level spaCy pipeline ``nlp``. Stop
    words, punctuation and whitespace-only tokens are discarded; every
    remaining token is replaced by its lowercased lemma.

    Args:
        text: The raw review text to clean.

    Returns:
        str: The kept lemmas joined by single spaces (an empty string when no
        token survives the filtering).
    """
    doc = nlp(text)
    lemmas = [
        token.lemma_.lower()
        for token in doc
        # Whitespace-only tokens (newlines, runs of spaces) are neither stop
        # words nor punctuation, so without the is_space check they would be
        # joined into the output as stray blanks.
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    return ' '.join(lemmas)

In [None]:
# Create a sample of data and apply the preprocess function to it. 
# Display the output in a new column called processed_review. 

# Cap the sample at the number of available reviews: DataFrame.sample raises a
# ValueError when asked for more rows than exist (sampling without replacement).
sample_size = min(5000, len(products_reviews))

# A fixed random_state keeps the sample identical between runs.
reviews_data = products_reviews.sample(sample_size, random_state=42)
reviews_data['processed_review'] = reviews_data['reviews.text'].apply(preprocess)
reviews_data.head()

In [None]:
# Create a function to get the spaCy document vector of a review (a numeric representation of the text, not word frequencies).
# Then apply this function to the processed reviews and store the output in a new column called vector.

def get_vector(text):
    """Return the spaCy document vector for ``text``.

    The text is parsed with the module-level pipeline ``nlp`` and the
    resulting ``Doc.vector`` is returned unchanged.
    """
    return nlp(text).vector

# Compute one document vector per cleaned review, then attach them as a column.
review_vectors = reviews_data['processed_review'].apply(get_vector)
reviews_data['vector'] = review_vectors
reviews_data.head()

In [None]:
# Create a function that uses textblob to get the sentiment and apply it to the processed reviews.
# Then display the output in a new column called sentiment

def get_sentiment(text, neutral_threshold=0.0):
    """Classify ``text`` as Positive, Neutral or Negative using TextBlob.

    Args:
        text: The text to score.
        neutral_threshold: Half-width of the neutral band around zero. A
            polarity whose absolute value is at most this threshold is
            labelled Neutral. With the default of 0.0 only a polarity of
            exactly zero is Neutral, which is the original behaviour.

    Returns:
        str: One of the labels 'Positive', 'Neutral' or 'Negative'.

    Raises:
        ValueError: If ``neutral_threshold`` is negative.
    """
    # Validate first so a bad threshold fails fast, before any text is scored.
    if neutral_threshold < 0:
        raise ValueError("neutral_threshold must be non-negative")

    sentiment_polarity = TextBlob(text).sentiment.polarity

    if sentiment_polarity > neutral_threshold:
        return "Positive"
    if abs(sentiment_polarity) <= neutral_threshold:
        return "Neutral"
    return "Negative"
    
# Label every cleaned review and keep the label next to the text.
# NOTE(review): the labels are computed on 'processed_review', i.e. after
# preprocess has removed stop words. In the sample shown below, the review
# starting "They don't last." loses those words and is labelled Positive —
# consider scoring the raw 'reviews.text' instead; confirm intent.
sentiment_labels = reviews_data['processed_review'].apply(get_sentiment)
reviews_data['sentiment'] = sentiment_labels
reviews_data.head()

In [11]:
# Create a function to get the polarity score of the processed reviews.  
# Display the output in a new column. 

def get_sentiment_polarity(text):
    """Return the TextBlob polarity score of ``text``.

    Per TextBlob's documentation the polarity is a float in [-1.0, 1.0].
    """
    return TextBlob(text).sentiment.polarity

polarity_scores = reviews_data['processed_review'].apply(get_sentiment_polarity)
reviews_data['polarity_score'] = polarity_scores

# Show the first 20 sampled reviews with their cleaned text, score and label.
display_columns = ['reviews.text', 'processed_review', 'polarity_score', 'sentiment']
reviews_data[display_columns].head(20)

Unnamed: 0,reviews.text,processed_review,polarity_score,sentiment
19947,Awesome tablet. I was amazed how fast it is. A...,awesome tablet amazed fast software user friendly,0.525,Positive
7445,They don't last. USed in electronics (like com...,electronic like computer mouse computer keyboa...,0.025,Positive
3525,Thx.,thx,0.0,Neutral
24122,kids love it EZ to use great Quality bought th...,kid love ez use great quality buy grand kid su...,0.538889,Positive
25572,The kids feature is great. My 18 month old tak...,kid feature great 18 month old take love block...,0.44375,Positive
11487,Long lasting product good bang for your buck.,long last product good bang buck,0.216667,Positive
24143,We was looking for a easy tablet that has all ...,look easy tablet great feature function young ...,0.426667,Positive
8374,The 48 packs are the lowest price per AA batte...,48 pack low price aa battery find device use w...,0.014545,Positive
23789,Great pad to give the little ones a headstart ...,great pad little one headstart world tech stur...,0.3375,Positive
10630,Love the great deal on these batteries. Being ...,love great deal battery techie dad lot battery...,0.525,Positive


In [14]:
# Using the vectors and cosine similarity, check the similarity score of two reviews. 

# Pick the two reviews to compare by their position within the sample.
review_1, review_2 = (reviews_data['vector'].iloc[pos] for pos in (0, 20))
review_1_text, review_2_text = (reviews_data['reviews.text'].iloc[pos] for pos in (0, 20))

# cosine_similarity expects 2-D input, so wrap each vector as a one-row batch.
vector1, vector2 = [review_1], [review_2]

# The result is a 1x1 matrix; its single entry is the score for this pair.
similarity_score = cosine_similarity(vector1, vector2)[0][0]

print("Review 1:", review_1_text, "\nReview 2:", review_2_text, sep="\n")
print("\nSimilarity Score:", similarity_score)

Review 1:
Awesome tablet. I was amazed how fast it is. And the software is very user friendly

Review 2:
I bought the Amazon Fire HD 8 for my husband for Christmas. He's never too excited about gifts but he is using this tablet every day and even though he won't admit it, he absolutely loves it. Would definitely buy again, especially during Black Friday specials.

Similarity Score: 0.6941383
