In [40]:
# create a program that performs sentiment analysis 
# use a dataset of product reviews

import pandas as pd
import spacy
from textblob import TextBlob

# Load the spaCy English pipeline; clean_text relies on it for
# tokenisation and stop-word flags
nlp = spacy.load("en_core_web_sm")

# Read the product reviews CSV into a DataFrame
df = pd.read_csv(
    'amazon_product_reviews.csv',
    delimiter=',',
    low_memory=False,
)

# Preview the first five rows (head() defaults to 5)
df.head()

Unnamed: 0,id,name,asins,brand,categories,keys,manufacturer,reviews.date,reviews.dateAdded,reviews.dateSeen,...,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.userCity,reviews.userProvince,reviews.username
0,AVqkIhwDv8e3D1O-lebb,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",B01AHB9CN2,Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...","841667104676,amazon/53004484,amazon/b01ahb9cn2...",Amazon,2017-01-13T00:00:00.000Z,2017-07-03T23:33:15Z,"2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z",...,True,,0.0,5.0,http://reviews.bestbuy.com/3545/5620406/review...,This product so far has not disappointed. My c...,Kindle,,,Adapter
1,AVqkIhwDv8e3D1O-lebb,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",B01AHB9CN2,Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...","841667104676,amazon/53004484,amazon/b01ahb9cn2...",Amazon,2017-01-13T00:00:00.000Z,2017-07-03T23:33:15Z,"2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z",...,True,,0.0,5.0,http://reviews.bestbuy.com/3545/5620406/review...,great for beginner or experienced person. Boug...,very fast,,,truman
2,AVqkIhwDv8e3D1O-lebb,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",B01AHB9CN2,Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...","841667104676,amazon/53004484,amazon/b01ahb9cn2...",Amazon,2017-01-13T00:00:00.000Z,2017-07-03T23:33:15Z,"2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z",...,True,,0.0,5.0,http://reviews.bestbuy.com/3545/5620406/review...,Inexpensive tablet for him to use and learn on...,Beginner tablet for our 9 year old son.,,,DaveZ
3,AVqkIhwDv8e3D1O-lebb,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",B01AHB9CN2,Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...","841667104676,amazon/53004484,amazon/b01ahb9cn2...",Amazon,2017-01-13T00:00:00.000Z,2017-07-03T23:33:15Z,"2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z",...,True,,0.0,4.0,http://reviews.bestbuy.com/3545/5620406/review...,I've had my Fire HD 8 two weeks now and I love...,Good!!!,,,Shacks
4,AVqkIhwDv8e3D1O-lebb,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",B01AHB9CN2,Amazon,"Electronics,iPad & Tablets,All Tablets,Fire Ta...","841667104676,amazon/53004484,amazon/b01ahb9cn2...",Amazon,2017-01-12T00:00:00.000Z,2017-07-03T23:33:15Z,"2017-06-07T09:04:00.000Z,2017-04-30T00:45:00.000Z",...,True,,0.0,5.0,http://reviews.bestbuy.com/3545/5620406/review...,I bought this for my grand daughter when she c...,Fantastic Tablet for kids,,,explore42


In [41]:
# (rows, columns) of the frame as loaded from the CSV
df.shape

(34660, 21)

In [42]:
# Count the missing values in the three columns that look (almost) empty,
# and in the review text column
reviewsid_nan_count = df['reviews.id'].isna().sum()
reviewsusercity_nan_count = df['reviews.userCity'].isna().sum()
reviewsuserprovincce_nan_count = df['reviews.userProvince'].isna().sum()
reviewstext_nan_count = df['reviews.text'].isna().sum()

# Report one count per line, in the order computed above
print(
    reviewsid_nan_count,
    reviewsusercity_nan_count,
    reviewsuserprovincce_nan_count,
    reviewstext_nan_count,
    sep='\n',
)

34659
34660
34660
1


In [43]:
# Drop 'reviews.id': all but one of its values are NaN
df = df.drop(columns=['reviews.id'])

In [44]:
# Both user-location columns are entirely NaN, so drop them in one call
df = df.drop(columns=['reviews.userCity', 'reviews.userProvince'])

In [45]:
# Confirm the column count went down after dropping the NaN columns
df.shape

(34660, 18)

In [46]:
# Remove the only row whose 'reviews.text' value is NaN.
# Reassign rather than use inplace=True: the cell then produces a new frame
# instead of mutating the object that earlier cells displayed.
df = df.dropna(subset=['reviews.text'])

In [47]:
# Confirm exactly one row was removed by the dropna step
df.shape

(34659, 18)

In [48]:
# Take all the 'reviews.text' data and separate it into a new variable
reviews_data = df['reviews.text']

In [49]:
# Take 10 randomly selected reviews for testing the model.
# A fixed random_state makes the draw repeatable, so the same reviews (and
# therefore the same sentiment output) come back on every fresh run.
reviews_data = reviews_data.sample(n=10, random_state=42)

In [50]:
# Create a function to clean the text:
# drop stop words (but keep negators), lower-case and strip each token,
# then join the cleaned tokens into a single string

# Negators are flagged as stop words, yet removing them reverses the meaning
# of the phrase they belong to ("not disappointed" -> "disappointed"), which
# would flip the sentiment score. They are therefore kept.
_NEGATION_WORDS = frozenset({"no", "not", "n't", "never"})


def clean_text(text):
    """Return ``text`` lower-cased with stop words removed.

    The text is tokenised with the notebook-level spaCy pipeline ``nlp``.
    Tokens flagged as stop words are dropped unless they are one of
    ``_NEGATION_WORDS``. Tokens that are empty after stripping (pure
    whitespace) are skipped so the result never contains doubled spaces.

    Args:
        text: The raw review text (a ``str``).

    Returns:
        The surviving tokens, lower-cased and stripped, joined by single
        spaces.
    """
    doc = nlp(text)

    cleaned_tokens = []
    for token in doc:
        lowered = token.text.lower().strip()
        if not lowered:
            # Whitespace-only token: would add an empty entry to the join.
            continue
        if token.is_stop and lowered not in _NEGATION_WORDS:
            continue
        cleaned_tokens.append(lowered)

    return ' '.join(cleaned_tokens)


In [51]:
# Clean every sampled review with clean_text; the result replaces
# reviews_data, so the following cells work on the cleaned strings
reviews_data = reviews_data.apply(clean_text)

In [52]:
# Score a review with TextBlob and turn its polarity into a label:
# below zero -> "Negative", exactly zero -> "Neutral", otherwise "Positive"
def analyse_sentiment(review):
    """Return TextBlob's sentiment for ``review`` together with a label.

    Args:
        review: The text to score.

    Returns:
        A 3-tuple ``(sentiment, polarity, label)``: the object returned by
        ``TextBlob(review).sentiment``, its ``polarity`` attribute, and one
        of "Negative", "Neutral" or "Positive" derived from that polarity.
    """
    sentiment = TextBlob(review).sentiment
    polarity = sentiment.polarity

    label = (
        "Negative" if polarity < 0
        else "Neutral" if polarity == 0
        else "Positive"
    )

    return sentiment, polarity, label

In [53]:
# Score each cleaned review; every element of sentiment_results is the
# (sentiment, polarity, label) tuple returned by analyse_sentiment
sentiment_results = reviews_data.apply(analyse_sentiment)

In [54]:
# Display the results
# Walk the cleaned reviews and their sentiment tuples side by side,
# unpacking each tuple directly in the loop header
# Print back to user in readable format
for review, (textblob_sentiment, textblob_polarity, sentiment_label) in zip(
        reviews_data, sentiment_results):
    print(f"Review: {review}")
    print(f"TextBlob Sentiment: {textblob_sentiment}")
    print(f"TextBlob Polarity: {textblob_polarity}")
    print(f"Sentiment Label: {sentiment_label}")
    print()

Review: youngest daughter pleased tablet .
TextBlob Sentiment: Sentiment(polarity=0.5, subjectivity=1.0)
TextBlob Polarity: 0.5
Sentiment Label: Positive

Review: easy use . great price starter tablet ! bought 3 , 1 & 2 christmas gifts family . feels good spirit giving know use everyday . :)
TextBlob Sentiment: Sentiment(polarity=0.4866666666666667, subjectivity=0.7566666666666667)
TextBlob Polarity: 0.4866666666666667
Sentiment Label: Positive

Review: kindle awesome . great picture fast internet .
TextBlob Sentiment: Sentiment(polarity=0.6666666666666666, subjectivity=0.7833333333333333)
TextBlob Polarity: 0.6666666666666666
Sentiment Label: Positive

Review: works stuff . speak shall comply .
TextBlob Sentiment: Sentiment(polarity=0.0, subjectivity=0.0)
TextBlob Polarity: 0.0
Sentiment Label: Neutral

Review: right size travel . perfect reading . .
TextBlob Sentiment: Sentiment(polarity=0.6428571428571428, subjectivity=0.7678571428571428)
TextBlob Polarity: 0.6428571428571428
Sentim