# Simple polarity calculator
This notebook classifies Amazon product reviews as `positive`, `neutral` or `negative`.

In [1]:
# import all necessary libraries
import re
from pathlib import Path
import random

import pandas as pd
import textblob

In [2]:
# absolute location of the input CSV, anchored at the directory the notebook
# is running in, so later cells never have to handle a relative filename
DATA_FILE = Path().resolve().joinpath('data', 'fourth.csv')

In [3]:
# this function removes the parts of the text that are not related to the
# sentiment itself (links and @-mentions)
def remove_noise(text):
    """Return `text` with URLs and @-mentions stripped out.

    Args:
        text: the raw string to clean.

    Returns:
        The same string with every `http://...` / `https://...` link and
        every `@word` token replaced by the empty string. Surrounding
        whitespace is left as it was.

    NOTE(review): nothing in the visible part of this notebook calls this
    helper; confirm whether the review text was meant to be cleaned with it
    before scoring.
    """
    # the scheme separator is `://`; the pattern previously lacked the colon,
    # so real links were never matched
    text = re.sub(r'https?://\S+', '', text)
    # drop @-mentions such as `@username`
    text = re.sub(r'@\w+', '', text)
    return text

In [4]:
# read the review CSV located at DATA_FILE into a dataframe
df = pd.read_csv(DATA_FILE)

# preview the first 5 rows
df.head(n=5)

Unnamed: 0,id,name,asins,brand,categories,keys,manufacturer,reviews.date,reviews.dateAdded,reviews.dateSeen,...,reviews.doRecommend,reviews.id,reviews.numHelpful,reviews.rating,reviews.sourceURLs,reviews.text,reviews.title,reviews.userCity,reviews.userProvince,reviews.username
0,AVpfl8cLLJeJML43AE3S,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,","B00L9EPT8O,B01E6AO69U",Amazon,"Stereos,Remote Controls,Amazon Echo,Audio Dock...","echowhite/263039693056,echowhite/152558276095,...",Amazon,2017-09-20T00:00:00.000Z,,2017-09-28T00:00:00Z,...,True,,0.0,5,http://reviews.bestbuy.com/3545/5588528/review...,Great personal assistant. Sometimes when music...,Much more than I was expecting,,,bfun
1,AVphgVaX1cnluZ0-DR74,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...",B018Y229OU,Amazon,"Fire Tablets,Tablets,Computers & Tablets,All T...",firetablet7displaywifi8gbincludesspecialoffers...,Amazon,2016-07-08T00:00:00.000Z,2017-05-21T02:57:49Z,"2017-04-30T00:20:00.000Z,2017-06-07T08:18:00.000Z",...,True,,0.0,5,http://reviews.bestbuy.com/3545/5025800/review...,My kids are growing out of their ipod touch an...,Great gift for kids at a great price,,,have89
2,AVpjEN4jLJeJML43rpUe,"Fire Kids Edition Tablet, 7 Display, Wi-Fi, 16...",B018Y225IA,Amazon,"Computers/Tablets & Networking,Tablets & eBook...","841667103143,0841667103143,brandnewamazonkindl...",Amazon,2016-07-16T00:00:00.000Z,,"2017-08-27T00:00:00Z,2017-08-09T00:00:00Z,2017...",...,True,,0.0,5,http://reviews.bestbuy.com/3545/5025500/review...,You couldn't get a better buy for $59 than the...,Great tablet!!,,,toons4ever
3,AVpfl8cLLJeJML43AE3S,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,","B00L9EPT8O,B01E6AO69U",Amazon,"Stereos,Remote Controls,Amazon Echo,Audio Dock...","echowhite/263039693056,echowhite/152558276095,...",Amazon,2017-09-24T00:00:00.000Z,,2017-09-28T00:00:00Z,...,True,,0.0,5,http://reviews.bestbuy.com/3545/5588528/review...,Handy and entertaining device to have. Ask it ...,More capabilities added every week,,,GG1953
4,AV1YnRtnglJLPUi8IJmV,Amazon Kindle Paperwhite - eBook reader - 4 GB...,B00OQVZDJM,Amazon,"Walmart for Business,Office Electronics,Tablet...","amazon/b00oqvzdjm,848719056099,amazonkindlepap...",Amazon,2016-08-26T00:00:00.000Z,2017-09-05T22:09:30Z,"2017-08-31T22:33:27.350Z,2017-08-02T19:54:00.4...",...,True,,0.0,5,http://reviews.bestbuy.com/3545/9439005/review...,This is my second Kindle Paperwhite. My first ...,Love my Kindle,,,BarbMS


In [5]:
# show all the headers (column labels) of the loaded dataframe as an array
df.columns.values

array(['id', 'name', 'asins', 'brand', 'categories', 'keys',
       'manufacturer', 'reviews.date', 'reviews.dateAdded',
       'reviews.dateSeen', 'reviews.didPurchase', 'reviews.doRecommend',
       'reviews.id', 'reviews.numHelpful', 'reviews.rating',
       'reviews.sourceURLs', 'reviews.text', 'reviews.title',
       'reviews.userCity', 'reviews.userProvince', 'reviews.username'],
      dtype=object)

In [6]:
# the only columns the rest of the analysis uses: the product name and the
# review text (note that `id` is NOT kept)
keep_columns = ['name', 'reviews.text']

# names of all the columns that need to be dropped, as a concrete list so it
# can be inspected or reused (a `filter` object is exhausted after one pass)
drop_columns = [column for column in df.columns if column not in keep_columns]

# drop everything except the kept columns; rebinding `df` instead of using
# `inplace=True` leaves the previously loaded frame object unmodified
df = df.drop(columns=drop_columns)

In [7]:
# preview the first 5 rows of the dataframe after the column filtering
df.head(n=5)

Unnamed: 0,name,reviews.text
0,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,",Great personal assistant. Sometimes when music...
1,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...",My kids are growing out of their ipod touch an...
2,"Fire Kids Edition Tablet, 7 Display, Wi-Fi, 16...",You couldn't get a better buy for $59 than the...
3,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,",Handy and entertaining device to have. Ask it ...
4,Amazon Kindle Paperwhite - eBook reader - 4 GB...,This is my second Kindle Paperwhite. My first ...


In [8]:
# score the sentiment polarity of a single input statement
def get_polarity(text: str) -> float:
    """Return the polarity that TextBlob computes for `text`.

    Args:
        text: the statement to score.

    Returns:
        The `polarity` attribute of a `textblob.TextBlob` built from `text`.
        In this notebook, negative values are treated as negative sentiment,
        zero as neutral and positive values as positive sentiment.
    """
    blob = textblob.TextBlob(text)
    return blob.polarity

In [9]:
# score every review text and store the result in a new `polarity` column
polarity_scores = df['reviews.text'].apply(get_polarity)
df['polarity'] = polarity_scores

# preview the first 5 rows, now including the polarity
df.head(n=5)

Unnamed: 0,name,reviews.text,polarity
0,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,",Great personal assistant. Sometimes when music...,0.291429
1,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...",My kids are growing out of their ipod touch an...,0.266667
2,"Fire Kids Edition Tablet, 7 Display, Wi-Fi, 16...",You couldn't get a better buy for $59 than the...,0.266667
3,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,",Handy and entertaining device to have. Ask it ...,0.5
4,Amazon Kindle Paperwhite - eBook reader - 4 GB...,This is my second Kindle Paperwhite. My first ...,0.301667


In [10]:
# classify a text as `positive`, `negative` or `neutral` according to its
# polarity value
def get_analysis(polarity: float) -> str:
    """Translate a polarity score into a sentiment label.

    Args:
        polarity: the polarity score of a text.

    Returns:
        'negative' when the score is below zero, 'neutral' when it is exactly
        zero, and 'positive' for every other value.
    """
    if polarity < 0:
        return 'negative'
    return 'neutral' if polarity == 0 else 'positive'

In [11]:
# label each row as `positive`, `negative` or `neutral` based on its polarity
# and store the label in a new `analysis` column
sentiment_labels = df['polarity'].apply(get_analysis)
df['analysis'] = sentiment_labels

# preview the first 5 rows, now including the label
df.head(n=5)

Unnamed: 0,name,reviews.text,polarity,analysis
0,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,",Great personal assistant. Sometimes when music...,0.291429,positive
1,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...",My kids are growing out of their ipod touch an...,0.266667,positive
2,"Fire Kids Edition Tablet, 7 Display, Wi-Fi, 16...",You couldn't get a better buy for $59 than the...,0.266667,positive
3,"Amazon Fire Tv,,,\nAmazon Fire Tv,,,",Handy and entertaining device to have. Ask it ...,0.5,positive
4,Amazon Kindle Paperwhite - eBook reader - 4 GB...,This is my second Kindle Paperwhite. My first ...,0.301667,positive


In [12]:
# order the reviews by ascending polarity, so the most negative reviews come
# first and the most positive ones last
df = df.sort_values(by='polarity', ascending=True)

In [13]:
# preview the first 5 rows of the sorted dataframe (lowest polarity)
df.head(n=5)

Unnamed: 0,name,reviews.text,polarity,analysis
162,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...",My mom lives her tablet! She is obsessed with ...,-0.78125,negative
277,,Purchased as a gift for my husband. He is not ...,-0.6,negative
945,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...",I gave this as a gift. The person that receive...,-0.6,negative
677,"Fire Tablet, 7 Display, Wi-Fi, 8 GB - Includes...","Every time I open my amazon Fire, games pop up...",-0.55,negative
23,"All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...",I had to return my device due to WIFI signal d...,-0.4125,negative


In [14]:
# preview the last 5 rows of the sorted dataframe (highest polarity)
df.tail(n=5)

Unnamed: 0,name,reviews.text,polarity,analysis
514,"Echo (White),,,\nEcho (White),,,",This perfect for my mom who loves to listen to...,1.0,positive
154,,"Awesome device, no cable for...one in every ro...",1.0,positive
963,,Got my FireTv box with kodi and is working awe...,1.0,positive
753,"Echo (White),,,\nEcho (White),,,",Home assistant x bluetooth speaker. Alexa is a...,1.0,positive
445,,Excellent tablet both for reading books and fo...,1.0,positive


In [15]:
# select a few random rows and show the exact text
for index, row in enumerate(sorted(df.iterrows(), key=lambda s: random.random()), 1):
    if index == 6:
        break
    text = row[1][1]
    polarity = round(row[1][2], 2)
    analysis = row[1][3]
    print(f"{index}) '{str(text)}' was classified as '{analysis}' with a polarity of '{polarity}'")

1) 'The perfect gift for someone who needs company. She talks to Alexa and told me she appreciates that's Alexa is very polite.' was classified as 'positive' with a polarity of '0.6'
2) 'Set up was Extreamly easy. Hooks up well with other products like TrackR, Nest and Phillips Hue. Most of all my wife gets all her songs by just asking for them. When she forgets where her phone is she simply ask it to have TrackR ring her phone and when she's cold or hot she just asks for it to be warmer or cooler.' was classified as 'positive' with a polarity of '0.08'
3) 'This has been an exceptional hit for us she totally loves it.' was classified as 'positive' with a polarity of '0.33'
4) 'Handy and entertaining device to have. Ask it anything and it will answer. Haven't tried advanced features yet.' was classified as 'positive' with a polarity of '0.5'
5) 'i bought it as a replacement for my son's tablet that got broken. he likes to watch kids youtube app in the evening when we are winding down bu