# Introduction

## Commerical Product Review Sentiment Analysis

Customer reviews from Amazon and other customer review sites will be analyzed for customer sentiment.

### Method Approach

# Imports

In [70]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression, Ridge, Lasso, RidgeCV, LassoCV
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.metrics import r2_score, confusion_matrix, roc_auc_score

import requests
import time
from nltk import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from bs4 import BeautifulSoup
from nltk.stem.porter import PorterStemmer
import regex as re

# Gather Data

In [71]:
amazon_reviews = pd.read_excel('./data/K Mini Reviews.xlsx')
amazon_reviews.head()

Unnamed: 0,ID,FISCAL_YEAR,FISCAL_PERIOD,FISCAL_PERIOD_NUM_COUNTER,FISCAL_WEEK_NUM_OF_YEAR,REVIEW_DATE,BRAND,PRODUCT,PRODUCT_ID,RATING,...,TAG,NPS,STATUS,REPIED_TO,INTERNAL_COMMENTS,PERMALINK,SENTIMENT,ALIAS,NEWLY_LOADED,CURRENT_PERIOD
0,573932,2019,4,233,14,2019-03-31,Keurig,"Keurig K-Mini Single Serve Coffee Maker, Oasis",39021,1,...,Single-Serve Brewer,0,Open Ticket,Yes,,,NEGATIVE,K-Mini,0,0
1,564358,2019,2,231,8,2019-02-20,Keurig,"Keurig K-Mini Single Serve Coffee Maker, Black",39022,1,...,Single-Serve Brewer,0,Open Ticket,No,,,NEGATIVE,K-Mini,0,0
2,573987,2019,4,233,14,2019-04-01,Keurig,"Keurig K-Mini Single Serve Coffee Maker, Black",39022,1,...,Single-Serve Brewer,0,Open Ticket,Yes,,,NEGATIVE,K-Mini,0,0
3,574096,2019,4,233,14,2019-04-03,Keurig,"Keurig K-Mini Single Serve Coffee Maker, Studi...",39023,3,...,Single-Serve Brewer,0,Open Ticket,Yes,,,NEUTRAL,K-Mini,0,0
4,574136,2019,4,233,14,2019-04-04,Keurig,"Keurig K-Mini Single Serve Coffee Maker, Black",39022,4,...,Single-Serve Brewer,0,Open Ticket,Yes,,,POSITIVE,K-Mini,0,0


In [72]:
all_reviews = pd.read_excel('./data/K_Mini_Reviews-2019-06-27.xlsx')
all_reviews.head()

Unnamed: 0,ID,FISCAL_YEAR,FISCAL_PERIOD,FISCAL_PERIOD_NUM_COUNTER,FISCAL_WEEK_NUM_OF_YEAR,REVIEW_DATE,BRAND,PRODUCT,PRODUCT_ID,RATING,...,TAG,NPS,STATUS,REPIED_TO,INTERNAL_COMMENTS,PERMALINK,SENTIMENT,ALIAS,NEWLY_LOADED,CURRENT_PERIOD
0,319852,2018,6,223,25,2018-06-22,Keurig,Keurig K-Mini Single Serve K-Cup Pod Coffee Maker,31565,4,...,Single-Serve Brewers,0.0,,No,,,POSITIVE,K-Mini,0,0
1,319891,2018,6,223,25,2018-06-23,Keurig,Keurig K-Mini Single Serve K-Cup Pod Coffee Maker,31565,5,...,Single-Serve Brewers,0.0,,No,,,POSITIVE,K-Mini,0,0
2,319892,2018,6,223,25,2018-06-23,Keurig,Keurig K-Mini Single Serve K-Cup Pod Coffee Maker,31565,5,...,Single-Serve Brewers,0.0,,No,,,POSITIVE,K-Mini,0,0
3,319893,2018,6,223,25,2018-06-23,Keurig,Keurig K-Mini Single Serve K-Cup Pod Coffee Maker,31565,5,...,Single-Serve Brewers,0.0,,No,,,POSITIVE,K-Mini,0,0
4,319929,2018,6,223,26,2018-06-24,Keurig,Keurig K-Mini Single Serve K-Cup Pod Coffee Maker,31565,5,...,Single-Serve Brewers,0.0,,No,,,POSITIVE,K-Mini,0,0


# EDA

In [73]:
all_reviews.dtypes

ID                                    int64
FISCAL_YEAR                           int64
FISCAL_PERIOD                         int64
FISCAL_PERIOD_NUM_COUNTER             int64
FISCAL_WEEK_NUM_OF_YEAR               int64
REVIEW_DATE                  datetime64[ns]
BRAND                                object
PRODUCT                              object
PRODUCT_ID                            int64
RATING                                int64
SOURCE                               object
REVIEWER_NAME                        object
REVIEW                               object
TITLE                                object
SYNDICATED                           object
INCENTIVE_PROGRAM                    object
VERIFIED_REVIEW                      object
PRICE                               float64
SIZE                                float64
COLOR                                object
EDITION                             float64
STYLE                               float64
OTHER_ATTRIBUTES                

In [74]:
amazon_reviews.dtypes

ID                                    int64
FISCAL_YEAR                           int64
FISCAL_PERIOD                         int64
FISCAL_PERIOD_NUM_COUNTER             int64
FISCAL_WEEK_NUM_OF_YEAR               int64
REVIEW_DATE                  datetime64[ns]
BRAND                                object
PRODUCT                              object
PRODUCT_ID                            int64
RATING                                int64
SOURCE                               object
REVIEWER_NAME                        object
REVIEW                               object
TITLE                                object
SYNDICATED                           object
INCENTIVE_PROGRAM                    object
VERIFIED_REVIEW                      object
PRICE                               float64
SIZE                                float64
COLOR                                object
EDITION                             float64
STYLE                               float64
OTHER_ATTRIBUTES                

In [76]:
list(amazon_reviews.columns)

['ID',
 'FISCAL_YEAR',
 'FISCAL_PERIOD',
 'FISCAL_PERIOD_NUM_COUNTER',
 'FISCAL_WEEK_NUM_OF_YEAR',
 'REVIEW_DATE',
 'BRAND',
 'PRODUCT',
 'PRODUCT_ID',
 'RATING',
 'SOURCE',
 'REVIEWER_NAME',
 'REVIEW',
 'TITLE',
 'SYNDICATED',
 'INCENTIVE_PROGRAM',
 'VERIFIED_REVIEW',
 'PRICE',
 'SIZE',
 'COLOR',
 'EDITION',
 'STYLE',
 'OTHER_ATTRIBUTES',
 'MODEL',
 'SKU',
 'CATEGORY',
 'TAG',
 'NPS',
 'STATUS',
 'REPIED_TO',
 'INTERNAL_COMMENTS',
 'PERMALINK',
 'SENTIMENT',
 'ALIAS',
 'NEWLY_LOADED',
 'CURRENT_PERIOD']

In [78]:
amazon_reviews['NPS'] = amazon_reviews.NPS.astype(float)

In [80]:
amazon_reviews.dtypes

ID                                    int64
FISCAL_YEAR                           int64
FISCAL_PERIOD                         int64
FISCAL_PERIOD_NUM_COUNTER             int64
FISCAL_WEEK_NUM_OF_YEAR               int64
REVIEW_DATE                  datetime64[ns]
BRAND                                object
PRODUCT                              object
PRODUCT_ID                            int64
RATING                                int64
SOURCE                               object
REVIEWER_NAME                        object
REVIEW                               object
TITLE                                object
SYNDICATED                           object
INCENTIVE_PROGRAM                    object
VERIFIED_REVIEW                      object
PRICE                               float64
SIZE                                float64
COLOR                                object
EDITION                             float64
STYLE                               float64
OTHER_ATTRIBUTES                

In [85]:
amazon_reviews.shape

(79, 36)

In [86]:
all_reviews.shape

(1182, 36)

In [83]:
converged_reviews = amazon_reviews[all_reviews.duplicated()]
converged_reviews

  """Entry point for launching an IPython kernel.


Unnamed: 0,ID,FISCAL_YEAR,FISCAL_PERIOD,FISCAL_PERIOD_NUM_COUNTER,FISCAL_WEEK_NUM_OF_YEAR,REVIEW_DATE,BRAND,PRODUCT,PRODUCT_ID,RATING,...,TAG,NPS,STATUS,REPIED_TO,INTERNAL_COMMENTS,PERMALINK,SENTIMENT,ALIAS,NEWLY_LOADED,CURRENT_PERIOD


In [87]:
merged_reviews = pd.merge(amazon_reviews, all_reviews)
merged_reviews.shape

(22, 36)

Notes:
1. 