# Imports

In [1]:
import string
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier
from sklearn.impute import SimpleImputer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, r2_score
from sklearn.feature_extraction.text import CountVectorizer
from textblob import Word
from tqdm.notebook import tqdm

## Load dataset

In [2]:
# business = pd.read_json('yelp_academic_dataset_business.json', lines=True)
# checkin = pd.read_json('yelp_academic_dataset_checkin.json', lines=True)
# tip = pd.read_json('yelp_academic_dataset_tip.json', lines=True)
# user = pd.read_json('yelp_academic_dataset_user.json', lines=True)

df = pd.read_json('yelp_academic_dataset_review.json', lines=True, nrows=500000)

## Setup Dataframe

In [3]:
df = df[['text', 'stars', 'useful', 'funny', 'cool']]

## Pre Processing

### Fill NA scores with 0

In [4]:
df = df.fillna(0)

### Strip Punctuation

In [5]:
def remove_punctuation(text):
    # Using string.punctuation to get all punctuation characters
    translator = str.maketrans('', '', string.punctuation)
    return text.translate(translator)

In [6]:
df['text'] = df['text'].str.lower()
df['text'] = df['text'].apply(remove_punctuation)

### Add metadata

In [7]:
df['word_count'] = df['text'].apply(lambda x: len(x.split()))
df['char_count'] = df['text'].apply(lambda x: len(x))

### Stop words
Stop words are words deemed to add little or no value to a review. i.e. 'and.'
Stopwords are downloaded from nltk package.

In [8]:
nltk.download('stopwords')
stop_words = stopwords.words('english')
df['stopword_count'] = df['text'].apply(lambda x: len([word for word in x.split() if word.lower() in stop_words]))
df['stopword_rate'] = df['stopword_count'] / df['word_count']

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/davecameron/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [9]:
df['stopwords_removed'] = df['text'].apply(lambda x: " ".join(word for word in x.split() if word not in stop_words))

### Irrelevant words not in stop_words
Looking for remaining words that might be considered stop words.

In [10]:
pd.Series(" ".join(df['stopwords_removed']).split()).value_counts()[:30]

food         262568
good         242324
place        233789
great        227360
service      162133
like         154798
time         153649
get          148224
one          148148
back         140521
would        136725
go           123862
really       118300
also         107095
us            96193
got           94151
nice          90478
best          88098
well          85383
dont          84544
even          84314
staff         82029
ordered       80244
im            79501
ive           78539
friendly      76022
order         75928
love          75718
always        74976
delicious     74122
Name: count, dtype: int64

In [13]:
other_stop_words = ['get', 'would', 'got', 'us', 'also', 'even', 'ive', 'im']

In [14]:
df['clean_reviews'] = df['stopwords_removed'].apply(lambda x: " ".join(word for word in x.split() if word not in other_stop_words))

### Lemmatization
Lemmatization reduces words to their root. i.e. Running is deconjucated to run, etc.

In [15]:
df['lemmatized'] = df['clean_reviews'].apply(lambda x: " ".join(Word(word).lemmatize() for word in x.split()))

In [16]:
df['clean_word_count'] = df['lemmatized'].apply(lambda x: len(x.split()))
df['clean_rate'] = df['clean_word_count'] / df['word_count']

## Sentiment Analysis

In [17]:
from textblob import TextBlob

In [18]:
df['polarity'] = df['lemmatized'].apply(lambda x: TextBlob(x).sentiment[0])
df['subjectivity'] = df['lemmatized'].apply(lambda x: TextBlob(x).sentiment[1])

In [19]:
df.drop(['char_count','stopword_count', 'stopwords_removed', 'clean_reviews'], axis=1, inplace=True)

In [21]:
sia = SentimentIntensityAnalyzer()

In [22]:
compound_polarity = {}
negativity = {}
for i, row in tqdm(df.iterrows(), total=len(df)):
    negativity[i] = sia.polarity_scores(row['lemmatized'])['neg']
    compound_polarity[i] = sia.polarity_scores(row['lemmatized'])['compound']

  0%|          | 0/500000 [00:00<?, ?it/s]

In [23]:
cp = pd.Series(compound_polarity)
neg = pd.Series(negativity)

In [24]:
df['compound_polarity'] = cp
df['negativity'] = neg

In [25]:
conditions = [
    (df['compound_polarity'] > 0.85),
    (df['compound_polarity'] > 0.5),
    (df['compound_polarity'] > 0),
    (df['compound_polarity'] > -0.5)
]
choices = [5, 4, 3, 2]
df['star_prediction'] = np.select(conditions, choices, default=0)

In [26]:
down = 0
up = 0
for i, row in tqdm(df.iterrows(), total=len(df)):
    if row['negativity'] > 0.1 and row['star_prediction'] > 1.0:
        row['star_prediction'] -= 1
        down += 1
    if row['polarity'] < 0.25 and row['star_prediction'] > 1.0:
        row['star_prediction'] -= 1
        down += 1
    if row['negativity'] < 0.01 and row['star_prediction'] < 5.0:
        row['star_prediction'] += 1
        up += 1

print(f'Altered {down} rows down and {up} rows up')

  0%|          | 0/500000 [00:00<?, ?it/s]

Altered 283430 rows down and 66761 rows up


In [27]:
df['delta'] = abs(df['stars'] - df['star_prediction'])

In [28]:
df.describe()['delta']

count    500000.000000
mean          0.892518
std           0.996385
min           0.000000
25%           0.000000
50%           1.000000
75%           1.000000
max           5.000000
Name: delta, dtype: float64

In [29]:
df['delta'].value_counts()

delta
0    211470
1    183861
2     63894
3     29629
4     10007
5      1139
Name: count, dtype: int64

In [184]:
pd.set_option('display.max_colwidth', None)
mask = df['delta'].isin([5])
bad_preds = df[mask]
print(f'{len(bad_preds)} 5 star misses made')

19 5 star misses made


In [195]:
bad_preds[['text', 'polarity', 'compound_polarity', 'negativity', 'stars', 'star_prediction']]

Unnamed: 0,text,polarity,compound_polarity,negativity,stars,star_prediction
47,if you want to pay for everything a la carte this is the place for you \nfood wasnt terrible not impressive\nthey brought a basket of chips and some tomato sauce which i asked politely for something spicier and some pico de gallo she brought them happily to me and the salsa was much better when asked what we would like to drink i asked for a coke and she brought out a bottle which i stated i wanted the fountain drink she said oh thats only pepsi never mentioned that they only had bottle drinks for coke \nwe ordered our food which was reasonably priced asked for sour cream and also to put cheese on the taco she let us know cheese was extra \nit was 250 extra for another basket of chips \nwhen i received the bill we paid more for the condiments then the actual food side of sour cream 200 pico de gallo 250 salsa 200 chips 250 cheese 100 and the bottled coke that we didnt want 500 \njust a suggestionwhen you order anything make sure to ask if there is an extra charge,0.141667,0.8847,0.037,1,5
122,we stayed at the saint last year for a girls trip the lobby is lovely they have a cool bar and restaurant that you cant help but to ohh and ahh over when you walk in all the things im accustomed too when i stay at a hotel anywhereambiance a little sexy but not overkill everyone is so friendly etc im thinking oh shhhh this is going to be a great weekend i checked in a few hours before my girl arrived because i had a ton of work i needed to do so i set up my laptop ordered some room service and worked for about 4 hours straight left out to buy a bottle and get a manipedi my girl arrivesits about to be some trouble in nola the two of us and no supervisionwe pop out for dinner come back in giggling as usual and we see ita hugh flying roach im freaked out i literally walk right out of the room down to the front desk i cannot they tell me theyre sending a maintenance worker to handle it not good enough if theres one thats bold enough to show itself he has other cohorts that prefer to live behind the scenes they give us another room but tell me they cant do more until 8am no the room the move us too also has small spiders and other dead small bugs in the corners of the window the tv doesnt work and one of the lamps doesnt have bulbs in it come on at this point its about 3am and i know im not staying here past 815am i dont want to sit my purse let alone my luggage in this hotel i go online to my normal go to hotel the w and book us there for the rest of the trip 8am comes and goes no manager comes to our room i go down and demand to see someone immediately this younger gentlemen comes out and apologizes i thank him for his apology and politely request all of my money back immediately he obliged and asked if i would consider coming back after changes have been made bless his heart no dear,0.086774,0.9679,0.078,1,5
165,came here after my husband bought home their lemon pepper wings they have plenty of flavors ranging from ones youve heard of lemon pepper mild to new orleans flavors yaka mein noonie bird they also have their drinks the watermelon is to die for i dont know what they put in it but lord help them the location isnt the best but theyre trying to put something back in the community its in the same section with the daiquiri shop so its really not that bad during the day i probably wouldnt come at night by myself definitely give this place a try hey you might even see skip,0.128283,-0.8914,0.193,4,0
347,dinner review\n\ni rarely write reviews but this honestly was one of the worst group dining experiences ive ever had it was objectively a horrible experience even without comparing it to the hype it receives and ruined what was supposed to be a nice birthday dinner celebration \n\nwe had a group of 8 that had a reservation made well in advance short wait but not too bad keep in mind it seemed like a normal friday night but werent super busy or atcapacity\n\nit was a birthday dinner so i told the hostesses that it was her birthday and asked if they could bring out something for her at the end of the meal which was acknowledged pretty standard stuff whether at a applebees or the french laundry we were sat at a booth right next to the maitre d stand and next to all the wait staffs home base so pretty much no excuse for not getting any attention \n\nafter getting us waters it took them a long time to even come by and acknowledge us after ordering wine it took them another good 1520 minutes to even bring us our wine and no one even checked in on us hint if youre running a restaurant wine is your profit center and doesnt take any work other than taking a bottle off a shelf and opening it bring it out asap \n\nafter ordering food it was more of the samea long wait for food to start coming out and no one checking in on us at all we drank the bottles we ordered and had to get up and flag someone down to get another wine order a couple of us even got up during the meal to remind them to bring out something for birthday girl at the end of the meal \n\nonce our meal was done and taken away we were given dessert menus and the server disappeared for 1015 minutes again after coming back he asked what dessert we wanted so we ordered some coffees and waited and waited eventually we got up and asked him what was going on and if they were going to bring anything out like we asked multiple times \n\nthey finally brought out a couple of cookies with a candle\n\nin summary the food was pretty hit or miss pretty mediocre for this price point whipped feta like other reviews have said was amazing and the braised lamb was really good but the sirloin was chewy and bland and the grilled caesar tasted like a soggy lukewarm wilted mess two orders were left untouched by a group of 8 atmosphere was nice and trendy but the service which is a huge reason to spend your hardearned at a restaurant was absolutely horrendous \n\ni really love going to cool popular restaurants and having a great time and i hate writing such a negative review but i had to share my experience especially for a restaurant that is getting an awful lot of undeserved hype,0.051646,0.9609,0.131,1,5
387,new owners turned it into a double threat bad food and bad service shame i loved this place,-0.112727,-0.8625,0.553,4,0
...,...,...,...,...,...,...
9703,my name is angela s and i have a froyo problem no really i dont even like it that much yet i continue to go to all of these stupid frozen yogurt joints like some dairycrazed masochist and eat 417 worth of disappointment at a time \n\nand then came tutti frutti \n\nits good its a little different the toppings are just soso but the yogurt itself is really good they have interesting flavors including taro if you havent tried something taro flavored tutti frutti is a good place to start it doesnt even need toppings \n\nwith so many froyo places in stl its rare that ive visited the same one twice but tutti frutti could become a standby,0.193269,-0.5103,0.202,4,0
9748,what miserable customer service \n\nwe went here as part of a special event for families of autistic children while i commend the owners for reaching out to people with special needs the way they handle themselves behind the counter is sickening \n\nwhen we got there we found out that the entire inside of the park was closed something that was not mentioned anywhere in their press release not on their own website either despite their insistence that it was so all the autistic kids were confined to a wave pool and two smaller areas which was far from adequate for this particular crowd when i complained to the manager dan he was clearly overwhelmed by the situation and had no answers to give regarding why it was never stated that most of the facility was off limits but at least he was polite unlike brandon \n\nbrandon another assistant manager interjected himself unasked in our conversation and interrupted me at every sentence i guess if the customer cant finish a sentence it doesnt count as a complaint even when i asked him to let me speak he kept talking over me great managerial technique there \n\nhe offered to refund our tickets but only if we left right then while we had no problem leaving there were a lot of families whose children took a lot effort to move either because of physical or developmental disabilities had either of them offered refunds at our leisure as a sign of good faith we would have stayed and spent more than the cost of the tickets on the insanely over priced food 12 for a basket of chicken fingers is utterly ridiculous plus guaranteed our return another time as it is theyve lost our business permanently \n\nif you choose to go here i hope you have fun just dont go expecting resolutions to any problems you might have because apparently they dont know the first thing about customer service,-0.066667,0.8859,0.116,1,5
9845,i know that pi is a popular place and that it gets packed in the loop especially on friday and saturday nights since they dont take reservations for 2 people and being the smart yelper that i am i called first to check on the wait they said that by the time i came in there would be no wait especially because the concert at the pageant was about to start so the bf and i headed down there 10 min later and he dropped me off so i wouldnt have to walk in the cold what a gentleman i walked in and the snooty hostesses told me there was a 40 min wait um there is no way all of those people just showed up in 10 min clearly someone fudged the truth to get me to come in and there wasnt a single seat or place to stand in the bar or the tiny waiting area i tried to call him to tell him not to park and walk down he would probably have to walk pretty far but he wasnt paying attention to his phone so he never got the message he walked in took one look and i knew we were both on the same page i politely told the hostesses that they could cross off our name and then waited by the door while he pulled the car around they proceeded to stare at me very obviously and whisper during this time until i left which is bad business whether youre a catty hostess that still thinks she is in high school or maybe still is or a hostess raving about how fabulous my outfit is either way girls its rude\n\ndont get me wrongthe pizza here is good but its no mystic pizza and it isnt the only pizza place around this story has a happy ending though we went to deweys were promptly seated experienced the best service there as we always do and left with full bellies probably way before we wouldve even gotten our pizza at pi \n\nnever again,0.162114,0.9441,0.050,1,5
9853,im not sure what happened here but this place went downhill fast i just had a pizza steak and chicken parm sandwich that were a step above edible seriously im still trying to get the taste of the pizza steak out of my mouth the only thing worse than being excited for a steak sandwich and having it suck is being excited for wings and having them forget to send the damn blue cheese really come on i can honestly say that based on this experience i would never go back \n\nfirst review great food and friendly service we had an italian hoagie and cheesesteak hoagie and hot wings all were fantastic the rolls are soft and fresh they were full of delicious meat the wings were large and the hot sauce is the best ive had in the area ill be back for sure,0.229850,0.9622,0.148,1,5


In [186]:
mask = df['delta'].isin([4])
bad_preds = df[mask]
print(f'{len(bad_preds)} 4 star misses made')

191 4 star misses made


In [187]:
bad_preds[['text', 'compound_polarity', 'negativity', 'stars', 'star_prediction']]

Unnamed: 0,text,compound_polarity,negativity,stars,star_prediction
47,if you want to pay for everything a la carte this is the place for you \nfood wasnt terrible not impressive\nthey brought a basket of chips and some tomato sauce which i asked politely for something spicier and some pico de gallo she brought them happily to me and the salsa was much better when asked what we would like to drink i asked for a coke and she brought out a bottle which i stated i wanted the fountain drink she said oh thats only pepsi never mentioned that they only had bottle drinks for coke \nwe ordered our food which was reasonably priced asked for sour cream and also to put cheese on the taco she let us know cheese was extra \nit was 250 extra for another basket of chips \nwhen i received the bill we paid more for the condiments then the actual food side of sour cream 200 pico de gallo 250 salsa 200 chips 250 cheese 100 and the bottled coke that we didnt want 500 \njust a suggestionwhen you order anything make sure to ask if there is an extra charge,0.8847,0.037,1,5
122,we stayed at the saint last year for a girls trip the lobby is lovely they have a cool bar and restaurant that you cant help but to ohh and ahh over when you walk in all the things im accustomed too when i stay at a hotel anywhereambiance a little sexy but not overkill everyone is so friendly etc im thinking oh shhhh this is going to be a great weekend i checked in a few hours before my girl arrived because i had a ton of work i needed to do so i set up my laptop ordered some room service and worked for about 4 hours straight left out to buy a bottle and get a manipedi my girl arrivesits about to be some trouble in nola the two of us and no supervisionwe pop out for dinner come back in giggling as usual and we see ita hugh flying roach im freaked out i literally walk right out of the room down to the front desk i cannot they tell me theyre sending a maintenance worker to handle it not good enough if theres one thats bold enough to show itself he has other cohorts that prefer to live behind the scenes they give us another room but tell me they cant do more until 8am no the room the move us too also has small spiders and other dead small bugs in the corners of the window the tv doesnt work and one of the lamps doesnt have bulbs in it come on at this point its about 3am and i know im not staying here past 815am i dont want to sit my purse let alone my luggage in this hotel i go online to my normal go to hotel the w and book us there for the rest of the trip 8am comes and goes no manager comes to our room i go down and demand to see someone immediately this younger gentlemen comes out and apologizes i thank him for his apology and politely request all of my money back immediately he obliged and asked if i would consider coming back after changes have been made bless his heart no dear,0.9679,0.078,1,5
165,came here after my husband bought home their lemon pepper wings they have plenty of flavors ranging from ones youve heard of lemon pepper mild to new orleans flavors yaka mein noonie bird they also have their drinks the watermelon is to die for i dont know what they put in it but lord help them the location isnt the best but theyre trying to put something back in the community its in the same section with the daiquiri shop so its really not that bad during the day i probably wouldnt come at night by myself definitely give this place a try hey you might even see skip,-0.8914,0.193,4,0
347,dinner review\n\ni rarely write reviews but this honestly was one of the worst group dining experiences ive ever had it was objectively a horrible experience even without comparing it to the hype it receives and ruined what was supposed to be a nice birthday dinner celebration \n\nwe had a group of 8 that had a reservation made well in advance short wait but not too bad keep in mind it seemed like a normal friday night but werent super busy or atcapacity\n\nit was a birthday dinner so i told the hostesses that it was her birthday and asked if they could bring out something for her at the end of the meal which was acknowledged pretty standard stuff whether at a applebees or the french laundry we were sat at a booth right next to the maitre d stand and next to all the wait staffs home base so pretty much no excuse for not getting any attention \n\nafter getting us waters it took them a long time to even come by and acknowledge us after ordering wine it took them another good 1520 minutes to even bring us our wine and no one even checked in on us hint if youre running a restaurant wine is your profit center and doesnt take any work other than taking a bottle off a shelf and opening it bring it out asap \n\nafter ordering food it was more of the samea long wait for food to start coming out and no one checking in on us at all we drank the bottles we ordered and had to get up and flag someone down to get another wine order a couple of us even got up during the meal to remind them to bring out something for birthday girl at the end of the meal \n\nonce our meal was done and taken away we were given dessert menus and the server disappeared for 1015 minutes again after coming back he asked what dessert we wanted so we ordered some coffees and waited and waited eventually we got up and asked him what was going on and if they were going to bring anything out like we asked multiple times \n\nthey finally brought out a couple of cookies with a candle\n\nin summary the food was pretty hit or miss pretty mediocre for this price point whipped feta like other reviews have said was amazing and the braised lamb was really good but the sirloin was chewy and bland and the grilled caesar tasted like a soggy lukewarm wilted mess two orders were left untouched by a group of 8 atmosphere was nice and trendy but the service which is a huge reason to spend your hardearned at a restaurant was absolutely horrendous \n\ni really love going to cool popular restaurants and having a great time and i hate writing such a negative review but i had to share my experience especially for a restaurant that is getting an awful lot of undeserved hype,0.9609,0.131,1,5
387,new owners turned it into a double threat bad food and bad service shame i loved this place,-0.8625,0.553,4,0
...,...,...,...,...,...
9703,my name is angela s and i have a froyo problem no really i dont even like it that much yet i continue to go to all of these stupid frozen yogurt joints like some dairycrazed masochist and eat 417 worth of disappointment at a time \n\nand then came tutti frutti \n\nits good its a little different the toppings are just soso but the yogurt itself is really good they have interesting flavors including taro if you havent tried something taro flavored tutti frutti is a good place to start it doesnt even need toppings \n\nwith so many froyo places in stl its rare that ive visited the same one twice but tutti frutti could become a standby,-0.5103,0.202,4,0
9748,what miserable customer service \n\nwe went here as part of a special event for families of autistic children while i commend the owners for reaching out to people with special needs the way they handle themselves behind the counter is sickening \n\nwhen we got there we found out that the entire inside of the park was closed something that was not mentioned anywhere in their press release not on their own website either despite their insistence that it was so all the autistic kids were confined to a wave pool and two smaller areas which was far from adequate for this particular crowd when i complained to the manager dan he was clearly overwhelmed by the situation and had no answers to give regarding why it was never stated that most of the facility was off limits but at least he was polite unlike brandon \n\nbrandon another assistant manager interjected himself unasked in our conversation and interrupted me at every sentence i guess if the customer cant finish a sentence it doesnt count as a complaint even when i asked him to let me speak he kept talking over me great managerial technique there \n\nhe offered to refund our tickets but only if we left right then while we had no problem leaving there were a lot of families whose children took a lot effort to move either because of physical or developmental disabilities had either of them offered refunds at our leisure as a sign of good faith we would have stayed and spent more than the cost of the tickets on the insanely over priced food 12 for a basket of chicken fingers is utterly ridiculous plus guaranteed our return another time as it is theyve lost our business permanently \n\nif you choose to go here i hope you have fun just dont go expecting resolutions to any problems you might have because apparently they dont know the first thing about customer service,0.8859,0.116,1,5
9845,i know that pi is a popular place and that it gets packed in the loop especially on friday and saturday nights since they dont take reservations for 2 people and being the smart yelper that i am i called first to check on the wait they said that by the time i came in there would be no wait especially because the concert at the pageant was about to start so the bf and i headed down there 10 min later and he dropped me off so i wouldnt have to walk in the cold what a gentleman i walked in and the snooty hostesses told me there was a 40 min wait um there is no way all of those people just showed up in 10 min clearly someone fudged the truth to get me to come in and there wasnt a single seat or place to stand in the bar or the tiny waiting area i tried to call him to tell him not to park and walk down he would probably have to walk pretty far but he wasnt paying attention to his phone so he never got the message he walked in took one look and i knew we were both on the same page i politely told the hostesses that they could cross off our name and then waited by the door while he pulled the car around they proceeded to stare at me very obviously and whisper during this time until i left which is bad business whether youre a catty hostess that still thinks she is in high school or maybe still is or a hostess raving about how fabulous my outfit is either way girls its rude\n\ndont get me wrongthe pizza here is good but its no mystic pizza and it isnt the only pizza place around this story has a happy ending though we went to deweys were promptly seated experienced the best service there as we always do and left with full bellies probably way before we wouldve even gotten our pizza at pi \n\nnever again,0.9441,0.050,1,5
9853,im not sure what happened here but this place went downhill fast i just had a pizza steak and chicken parm sandwich that were a step above edible seriously im still trying to get the taste of the pizza steak out of my mouth the only thing worse than being excited for a steak sandwich and having it suck is being excited for wings and having them forget to send the damn blue cheese really come on i can honestly say that based on this experience i would never go back \n\nfirst review great food and friendly service we had an italian hoagie and cheesesteak hoagie and hot wings all were fantastic the rolls are soft and fresh they were full of delicious meat the wings were large and the hot sauce is the best ive had in the area ill be back for sure,0.9622,0.148,1,5
