In [3]:
# --- Imports: standard library first, then third-party -----------------------
import csv
import json
import re

import matplotlib.pyplot as plt
import networkx as nx
import nltk
import numpy as np
import pandas as pd
import requests
import wordcloud
from nltk import FreqDist, pos_tag, word_tokenize
from nltk.corpus import opinion_lexicon, stopwords, wordnet
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob

# --- NLTK resources -----------------------------------------------------------
# Download every resource BEFORE anything reads it, so this cell also runs on a
# machine where nothing is installed yet. The stopword list below needs
# 'stopwords'; pos_tag needs the perceptron tagger model.
# NOTE(review): recent NLTK releases ship the tagger as
# 'averaged_perceptron_tagger_eng' - adjust the name if the download reports
# an unknown package.
for resource in ('vader_lexicon', 'punkt', 'stopwords', 'wordnet',
                 'averaged_perceptron_tagger'):
    nltk.download(resource)

# --- Shared objects used by the helper functions further down ------------------
# English stopwords that are removed from every comment
unwanted = nltk.corpus.stopwords.words("english")
# Lemmatizer reused for every word instead of being rebuilt per call
wordnet_lemmatizer = WordNetLemmatizer()


[nltk_data] Downloading package vader_lexicon to C:\Users\Mabel
[nltk_data]     Ifeoma\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\Mabel
[nltk_data]     Ifeoma\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Mabel
[nltk_data]     Ifeoma\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Mabel
[nltk_data]     Ifeoma\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [16]:
# Load the data and keep only the columns this notebook works with
Data = pd.read_csv("TripAdvisor_RestauarantRecommendation.csv")
Data = Data[["Name", "Location", "Reviews", "Comments"]]


def _state_code(location):
    """Return the two-letter state code found in a location string.

    The code is searched for in the text after the last comma, so
    ``"Plano, TX 75093-7777"`` gives ``"TX"``.

    Returns ``""`` when that part of the string holds no two-letter
    upper-case code, and ``None`` when ``location`` is not a string
    (a missing value), so that the row is removed by the ``dropna`` below
    instead of raising.
    """
    if not isinstance(location, str):
        return None
    found = re.search(r'\b[A-Z]{2}\b', location.rsplit(',', 1)[-1])
    return found.group(0) if found else ''


# Adds the state each restaurant is located in
Data['State'] = Data['Location'].map(_state_code)
# Drop rows that have a missing value in any column
Data = Data.dropna()


print(Data.head())

                            Name                      Location  \
0  Betty Lou's Seafood and Grill  San Francisco, CA 94133-3908   
1              Coach House Diner     Hackensack, NJ 07601-6337   
2               Table Talk Diner   Poughkeepsie, NY 12601-5476   
3                    Sixty Vines          Plano, TX 75093-7777   
4                   The Clam Bar            Syracuse, NY 13212   

            Reviews                                           Comments  
0  4.5 of 5 bubbles                                                NaN  
1    4 of 5 bubbles  Both times we were there very late, after 11 P...  
2    4 of 5 bubbles  Waitress was very friendly but a little pricey...  
3  4.5 of 5 bubbles  Not sure why I went there for the second time....  
4    4 of 5 bubbles  Doesn't look like much from the outside but wa...  


In [22]:

def tokens(raw):
    """Split ``raw`` into lower-cased word tokens.

    The text is cut up with ``nltk.wordpunct_tokenize`` and only the pieces
    that contain at least one ASCII letter are kept, so tokens made up solely
    of punctuation or digits are dropped.

    Parameters
    ----------
    raw : str
        The text to tokenize.

    Returns
    -------
    list of str
        The surviving tokens, lower-cased, in their original order.
    """
    has_letter = re.compile('.*[A-Za-z].*')
    return [
        piece.lower()
        for piece in nltk.wordpunct_tokenize(raw)
        if has_letter.match(piece)
    ]
    
def removal(tags):
    """Drop stopwords and convert POS tags to WordNet POS constants.

    Parameters
    ----------
    tags : iterable of (word, tag) pairs
        Each tag must be a non-empty string; only its first character is used.

    Returns
    -------
    list of (word, pos) tuples
        One entry per word that is not in the ``unwanted`` stopword list.
        ``pos`` is the WordNet constant for tags starting with J, V, N or R,
        and ``None`` for every other tag.
    """
    wordnet_pos = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    return [
        (token, wordnet_pos.get(label[0]))
        for token, label in tags
        if token not in unwanted
    ]


def posTag(text):
    """Tokenize ``text`` with ``tokens`` and return the result of ``pos_tag`` on those tokens."""
    return pos_tag(tokens(text))

def lemmanized(Pos_tagged):
    """Join POS-tagged words into one string of lemmas.

    Parameters
    ----------
    Pos_tagged : iterable of (word, pos) pairs
        Words with a truthy ``pos`` are lemmatized with the shared
        ``wordnet_lemmatizer`` using that ``pos``; words without one are
        used unchanged.

    Returns
    -------
    str
        A single space followed by every lemma, each preceded by a space.
        An empty input therefore gives ``" "``.
    """
    lemmas = [
        wordnet_lemmatizer.lemmatize(token, pos=tag) if tag else token
        for token, tag in Pos_tagged
    ]
    return " " + "".join(" " + lemma for lemma in lemmas)

def getSubjectivity(comment):
    """Return the subjectivity score TextBlob computes for ``comment``."""
    sentiment = TextBlob(comment).sentiment
    return sentiment.subjectivity
def getPolarity(comment):
    """Return the polarity score TextBlob computes for ``comment``."""
    sentiment = TextBlob(comment).sentiment
    return sentiment.polarity

def analysis(polarity):
    """Turn a polarity score into a sentiment label.

    Returns ``'Positive'`` for scores above zero, ``'Neutral'`` for exactly
    zero and ``'Negative'`` for everything else.
    """
    if polarity > 0:
        return 'Positive'
    return 'Neutral' if polarity == 0 else 'Negative'

In [19]:
# Tokenize, POS-tag and remove stopwords for each comment in the data.
# The 'Comments' column is overwritten with the tagged word lists because the
# next cell reads them from there. Only raw strings are processed: when the
# cell is run a second time the already-tagged lists are left as they are
# instead of being fed to the tokenizer, which would raise a TypeError.
Data['Comments'] = Data['Comments'].apply(
    lambda comment: removal(posTag(comment)) if isinstance(comment, str) else comment
)


In [20]:
# Turn every tagged comment back into one string of lemmas
Data['lem_comments'] = [lemmanized(tagged) for tagged in Data['Comments']]
Data.head()

Unnamed: 0,Name,Location,Reviews,Comments,State,lem_comments
1,Coach House Diner,"Hackensack, NJ 07601-6337",4 of 5 bubbles,"[(times, n), (late, r), (pm, n), (time, n), (m...",NJ,time late pm time many diner forget restaura...
2,Table Talk Diner,"Poughkeepsie, NY 12601-5476",4 of 5 bubbles,"[(waitress, n), (friendly, a), (little, a), (p...",NY,waitress friendly little pricey diner food d...
3,Sixty Vines,"Plano, TX 75093-7777",4.5 of 5 bubbles,"[(sure, a), (went, v), (second, a), (time, n),...",TX,sure go second time go terrible average plac...
4,The Clam Bar,"Syracuse, NY 13212",4 of 5 bubbles,"[(look, v), (like, None), (much, r), (outside,...",NY,look like much outside walk either front doo...
6,Black Angus Steakhouse - Federal Way,"Federal Way, WA 98003-5465",4 of 5 bubbles,"[(easy, a), (time, n), (place, n), (dinner, n)...",WA,easy time place dinner week night special oc...


In [49]:
# Build a separate DataFrame holding only the columns the sentiment analysis needs
wanted_columns = ['Name', 'State', 'lem_comments']
detail_data = pd.DataFrame(Data[wanted_columns])

In [50]:
# Polarity     - how positive or negative the opinion is
# Subjectivity - how subjective the opinion is
# Analysis     - the label derived from the polarity score

detail_data = detail_data.assign(
    Subjectivity=lambda frame: frame['lem_comments'].apply(getSubjectivity),
    Polarity=lambda frame: frame['lem_comments'].apply(getPolarity),
    # Evaluated after Polarity has been added, so it can read that column
    Analysis=lambda frame: frame['Polarity'].apply(analysis),
)
detail_data.head()


Unnamed: 0,Name,State,lem_comments,Subjectivity,Polarity,Analysis
1,Coach House Diner,NJ,time late pm time many diner forget restaura...,0.557143,0.314286,Positive
2,Table Talk Diner,NY,waitress friendly little pricey diner food d...,0.6,0.3575,Positive
3,Sixty Vines,TX,sure go second time go terrible average plac...,0.748611,-0.31875,Negative
4,The Clam Bar,NY,look like much outside walk either front doo...,0.344643,0.203571,Positive
6,Black Angus Steakhouse - Federal Way,WA,easy time place dinner week night special oc...,0.75119,0.497619,Positive


In [55]:
# Overview of all the states: every distinct value of the 'State' column,
# in sorted order
states = sorted(set(Data['State']))

['', 'NJ', 'NY', 'OR', 'PA', 'TX', 'WA']


In [58]:
# Full names of the state codes that occur in the data
STATE_NAMES = {
    "NJ": "New Jersey",
    "NY": "New York",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "TX": "Texas",
    "WA": "Washington",
}

# `states` can contain an empty string (a location without a state code);
# skip it by value rather than relying on it being the first element.
listed_states = [code for code in states if code]

# The count and the lines are derived from the data, so the message stays
# correct if the set of states changes. Unknown codes are shown as-is.
print(
    "You will find the resturants in the following {} states: \n ".format(len(listed_states))
    + " \n ".join(
        "{} {}".format(code, STATE_NAMES.get(code, code)) for code in listed_states
    )
)

You will find the resturants in the following 6 states: 
 NJ New Jersey 
 NY New York 
 OR Oregon 
 PA Pennsylvania 
 TX Texas 
 WA Washington


In [110]:
# Scratch counters from the original cell. Nothing in this cell reads them;
# they are kept only in case a later cell does.
m1 = m2 = m3 = m4 = m5 = m6 = 0
count = 0

# States that get a ranking. Restaurants are matched on the state code itself,
# not on a position in `states`, so a change in the set of states in the data
# can never file a restaurant under the wrong state.
STATE_CODES = ("NJ", "NY", "OR", "PA", "TX", "WA")
restaurants_by_state = {code: [] for code in STATE_CODES}

# Sort the restaurants according to their state
for i in detail_data.index:
    code = detail_data.at[i, 'State']
    if code in restaurants_by_state:
        # (row index, name, polarity, subjectivity), scores rounded to 3 decimals
        restaurants_by_state[code].append((
            i,
            detail_data.at[i, 'Name'],
            round(detail_data.at[i, 'Polarity'], 3),
            round(detail_data.at[i, 'Subjectivity'], 3),
        ))
    else:
        # Not one of the ranked states: report the row so it can be inspected
        print(i, Data.at[i, 'Name'], Data.at[i, 'Location'])

# Highest polarity first. sorted() is stable, so restaurants with the same
# polarity keep the order in which they appear in the data.
for code in STATE_CODES:
    restaurants_by_state[code] = sorted(
        restaurants_by_state[code], key=lambda tup: tup[2], reverse=True
    )

# Names used by the cells that follow
Top_rest_NJ = restaurants_by_state["NJ"]
Top_rest_NY = restaurants_by_state["NY"]
Top_rest_OR = restaurants_by_state["OR"]
Top_rest_TX = restaurants_by_state["TX"]
Top_rest_PA = restaurants_by_state["PA"]
Top_rest_WA = restaurants_by_state["WA"]
                
                


2617 The Watermark Niagara Falls, Ontario L2G 3V9, 


In [122]:

# The polarity measure is interpreted as an indication of how good a restaurant
# is. For each state a frame is built from the first 10 entries of its ranking,
# showing the restaurant together with its polarity and subjectivity score.
top_columns = ['Index','Restuarant','Love-score','Subjectivity-score']

Top_WA = pd.DataFrame(Top_rest_WA[:10], columns=top_columns)
Top_NY = pd.DataFrame(Top_rest_NY[:10], columns=top_columns)
Top_NJ = pd.DataFrame(Top_rest_NJ[:10], columns=top_columns)
Top_OR = pd.DataFrame(Top_rest_OR[:10], columns=top_columns)
Top_PA = pd.DataFrame(Top_rest_PA[:10], columns=top_columns)
Top_TX = pd.DataFrame(Top_rest_TX[:10], columns=top_columns)



In [123]:
# Show the Oregon frame (at most the first 10 entries of Top_rest_OR)
print(Top_OR)


   Index                     Restuarant  Love-score  Subjectivity-score
0   1105  Salty's On The Columbia River       0.612               0.708
1    129                    Island Cafe       0.521               0.697


In [124]:
# Show the Pennsylvania frame (at most the first 10 entries of Top_rest_PA)
print(Top_PA)

   Index                          Restuarant  Love-score  Subjectivity-score
0    839                    Carlucci's Grill       0.700               0.733
1   1888                      Non Solo Pasta       0.540               0.737
2   2438                        Cafe Antonio       0.250               0.483
3   1855                 Canal Street Grille       0.237               0.550
4   1924                         Yardley Inn       0.200               0.300
5    823  La Villa Pizza & Family Restaurant       0.180               0.467
6    408                   Vault Brewing Co.       0.167               0.389
7   2672                   Salute Ristorante       0.000               0.083
8    670                        Comfort Food      -0.114               0.286


In [125]:
# Show the New Jersey frame (at most the first 10 entries of Top_rest_NJ)
print(Top_NJ)

   Index                         Restuarant  Love-score  Subjectivity-score
0   1966           Spanish Olive Restaurant       0.933               0.917
1   3047                       Chick's Deli       0.900               0.525
2   1056         Aarzu Modern Indian Bistro       0.867               0.762
3    427                          Hamburgao       0.850               0.800
4   1874                     Bonefish Grill       0.833               0.783
5   1246                Don Pepe Restaurant       0.800               0.733
6   2230                       Cafe Matisse       0.767               0.850
7   2513  Baladina Mediterranean Restaurant       0.767               0.917
8    536                   Portugal Express       0.750               0.800
9    557                    Sophie's Bistro       0.750               0.875


In [126]:
# Show the New York frame (at most the first 10 entries of Top_rest_NY)
print(Top_NY)

   Index                      Restuarant  Love-score  Subjectivity-score
0      9          John Thomas Steakhouse       1.000               1.000
1    404                          Chef's       1.000               0.300
2    455     Top of The Falls Restaurant       1.000               1.000
3    578  Delmonico's Italian Steakhouse       1.000               0.300
4    718  Lorenzo's Restaurant & Cabaret       1.000               0.300
5   1276   Ceci Italian Restaurant & Bar       1.000               1.000
6   2119         Martha's Country Bakery       1.000               1.000
7   1799                        Johnny's       0.933               0.683
8   2425       The Bistro at La Tourelle       0.900               0.875
9    434                  The Marketside       0.860               0.950


In [127]:
# Show the Washington frame (at most the first 10 entries of Top_rest_WA)
print(Top_WA)

   Index                        Restuarant  Love-score  Subjectivity-score
0    335                        Dough Zone       1.000               0.300
1    879            WildFin American Grill       1.000               1.000
2   1038   Elmer's Restaurant - Mill Plain       1.000               1.000
3   1134          5th Avenue Sandwich Shop       1.000               0.300
4   2919  Black Angus Steakhouse - Spokane       1.000               1.000
5   1590                     Old Town Cafe       0.933               0.917
6   2569      Charlies Restaurant & Lounge       0.933               0.917
7   2696                   8oz Burger & Co       0.900               0.875
8    462      Carousel Restaurant & Bistro       0.850               0.900
9   2782               Assaggio Ristorante       0.850               0.800


In [128]:
# Show the Texas frame (at most the first 10 entries of Top_rest_TX)
print(Top_TX)

   Index                      Restuarant  Love-score  Subjectivity-score
0    260                            Uchi       1.000               1.000
1    543         Eddie V's Prime Seafood       1.000               1.000
2    888                    Capistrano's       1.000               1.000
3   1097                      Rodeo Goat       1.000               0.650
4   1677        Fish N' Tails Oyster Bar       1.000               1.000
5   2346                      Rodeo Goat       1.000               0.650
6   2361                  State Of Grace       1.000               1.000
7   1138  La Fiesta Restaurant & Cantina       0.933               0.683
8   1701            Cosmos Bar and Grill       0.933               0.917
9   2687        Calico County Restaurant       0.933               0.917
