In [85]:
from collections import defaultdict
from collections import Counter
import json
import math
import string
import time
import numpy as np
from nltk.tokenize import TreebankWordTokenizer
from IPython.core.display import HTML
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem import PorterStemmer
import re

In [86]:
# Load the restaurant/review data. JSON text is UTF-8, and the review texts contain
# non-ASCII characters, so the encoding is stated explicitly instead of relying on
# the platform's default text encoding.
with open("finalData.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [87]:
# Work only with the entries stored under the 'BOSTON' key of the loaded JSON.
small_data = data['BOSTON']

In [88]:
# Number the restaurant names (the keys of small_data) in iteration order: position -> name.
index_to_restaurant = dict(enumerate(small_data))

In [89]:
# Display the position -> restaurant-name mapping as a sanity check.
index_to_restaurant

{0: 'Longwood Galleria',
 1: 'Legal Sea Foods',
 2: 'Espresso Minute',
 3: 'Amrheins',
 4: "Fiouna's Persian Fusion Cuisine",
 5: 'Code 10',
 6: 'Star Market',
 7: 'Shenannigans Bar',
 8: "McGann's Pub",
 9: "Whiskey's Food & Spirits",
 10: 'Burger King',
 11: "Fin's Sushi & Grill",
 12: 'Campo di Fiori',
 13: 'The North End',
 14: 'Blue Wave',
 15: "Herrera's",
 16: 'Aragosta Bar and Bistro',
 17: 'Pressed Sandwiches',
 18: 'Fin Point Oyster Bar + Grille',
 19: 'Sister Sorel',
 20: 'Cha Cha Cha Taqueria',
 21: 'The Boston Chipyard',
 22: 'Restaurante Cesaria',
 23: 'Flemings Prime Steakhouse & Wine Bar',
 24: 'Boloco',
 25: 'Croma',
 26: 'Sidebar Boston',
 27: 'Five Horses Tavern',
 28: 'Beantown Pho & Grill',
 29: 'Al Capone Pizza Pasta & Meats',
 30: 'Scoozi Newbury',
 31: 'Charles River Bistro',
 32: "Roxy's Gourmet Grilled Cheese",
 33: 'Oppa Sushi',
 34: "Broad Street Paulie's",
 35: "Lilly's Gourmet Pasta Express",
 36: "Amelia's Taqueria",
 37: "Emilio's",
 38: 'Empire Asian Re

In [90]:
# Inverse lookup of index_to_restaurant: restaurant name -> position.
restaurant_to_index = dict(zip(index_to_restaurant.values(), index_to_restaurant.keys()))

In [91]:
# TF-IDF vectorizer using scikit-learn's built-in English stop-word list.
# NOTE(review): the documents passed to this vectorizer are stemmed first (see the
# stemming cell), so stemmed tokens may no longer match entries of the stop-word
# list — confirm whether stop words should be removed before stemming instead.
tfidf_vec = TfidfVectorizer(stop_words = 'english')

In [92]:
# Tokenizer pattern: every match is one maximal run of word characters (\w+).
# re.VERBOSE makes the whitespace and newlines inside the pattern insignificant.
word_splitter = re.compile(r"""
    (\w+)
    """, re.VERBOSE)


def getwords(sent):
    """Return the lower-cased word tokens of ``sent`` in order of appearance."""
    tokens = word_splitter.findall(sent)
    return list(map(str.lower, tokens))

In [93]:
# Build the corpus: one stemmed, space-joined string per review, in restaurant order.
# review_idx_for_restaurant maps each restaurant name to the positions of its reviews
# inside `reviews`; because reviews are appended restaurant by restaurant, those
# positions form one contiguous range per restaurant.
stemmer = PorterStemmer()
reviews = []
review_idx_for_restaurant = {}
for restaurant, restaurant_dic in small_data.items():
    first_position = len(reviews)
    reviews.extend(
        " ".join(stemmer.stem(word.lower()) for word in getwords(entry['text']))
        for entry in restaurant_dic['reviews']
    )
    review_idx_for_restaurant[restaurant] = list(range(first_position, len(reviews)))
# Total number of reviews collected (kept under its original name).
counter = len(reviews)

In [94]:
# Fit the vectorizer on the stemmed reviews and convert the result to a dense NumPy
# array so the similarity code below can use plain NumPy row operations.
# NOTE(review): .toarray() can be memory-hungry for a large corpus/vocabulary —
# confirm the data set is small enough for a dense matrix.
tfidf_mat = tfidf_vec.fit_transform(reviews).toarray()

In [34]:
def build_movie_sims_cos(num_movies, input_doc_mat):
    """Return the pairwise cosine-similarity matrix of the first ``num_movies`` rows.

    Entry [i, j] is the cosine similarity between row i and row j of
    ``input_doc_mat``. Every non-zero row is perfectly similar to itself, so its
    diagonal entry is 1 (up to floating-point rounding). An all-zero row has no
    direction; its similarities (including the diagonal entry) are defined as 0
    instead of the NaN that a 0/0 division would produce, so that averages taken
    over the result stay finite.

    Params: {num_movies: Integer, number of leading rows to compare,
             input_doc_mat: Numpy Array, one row vector per item}
    Returns: Numpy Array of shape (num_movies, num_movies)
    Raises: IndexError if num_movies is larger than the number of rows.
    """
    doc_mat = np.asarray(input_doc_mat, dtype=float)
    if num_movies > doc_mat.shape[0]:
        raise IndexError(
            "num_movies (%d) exceeds the number of rows (%d)"
            % (num_movies, doc_mat.shape[0])
        )
    doc_mat = doc_mat[:num_movies]

    norms = np.linalg.norm(doc_mat, axis=1)
    # Dividing an all-zero row by 1 leaves it all-zero, which yields similarity 0
    # against every row without triggering a division by zero.
    norms[norms == 0] = 1.0
    unit_rows = doc_mat / norms[:, np.newaxis]

    # The dot product of two unit vectors is their cosine similarity, so a single
    # matrix product replaces the explicit double loop over all pairs.
    return unit_rows @ unit_rows.T

In [None]:
# Cosine similarity between every pair of reviews: the rows passed to the helper here
# are review vectors, despite the "movie" wording in the helper's name.
sims_cos = build_movie_sims_cos(len(reviews), tfidf_mat)



In [62]:
# Split points for np.split: review indices are assigned contiguously, restaurant by
# restaurant, so the running total of review counts marks where each restaurant's
# block ends and the next one starts. The final total is dropped because np.split
# only needs the interior boundaries. Deriving the boundaries from the counts
# (instead of reading ids[0]) also works when a restaurant has no reviews.
review_splitter = np.cumsum(
    [len(ids) for ids in review_idx_for_restaurant.values()], dtype=int
)[:-1].tolist()

In [68]:
def get_ranked_restaurants(in_restaurant, sim_matrix):
    """Rank every other restaurant by review similarity to ``in_restaurant``.

    The rows of ``sim_matrix`` belonging to the input restaurant's reviews are
    averaged into one similarity value per review; those values are then cut
    into per-restaurant segments at the ``review_splitter`` boundaries and each
    segment is averaged into one score per restaurant.

    Params: {in_restaurant: key of restaurant_to_index / review_idx_for_restaurant,
             sim_matrix: Numpy Array of review-by-review similarities}
    Returns: list of (restaurant name, score) tuples, highest score first,
             with the input restaurant itself left out.
    """
    query_pos = restaurant_to_index[in_restaurant]
    query_rows = sim_matrix[review_idx_for_restaurant[in_restaurant]]
    # One value per review: mean similarity to the input restaurant's reviews.
    per_review_sim = np.mean(query_rows, axis=0)

    scored = [
        (index_to_restaurant[pos], np.mean(segment))
        for pos, segment in enumerate(np.split(per_review_sim, review_splitter))
    ]
    # Drop the input restaurant's own entry before ranking.
    ranked = [pair for pos, pair in enumerate(scored) if pos != query_pos]
    ranked.sort(key=lambda pair: -pair[1])
    return ranked

In [None]:
# Query restaurant for a manual sanity check of the ranking.
test = "Longwood Galleria"
# (name, score) pairs for every other restaurant, most similar first.
top_restaurants = get_ranked_restaurants(test, sims_cos)

In [83]:
# Print the first three reviews of the query restaurant, each followed by a separator line.
for rev in small_data[test]['reviews'][:3]:
    print(rev['text'], "///////", sep="\n")

Seeing more variety for food in the North Shore has me pumped! There is a Poké place in Beverly, but it's a little far. However, this is down the street for me. Not so excited that it is physically located in the mall, but it makes sense. Well, at the time before the whole Corona Virus pandemic started. Lame. 

I also enjoy that this is a small company from Maine opening up here. Similar to Hop & Grind, the restaurant is run by the owner's and from what I understand is similar to a restaurant in New Hampshire as well.

The food, we must talk about the food darling. The food is fresh and lighter fare. I always feel a little unsatisfied with the bowls, which the Teriyaki Musubi helped put in a dent. But it's healthy and doesn't leave one feeling run down. There are so many cheeseburgers you can hammer down before you're saying "damn I need a salad or something." This is a great option. It is fast; prepped and the bowl is put together right in front of you. It's a healthier option with re

In [84]:
# For the three best-ranked restaurants, print the name and the first three reviews
# (each followed by a separator line), then a blank line between restaurants.
for name, _score in top_restaurants[:3]:
    print("RESTAURANT: ", name)
    for rev in small_data[name]['reviews'][:3]:
        print(rev['text'], "///////", sep="\n")
    print("")

RESTAURANT:  Thai Grille
It's not thai food but i dont care because I hate UMI chicken teriyaki. I really like it and the price isnt bad but could be better.
///////
Good place to grab a quick bite in the mall. I ordered the beef with noodles and it was a huge portion. I ate a third of it and saved the rest for later. The fresh sautéed veggies were good and FHS beef was tender. I would come here again if I find myself shopping there in the future.
///////
I always get the noodles and terriyaki chicken. Its great and the price is right. The staff is young, energetic and does a great job.
///////

RESTAURANT:  The NexMex Thing
Apparently, they are moving to a new location next week!  It'll be "right across the street" although I don't know what the new address is, exactly. We are huge fans and will brave the rush hour traffic to head up here. It's not big so definitely don't be bringing 20 people all at once....although I can't vouch for the new location moving forward.   

Taco Tuesdays