## **Imports**


In [448]:
import pandas as pd
from geopy.distance import geodesic
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import TfidfVectorizer

## **Overriding Defaults**


In [449]:
# Cap rendered DataFrames at 20 rows so large frames are truncated in cell output
pd.options.display.max_rows = 20

## **Loading Dataset**


In [450]:
# Load the restaurant catalogue and the sentiment-labelled reviews with identical
# reader settings.
# NOTE(review): names such as "Fat Boy's Pizza" render with garbled apostrophes in
# the outputs below, which may mean the files are really UTF-8 — confirm the encoding.
restaurantsDf, reviewsDf = (
    pd.read_csv(csv_path, encoding='latin-1')
    for csv_path in (
        './data/registered_restaurants.csv',
        './data/labelled_reviews.csv',
    )
)

In [451]:
# reset_index returns a new frame instead of modifying the caller, so the result
# has to be assigned for the call to have any effect. A clean 0..n-1 index matters
# because code further down uses index labels as row positions into the
# similarity matrix.
restaurantsDf = restaurantsDf.reset_index(drop=True)
restaurantsDf.head(1)

Unnamed: 0,business_id,address,city,latitude,longitude,name,rating,review_count,categories,slug,id,userId
0,---kPU91CF4Lq2-WlRu9Lw,4903 State Rd 54,New Port Richey,28.217288,-82.733344,Frankie's Raw Bar,4.5,24,"Seafood, Latin American",frankies-raw-bar,65eda330d745307cb1a92556,65d8e5f75b89ca028e3fd8cb


In [452]:
# Peek at a single review row to see the available columns
reviewsDf.head(n=1)

Unnamed: 0,id,review_id,user_id,restaurant_id,slug,rating,content,createdAt,sentiment
0,65eddbf4b6474deda4d94197,zM7u3iffMjniyvQAaN9Tnw,65d92f185b89ca028e45e930,65eda92ad745307cb1a957d3,nSi0NLoRHw,1.0,So disappointing on so many levels. Have been ...,2012-05-29 03:51:08,neutral


# **<div align="center">`User Data`</div>**

In [453]:
# Id of the user to build recommendations for; matched against reviewsDf['user_id'] below
userId = "65d8f8525b89ca028e417247"

# (latitude, longitude) — same order as the (row["latitude"], row["longitude"])
# pair this point is compared with in the geodesic distance call further down
user_coordinates = (29.951065, -90.071533)

In [454]:
# Keep only the reviews written by the target user that were labelled positive
is_target_user = reviewsDf['user_id'].eq(userId)
is_positive = reviewsDf['sentiment'].eq('positive')
user_reviews = reviewsDf.loc[is_target_user & is_positive]
user_reviews

Unnamed: 0,id,review_id,user_id,restaurant_id,slug,rating,content,createdAt,sentiment
170765,65f57dddb19fe11f1eb9a15d,361-FSR_dQIhf8ws8smiGA,65d8f8525b89ca028e417247,65edacb9d745307cb1a975d3,0EX9zN_5mY,5.0,"Diablo burger. Zucchini fries. A beer, duh. An...",2016-03-08 03:34:06,positive
264351,65f5a8a3b19fe11f1ebdea13,z3aEaeFdY3doRz5ZCaU3xQ,65d8f8525b89ca028e417247,65edab99d745307cb1a96c55,lfcGBG65dX,5.0,I love the pizza. I love the salads. I love th...,2015-11-24 01:37:12,positive
421886,65f727a8704777ac862361d4,zwL67OeySWJ6yzgUAnV4Lg,65d8f8525b89ca028e417247,65edae13d745307cb1a98131,Ws2yro-Sj9,5.0,Every time I come here there's something new. ...,2016-02-10 21:30:31,positive


## **User Preferred Cuisine**


In [455]:
# Ids of the restaurants behind the user's positive reviews
restaurant_ids = user_reviews['restaurant_id'].tolist()

# Matching catalogue rows, renumbered from 0
was_reviewed = restaurantsDf['id'].isin(restaurant_ids)
user_restaurants = restaurantsDf.loc[was_reviewed].reset_index(drop=True)

In [456]:
# Catalogue rows for the restaurants the user reviewed positively
user_restaurants

Unnamed: 0,business_id,address,city,latitude,longitude,name,rating,review_count,categories,slug,id,userId
0,3TNaIkVUKrHzeeWMo9DMkg,137 E Congress St,Tucson,32.222127,-110.968902,Empire Pizza & Pub,4.0,394,"Pizza, Sandwiches, Salad",empire-pizza-and-pub,65edab99d745307cb1a96c55,65d8e5ec5b89ca028e3fd75f
1,43MDfrU28FYjfpamNfL9GA,"1865 E River Rd, Ste 101",Tucson,32.287746,-110.944633,Zinburger,4.0,795,"Burgers, American",zinburger-1,65edacb9d745307cb1a975d3,65d8e79e5b89ca028e3ff9d3
2,4nkbXXzgUn-tViXzuoKR1Q,11 S 6th Ave,Tucson,32.22175,-110.968313,Johnny Gibson's Downtown Market,4.0,115,"Bakeries, Delis, Coffee & Tea",johnny-gibsons-downtown-market,65edae13d745307cb1a98131,65d8e6245b89ca028e3fde85


In [457]:
# Distinct categories across every restaurant the user liked; each restaurant's
# "categories" cell is a ', '-separated string
preferred_categories = {
    category
    for category_list in user_restaurants['categories']
    for category in category_list.split(', ')
}

preferred_categories

{'American',
 'Bakeries',
 'Burgers',
 'Coffee & Tea',
 'Delis',
 'Pizza',
 'Salad',
 'Sandwiches'}

# **<div align="center">`Vector Space Model`</div>**

In [458]:
# One "document" per restaurant: its comma-separated category string
category_corpus = restaurantsDf["categories"].tolist()

# Turn the category strings into TF-IDF vectors (English stop words dropped)
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(category_corpus)

# Pairwise restaurant-to-restaurant similarity. linear_kernel is a plain dot
# product; it equals cosine similarity as long as the TF-IDF rows are
# L2-normalised (the vectorizer's documented default norm).
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim.shape

(5000, 5000)

In [459]:
# Lookup table: restaurant id -> row label in restaurantsDf (used as the row
# number into cosine_sim)
indices = pd.Series(data=restaurantsDf.index, index=restaurantsDf['id'])

In [460]:
def get_recommendations(similarity_threshold, restaurant_rating_threshold):
    """Recommend restaurants similar to the ones in ``user_restaurants``.

    Reads the notebook-level globals ``user_restaurants``, ``indices``,
    ``cosine_sim`` and ``restaurantsDf``. For every restaurant the user liked,
    the candidates are ranked by similarity (highest first) and kept when they
    meet both thresholds and are not the liked restaurant itself.

    Parameters
    ----------
    similarity_threshold : float
        Minimum similarity score (inclusive) a candidate must reach.
    restaurant_rating_threshold : float
        Candidates must have a ``rating`` strictly greater than this value.

    Returns
    -------
    pandas.DataFrame
        Rows of ``restaurantsDf`` that qualify, de-duplicated on ``id`` (first
        occurrence wins) and re-indexed from 0. When nothing qualifies, or the
        user has no liked restaurants, an empty frame with the same columns as
        ``restaurantsDf`` is returned.
    """
    # Plain lists indexed by row position, so every lookup below is positional
    # and consistent with the positions produced by enumerate() and with .iloc.
    all_ratings = restaurantsDf['rating'].tolist()
    all_ids = restaurantsDf['id'].tolist()

    # Collect the per-restaurant slices and concatenate once at the end rather
    # than growing a DataFrame inside the loop.
    matches = []

    for liked_id in user_restaurants['id']:
        # Row of this restaurant in the similarity matrix, looked up by id
        idx = indices[liked_id]

        # (position, score) pairs, most similar first; sorted() is stable, so
        # ties keep their original row order
        ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)

        # Similar enough, rated above the threshold, and not the liked restaurant itself
        keep = [
            pos for pos, score in ranked
            if score >= similarity_threshold
            and all_ratings[pos] > restaurant_rating_threshold
            and all_ids[pos] != liked_id
        ]

        matches.append(restaurantsDf.iloc[keep])

    if not matches:
        # No liked restaurants: return an empty frame that still has the 'id'
        # column, instead of failing in drop_duplicates on a column-less frame.
        return restaurantsDf.iloc[0:0].reset_index(drop=True)

    return pd.concat(matches).drop_duplicates(subset='id').reset_index(drop=True)

In [461]:
# Sets have no stable iteration order across kernel sessions, so sort first to
# make the displayed string reproducible on every run
', '.join(sorted(preferred_categories))

'Bakeries, Coffee & Tea, Burgers, Delis, American, Pizza, Salad, Sandwiches'

In [462]:
# Candidates need a similarity score of at least 0.85 and a rating above 3
recommendations = get_recommendations(
    similarity_threshold=0.85,
    restaurant_rating_threshold=3,
)
recommendations

Unnamed: 0,business_id,address,city,latitude,longitude,name,rating,review_count,categories,slug,id,userId
0,0MRxCrYwOVdza3lEa4_Plg,"800 W Main St, Ste 230",Boise,43.616142,-116.203485,Flatbread Neapolitan Pizzeria,4.0,189,"Sandwiches, Pizza, Salad",flatbread-neapolitan-pizzeria,65eda5d9d745307cb1a93bc7,65d8e5f45b89ca028e3fd86d
1,2XEjxGuQD-dD2NwGzp1Djg,78 E Eagle River,Eagle,43.690076,-116.353011,Idaho Pizza Company,4.0,85,"Sandwiches, Salad, Pizza",idaho-pizza-company-1,65eda9dfd745307cb1a95dc8,65d8e6185b89ca028e3fdd1b
2,2vpflvpjxG5SbE2aSScg3Q,15 W Lincoln Hwy,Langhorne,40.158546,-74.912359,Paul's Penndel Pizza,4.0,44,"Sandwiches, Salad, Pizza",pauls-penndel-pizza,65edaa9dd745307cb1a96408,65d8e6765b89ca028e3fe911
3,3KH2LcyjjA976D4sfNc55w,34940 US Hwy 19 N,Palm Harbor,28.089644,-82.740720,Piper's Scratch Pizza Shop,5.0,51,"Pizza, Salad, Sandwiches",pipers-scratch-pizza-shop,65edab53d745307cb1a96a07,65d8e67e5b89ca028e3fea1b
4,3euS__1BAL3ETiIuecS6ZQ,"1212 S Clearview Pkwy, Ste D",New Orleans,29.960096,-90.185347,Theo's Neighborhood Pizza,3.5,107,"Pizza, Sandwiches, Salad",theos-neighborhood-pizza,65edac01d745307cb1a96fc0,65d8e7805b89ca028e3ff5f8
...,...,...,...,...,...,...,...,...,...,...,...,...
25,3aY8m5w6UnxXbRMhEUPUMg,6390 E Grant Rd,Tucson,32.250078,-110.855525,Zinburger,4.0,636,"Burgers, American",zinburger,65edabded745307cb1a96e99,65d8e79e5b89ca028e3ff9d3
26,3gqB-N60ePaggR3JzkRgEg,621 N 4th Ave,Tucson,32.229425,-110.965869,Thunder Bacon Burger,5.0,25,"American, Burgers",thunder-bacon-burger,65edac0ed745307cb1a97033,65d8e7825b89ca028e3ff62f
27,3qpubK1C8SsDcq8BeKOEbA,6315 Delmar Blvd,University City,38.655992,-90.303217,HopCat - St Louis,4.0,160,"Burgers, American",hopcat-st-louis,65edac54d745307cb1a9727c,65d8e6135b89ca028e3fdc65
28,41dW3laDEV0boHre1utRgw,2580 S Falkenburg Rd,Tampa,27.928791,-82.337986,BubbaÃ¢ÂÂs 33,4.0,40,"American, Burgers",bubbaas-33,65edaca8d745307cb1a9753d,65d8e5b05b89ca028e3fcfc4


In [463]:
# Search radius in kilometres (compared against geodesic(...).kilometers)
max_distance = 50

# Distance from the user to every recommended restaurant. A comprehension is
# used instead of DataFrame.apply(axis=1) because apply on an empty frame
# returns an empty DataFrame, which cannot be stored as a single column —
# so this cell used to fail whenever there were no recommendations.
distance_km = pd.Series(
    [
        geodesic(user_coordinates, (lat, lon)).kilometers
        for lat, lon in zip(recommendations["latitude"], recommendations["longitude"])
    ],
    index=recommendations.index,
    dtype=float,
)

# assign() builds a new frame rather than writing a column into the frame the
# previous cell displayed
recommendations = recommendations.assign(distance=distance_km)

# Filter restaurants within the maximum distance
recommendations = recommendations[recommendations["distance"] <= max_distance]
recommendations

Unnamed: 0,business_id,address,city,latitude,longitude,name,rating,review_count,categories,slug,id,userId,distance
4,3euS__1BAL3ETiIuecS6ZQ,"1212 S Clearview Pkwy, Ste D",New Orleans,29.960096,-90.185347,Theo's Neighborhood Pizza,3.5,107,"Pizza, Sandwiches, Salad",theos-neighborhood-pizza,65edac01d745307cb1a96fc0,65d8e7805b89ca028e3ff5f8,11.031854
9,-_1ctmwdtSpjfpFu0pHXzw,714 St Peter,New Orleans,29.958187,-90.065125,Fat BoyÃ¢ÂÂs Pizza,4.5,6,"Pizza, Salad",fat-boyas-pizza,65eda44cd745307cb1a92eb1,65d8e5f05b89ca028e3fd7da,1.002924
