In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv('./Data/data_for_reccommender_1.csv')

In [3]:
df.head()

Unnamed: 0,rest_cost,rest_name,rest_rating,11,11th,12,13,14,14th,15,...,cuisine_type_Spanish,cuisine_type_Steaks,cuisine_type_Sushi,cuisine_type_Tacos,cuisine_type_Taiwanese,cuisine_type_Tex-Mex,cuisine_type_Thai,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar
0,4.0,Omakase Room By Tatsu,7.7,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4.0,Sushi Azabu,8.5,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,3.0,Saint Julivert Fisherie,7.7,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2.0,Farida,8.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2.0,U-Gu,7.7,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [18]:
df.shape

(798, 3848)

## Creating an Item Based Recommender System

In [14]:
def create_sim_matrix(df, cols_to_drop):
    from sklearn.metrics.pairwise import cosine_similarity
    cos_sim = cosine_similarity(df.drop(cols_to_drop, axis = 1), df.drop(cols_to_drop, axis = 1))
    return cos_sim

In [15]:
create_sim_matrix(df, 'rest_name')

array([[1.        , 0.49800124, 0.3813691 , ..., 0.32032102, 0.37115691,
        0.3516557 ],
       [0.49800124, 1.        , 0.38306506, ..., 0.38799274, 0.36324116,
        0.34123165],
       [0.3813691 , 0.38306506, 1.        , ..., 0.39397142, 0.40096293,
        0.38023985],
       ...,
       [0.32032102, 0.38799274, 0.39397142, ..., 1.        , 0.45729609,
        0.35365303],
       [0.37115691, 0.36324116, 0.40096293, ..., 0.45729609, 1.        ,
        0.40460028],
       [0.3516557 , 0.34123165, 0.38023985, ..., 0.35365303, 0.40460028,
        1.        ]])

In [4]:
cosine_sim = cosine_similarity(df.drop('rest_name', axis = 1), df.drop('rest_name', axis = 1))

In [12]:
cosine_sim

array([[1.        , 0.49800124, 0.3813691 , ..., 0.32032102, 0.37115691,
        0.3516557 ],
       [0.49800124, 1.        , 0.38306506, ..., 0.38799274, 0.36324116,
        0.34123165],
       [0.3813691 , 0.38306506, 1.        , ..., 0.39397142, 0.40096293,
        0.38023985],
       ...,
       [0.32032102, 0.38799274, 0.39397142, ..., 1.        , 0.45729609,
        0.35365303],
       [0.37115691, 0.36324116, 0.40096293, ..., 0.45729609, 1.        ,
        0.40460028],
       [0.3516557 , 0.34123165, 0.38023985, ..., 0.35365303, 0.40460028,
        1.        ]])

In [5]:
indices = pd.Series(df.index)

In [22]:
df[df['rest_name'] == 'U-Gu'].index[0]

4

In [6]:
matches = pd.Series(cosine_sim[4]).sort_values(ascending = False)

In [7]:
top_5 = matches[1:6]

In [8]:
top_5

119    0.529630
330    0.517786
42     0.515992
1      0.510306
71     0.508832
dtype: float64

In [68]:
print(f'{round(top_5.values[0], 2)*100}%')

53.0%


In [64]:
top_5_list = list(top_5.index)

In [67]:
top_5_list

[119, 330, 42, 1, 71]

In [69]:
for i in top_5_list:
    print(f'{df.rest_name[i]} , rating = {df.rest_cost[i]}, cost = {df.rest_cost[i]}')

The Loyal , rating = 3.0, cost = 3.0
Noreetuh , rating = 3.0, cost = 3.0
Beebe’s , rating = 2.0, cost = 2.0
Sushi Azabu , rating = 4.0, cost = 4.0
Abuqir , rating = 1.0, cost = 1.0


In [23]:
def recommendations(name, cosine_sim = None, df= df):
    #empty list of restaurant
    recommended_restaurants = []
    if name in df['rest_name'].values:
    
        #find index of restaurant that matches the name
        rest_index = df[df['rest_name'] == name].index[0]

        #Find the index in the cosine matrix
        matching_index = pd.Series(cosine_sim[rest_index]).sort_values(ascending = False)

        #find top 5
        similar_indices = list(matching_index[1:6].index)

        #Print top 5 recs
        for i in similar_indices:
            recommended_restaurants.append(df.rest_name[i])
            print(f'{df.rest_name[i]} , rating = {df.rest_rating[i]}, cost = {df.rest_cost[i]}')
        
    else:
        print(f'Sorry, we can\'t find what you\'re looking for. Please try a different restaurant')

In [10]:
recommendations('Have & Meyer')

Ruffian , rating = 8.4, cost = 3.0
Frenchette , rating = 8.4, cost = 4.0
Ardesia , rating = 7.0, cost = 2.0
Terra , rating = 6.6, cost = 4.0
Racines , rating = 7.8, cost = 4.0


In [11]:
recommendations('The Polynesian')

Bar Beau , rating = 7.4, cost = 2.0
The Happiest Hour , rating = 7.5, cost = 2.0
The Spaniard , rating = 6.8, cost = 2.0
The Loyal , rating = 8.4, cost = 3.0
2nd Floor Bar & Essen , rating = 7.9, cost = 3.0


In [118]:
recommendations('OTB')

Sorry, we can't find what you're looking for. Please try a different restaurant


In [91]:
recommendations('Meadowsweet')

Mayfield , rating = 8.1, cost = 2.0
Marlow & Sons , rating = 8.9, cost = 3.0
Raoul’s , rating = 8.4, cost = 4.0
Birds Of A Feather , rating = 7.8, cost = 2.0
Bar Bolinas , rating = 7.6, cost = 3.0


In [92]:
recommendations('The Modern')

Eleven Madison Park , rating = 9.2, cost = 4.0
The NoMad Bar , rating = 7.8, cost = 3.0
Fishtag , rating = 7.0, cost = 3.0
The Clocktower , rating = 8.2, cost = 4.0
The NoMad , rating = 7.8, cost = 3.0


In [93]:
recommendations('Aita')

Rosemary’s , rating = 7.1, cost = 3.0
Bar Sardine , rating = 8.0, cost = 2.0
Samurai Mama , rating = 7.1, cost = 2.0
The NoMad Bar , rating = 7.8, cost = 3.0
Bar Primi , rating = 7.7, cost = 2.0


In [94]:
recommendations("Chavela's")

Bar San Miguel , rating = 7.6, cost = 2.0
Upland , rating = 9.1, cost = 3.0
Casa Enrique , rating = 9.0, cost = 3.0
Carbone , rating = 9.4, cost = 4.0
Cookshop , rating = 8.4, cost = 3.0


In [95]:
recommendations('Mayfield')

The Loyal , rating = 8.4, cost = 3.0
Simon & The Whale , rating = 8.3, cost = 3.0
Meadowsweet , rating = 8.4, cost = 3.0
Birds Of A Feather , rating = 7.8, cost = 2.0
Fedora , rating = 8.5, cost = 3.0


In [96]:
recommendations('Miss Ada')

Mayfield , rating = 8.1, cost = 2.0
Miss Lily’s , rating = 7.8, cost = 3.0
Cookshop , rating = 8.4, cost = 3.0
Bar Sardine , rating = 8.0, cost = 2.0
Chez Ma Tante , rating = 8.5, cost = 2.0


In [98]:
recommendations('Traif')

Fishtag , rating = 7.0, cost = 3.0
Crispo , rating = 7.7, cost = 3.0
Milos , rating = 8.1, cost = 4.0
Beyoglu , rating = 7.7, cost = 2.0
Balthazar , rating = 8.1, cost = 4.0


In [101]:
df.sample()

Unnamed: 0,rest_cost,rest_name,rest_rating,11,11th,12,13,14,14th,15,...,cuisine_type_Spanish,cuisine_type_Steaks,cuisine_type_Sushi,cuisine_type_Tacos,cuisine_type_Taiwanese,cuisine_type_Tex-Mex,cuisine_type_Thai,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar
177,2.0,Anella,7.9,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [100]:
recommendations('Fiat Café')

Beyoglu , rating = 7.7, cost = 2.0
Sable's , rating = 8.0, cost = 2.0
Sao Mai , rating = 8.3, cost = 1.0
Blondies , rating = 8.0, cost = 1.0
Neil's Coffee Shop , rating = 7.3, cost = 2.0


In [102]:
recommendations('Xixa')

Upland , rating = 9.1, cost = 3.0
Gran Electrica , rating = 8.1, cost = 3.0
Dudley's , rating = 7.1, cost = 3.0
Covina , rating = 8.2, cost = 3.0
Añejo , rating = 7.0, cost = 3.0


In [103]:
recommendations('Clover Club')

Miriam , rating = 8.0, cost = 2.0
The Lambs Club , rating = 7.8, cost = 4.0
Bar Sardine , rating = 8.0, cost = 2.0
Scalinatella , rating = 7.3, cost = 3.0
Cafe Cluny , rating = 7.9, cost = 3.0


In [104]:
recommendations('Miriam')

Sable's , rating = 8.0, cost = 2.0
Beyoglu , rating = 7.7, cost = 2.0
Pastrami Queen , rating = 7.8, cost = 2.0
Hugo & Sons , rating = 7.4, cost = 2.0
Lido Harlem , rating = 7.4, cost = 2.0


In [105]:
recommendations("L'Express")

Fishtag , rating = 7.0, cost = 3.0
Sable's , rating = 8.0, cost = 2.0
Beyoglu , rating = 7.7, cost = 2.0
Milos , rating = 8.1, cost = 4.0
Pastrami Queen , rating = 7.8, cost = 2.0


In [106]:
recommendations('Emily')

Emily West Village , rating = 8.0, cost = 2.0
Paulie Gee’s , rating = 9.0, cost = 3.0
Lee’s Tavern , rating = 8.0, cost = 2.0
Joe & Pat’s , rating = 8.1, cost = 1.0
Denino’s , rating = 8.2, cost = 2.0


In [107]:
recommendations('Sisters')

Miss Lily’s , rating = 7.8, cost = 3.0
Rosemary’s , rating = 7.1, cost = 3.0
Osteria Morini , rating = 7.8, cost = 4.0
Fishtag , rating = 7.0, cost = 3.0
Bar Sardine , rating = 8.0, cost = 2.0


In [109]:
recommendations('Cosme')

Cookshop , rating = 8.4, cost = 3.0
Casa Enrique , rating = 9.0, cost = 3.0
Rucola , rating = 6.9, cost = 3.0
Gran Electrica , rating = 8.1, cost = 3.0
Claro , rating = 8.3, cost = 2.0


In [111]:
recommendations('The Odeon')

Fishtag , rating = 7.0, cost = 3.0
Milos , rating = 8.1, cost = 4.0
The NoMad , rating = 7.8, cost = 3.0
Scalinatella , rating = 7.3, cost = 3.0
Rubirosa , rating = 9.2, cost = 3.0


In [122]:
recommendations('Mermaid Inn')

Sable's , rating = 8.0, cost = 2.0
Blondies , rating = 8.0, cost = 1.0
Beyoglu , rating = 7.7, cost = 2.0
Candle Cafe , rating = 7.6, cost = 2.0
Pastrami Queen , rating = 7.8, cost = 2.0


In [123]:
recommendations('Frank')

Lavagna , rating = 7.9, cost = 3.0
Scalinatella , rating = 7.3, cost = 3.0
Supper , rating = 7.9, cost = 3.0
Fishtag , rating = 7.0, cost = 3.0
Sfoglia , rating = 7.9, cost = 4.0


In [125]:
recommendations('Veselka')

Cafe Cluny , rating = 7.9, cost = 3.0
Beebe’s , rating = 7.2, cost = 2.0
Sao Mai , rating = 8.3, cost = 1.0
P.J. Clarke’s , rating = 7.5, cost = 3.0
Sable's , rating = 8.0, cost = 2.0


In [134]:
df[df['rest_name'] == 'Sao Mai']

Unnamed: 0,rest_cost,rest_name,rest_rating,11,11th,12,13,14,14th,15,...,cuisine_type_Spanish,cuisine_type_Steaks,cuisine_type_Sushi,cuisine_type_Tacos,cuisine_type_Taiwanese,cuisine_type_Tex-Mex,cuisine_type_Thai,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar
471,1.0,Sao Mai,8.3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


Once we pull up recommendations, we want to be able to look at the recommended restaurants and view their location, type, and description. 

# TFIDF Vectorization Recommender

In [17]:
tfdf = pd.read_csv('./Data/tfidf_df.csv')

In [19]:
tfdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 824 entries, 0 to 823
Columns: 12270 entries, rest_cost to rest_borough_Westchester
dtypes: float64(12206), int64(62), object(2)
memory usage: 77.1+ MB


In [21]:
tfdf.head()

Unnamed: 0,rest_cost,rest_name,rest_rating,address_only,rest_zip_code,11,115,11am,11pm,11th,...,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar,rest_borough_Bronx,rest_borough_Brooklyn,rest_borough_Jersey City,rest_borough_Manhattan,rest_borough_Queens,rest_borough_Staten Island,rest_borough_Westchester
0,2.0,Mama’s Too,8.3,"2750 Broadway, New York, NY 10025",10025,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
1,4.0,Omakase Room By Tatsu,7.7,"14 Christopher St, New York, NY 10014",10014,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
2,4.0,Sushi Azabu,8.5,"428 Greenwich St., New York, NY 10013",10013,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
3,3.0,Saint Julivert Fisherie,7.7,"264 Clinton St, New York, NY 11201",11201,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
4,2.0,Farida,8.0,"498 9th Ave, New York, NY 10018",10018,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0


In [25]:
tf_matrix = create_sim_matrix(tfdf, ['rest_name', 'address_only', 'rest_zip_code'])

In [26]:
recommendations('Ruffian', tf_matrix, df=tfdf)

Manhatta , rating = 8.5, cost = 3.0
Mountain Bird , rating = 8.3, cost = 3.0
Bar Boulud , rating = 8.0, cost = 3.0
Mimi , rating = 8.4, cost = 3.0
Lafayette , rating = 8.1, cost = 3.0


In [27]:
recommendations('Traif', tf_matrix, df=tfdf)

Lighthouse , rating = 8.8, cost = 2.0
Sweet Chick , rating = 8.1, cost = 2.0
Mayfield , rating = 8.1, cost = 2.0
Chez Ma Tante , rating = 8.5, cost = 2.0
Emmy Squared , rating = 8.4, cost = 2.0


In [28]:
recommendations('Westville', tf_matrix, df=tfdf)

Westville East , rating = 7.7, cost = 2.0
Westville Chelsea , rating = 7.9, cost = 2.0
Emily West Village , rating = 8.0, cost = 2.0
Daily Provisions , rating = 8.4, cost = 2.0
Hudson Clearwater , rating = 8.1, cost = 2.0


In [29]:
recommendations('The Polynesian', tf_matrix, tfdf)

Porchlight , rating = 7.7, cost = 2.0
Rusty Knot , rating = 7.5, cost = 2.0
The Meatball Shop Chelsea , rating = 7.7, cost = 2.0
The Brooklyneer , rating = 7.4, cost = 2.0
Wilfie & Nell , rating = 7.1, cost = 2.0


In [32]:
tf_matrix.shape

(824, 824)

In [30]:
recommendations('The Odeon', tf_matrix, tfdf)

ABC Kitchen , rating = 7.4, cost = 4.0
Saxon + Parole , rating = 7.5, cost = 4.0
The Modern , rating = 7.8, cost = 4.0
Dylan Prime , rating = 7.3, cost = 4.0
Fred's at Barneys NY , rating = 7.4, cost = 4.0


In [33]:
recommendations('Veselka', tf_matrix, tfdf)

Viand , rating = 7.8, cost = 2.0
Russ & Daughters , rating = 8.6, cost = 2.0
Russ & Daughters Cafe , rating = 8.2, cost = 2.0
Mooncake Foods , rating = 7.4, cost = 2.0
Neil's Coffee Shop , rating = 7.3, cost = 2.0


In [None]:
def restaurant_details(restaurant):
    