In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances, cosine_distances

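# Load the companion notebook data_preparation.ipynb: importnb makes it importable as a module,
# and %run executes it in this namespace (presumably the source of `restaurants` and `reviews` used below).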
from importnb import imports
with imports("ipynb"):
    import data_preparation
%run data_preparation.ipynb

In [6]:
# Categorical features -> integer indicator columns (first level of each column is dropped).
categorical_cols = ['Cuisine', 'Open After 8pm?']
data_cat = pd.get_dummies(
    restaurants[categorical_cols],
    columns=categorical_cols,
    drop_first=True,
    dtype=int,
)

# Numerical features -> standardized with a fitted StandardScaler (kept in `scaler`).
numerical_cols = ['Latitude', 'Longitude', 'Average Cost']
scaler = StandardScaler()
data_num = scaler.fit_transform(restaurants[numerical_cols])

# Final feature matrix: scaled numeric columns first, indicator columns after.
data_restaurant = np.column_stack((data_num, data_cat))

In [7]:
# Both distance tables are square and labelled by restaurant name on rows and columns.
restaurant_names = restaurants['Restaurant Name']

# Euclidean distance between every pair of restaurant feature vectors.
euclidean_matrix = euclidean_distances(data_restaurant, data_restaurant)
restaurants_euclidean = pd.DataFrame(
    euclidean_matrix,
    index=restaurant_names,
    columns=restaurant_names,
)

# Cosine distance between every pair of restaurant feature vectors.
cosine_matrix = cosine_distances(data_restaurant, data_restaurant)
restaurants_cosine = pd.DataFrame(
    cosine_matrix,
    index=restaurant_names,
    columns=restaurant_names,
)

In [15]:
def contentfilter_recommendation(name, score_type='Euclidean', n=10):
    """Recommend the restaurants closest to a reviewer's highest-rated restaurant.

    The reviewer's rows in the module-level `reviews` frame are sorted by
    'Rating' (descending) and the first 'Restaurant Name' is taken as the
    favourite. That restaurant's column in the chosen distance table is then
    sorted ascending, the favourite itself is removed, and the first `n`
    entries are returned.

    Parameters
    ----------
    name : str
        Value matched against `reviews['Reviewer Name']`.
    score_type : str, default 'Euclidean'
        'Cosine' selects `restaurants_cosine`; 'Euclidean' selects
        `restaurants_euclidean`. Matching ignores case and surrounding
        whitespace. Any other value emits a UserWarning and falls back to
        Euclidean (the original silently fell back).
    n : int, default 10
        Number of recommendations to return (passed to `Series.head`).

    Returns
    -------
    pandas.Series
        Distances indexed by restaurant name, smallest first; the Series name
        is the favourite restaurant.

    Raises
    ------
    IndexError
        If `name` has no rows in `reviews`.
    KeyError
        If the favourite restaurant is not a column of the distance table.
    """
    import warnings  # local import so this cell does not depend on an edit to the import cell

    reviewer_restaurants = reviews[reviews['Reviewer Name'] == name].sort_values(['Rating'], ascending=False)
    if reviewer_restaurants.empty:
        # Same exception type `.iloc[0]` would raise, but with an actionable message.
        raise IndexError(f"no reviews found for reviewer {name!r}")
    fav_restaurant = reviewer_restaurants['Restaurant Name'].iloc[0]

    distance_tables = {
        'euclidean': restaurants_euclidean,
        'cosine': restaurants_cosine,
    }
    metric = str(score_type).strip().lower()
    if metric not in distance_tables:
        # Keep the historical Euclidean fallback, but make the fallback visible.
        warnings.warn(
            f"unknown score_type {score_type!r}; falling back to 'Euclidean'",
            stacklevel=2,
        )
        metric = 'euclidean'

    # NOTE(review): assumes restaurant names are unique, so `.loc` yields a Series — confirm.
    distances = distance_tables[metric].loc[:, fav_restaurant].sort_values(ascending=True)
    return distances[distances.index != fav_restaurant].head(n)

In [16]:
# Closest restaurants to this reviewer's top-rated restaurant, Euclidean distance (the default metric).
print(contentfilter_recommendation('Willie Jacobsen', score_type='Euclidean'))

Restaurant Name
Clarkes Off Campus        0.930041
Edzo's Burger Shop        1.105218
Pâtisserie Coralie        1.120795
Philz Coffee              1.221429
Hecky's BBQ               1.235294
Evanston Chicken Shack    1.301952
Le Peep                   1.320877
Fridas                    1.331222
Prairie Moon              1.369645
Mumbai Indian Grill       1.415534
Name: Jimmy Johns, dtype: float64


In [17]:
# Same reviewer, ranked by cosine distance instead.
print(contentfilter_recommendation('Willie Jacobsen', score_type='Cosine'))

Restaurant Name
Soban Korea               0.319224
Evanston Chicken Shack    0.365367
Kung Fu Tea               0.392735
Pâtisserie Coralie        0.394509
Edzo's Burger Shop        0.395345
Mumbai Indian Grill       0.411291
Chipotle                  0.424942
Elephant & Vine           0.430580
Hokkaido Ramen            0.461365
Picnic                    0.469977
Name: Jimmy Johns, dtype: float64
