In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances, cosine_distances

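# Import the data-preparation notebook as a module (via importnb), then %run it so the
# names it defines (presumably including `reviews`, used below) land in this namespace.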
from importnb import imports
with imports("ipynb"):
    import data_preparation
%run data_preparation.ipynb

In [None]:
# Aggregate the review-level rows into one demographic profile per unique user.
def _first_mode(values):
    """Return the first mode of ``values``, or NaN when there is no mode.

    ``Series.mode`` ignores nulls, so a group whose values are all null has
    an empty mode; indexing it with ``[0]`` would raise. NaN is returned in
    that case so the aggregation itself does not fail.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# Columns that are one-hot encoded after aggregation.
categorical_columns = ['Marital Status', 'Has Children?', 'Vegetarian?', 'Average Amount Spent',
                       'Preferred Mode of Transport', 'Northwestern Student?']
# Columns summarized by their mean; every other profile column uses the mode.
averaged_columns = ['Weight (lb)', 'Height (in)']
# Output order of the aggregated profile columns.
profile_columns = ['Birth Year', 'Marital Status', 'Has Children?', 'Vegetarian?',
                   'Weight (lb)', 'Height (in)', 'Average Amount Spent',
                   'Preferred Mode of Transport', 'Northwestern Student?']

data = reviews[['Reviewer Name'] + profile_columns]

# Named aggregation via ** because the column names are not valid Python identifiers.
data_users = data.groupby(['Reviewer Name']).agg(**{
    column: (column, 'mean' if column in averaged_columns else _first_mode)
    for column in profile_columns
}).reset_index()

# One-hot encode the categorical columns as 0/1 integers; drop_first removes the
# first level of each column so the remaining dummies are not redundant.
data_users = pd.get_dummies(
    data_users,
    columns=categorical_columns,
    drop_first=True, dtype=int
)

# Feature matrix only: the reviewer name is an identifier, not a feature.
data_demographics = data_users.drop(columns=['Reviewer Name'])

In [None]:
# Pairwise cosine distance between users' demographic profiles.
# Each column is standardized (zero mean, unit variance) first. Without this,
# columns with large raw magnitudes (presumably 'Birth Year', and the weight and
# height columns) would dominate the 0/1 dummy columns and make every pair of
# users look almost identical.
# NOTE: missing values in data_demographics are passed through by the scaler and
# will still be rejected by cosine_distances.
demographics_scaled = StandardScaler().fit_transform(data_demographics)
reviewer_names = data_users['Reviewer Name']

user_demo_cosine = pd.DataFrame(
    cosine_distances(demographics_scaled),  # Y omitted: distances of X against itself
    columns=reviewer_names,
    index=reviewer_names
)

In [None]:
def collab_filter_demographic(name, n_similar):
    """Recommend the top-rated restaurants of the users most similar to ``name``.

    Similarity is read from the module-level ``user_demo_cosine`` distance
    matrix (smaller distance = more similar); ratings come from the
    module-level ``reviews`` table.

    Parameters
    ----------
    name : str
        Reviewer to generate recommendations for. Must be a label of
        ``user_demo_cosine``.
    n_similar : int
        Number of nearest users whose reviews are considered.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Reviewer Name'``, ``'Restaurant Name'`` and ``'Rating'``:
        for each similar user, every review that ties for that user's highest
        rating. Rows are ordered by reviewer name (not by similarity), keep
        their original relative order within a reviewer, and carry a fresh
        0..n-1 index. Empty when no candidate reviews exist.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``user_demo_cosine``.
    """
    if name not in user_demo_cosine.columns:
        raise KeyError(f"Unknown reviewer: {name!r}")

    output_columns = ['Reviewer Name', 'Restaurant Name', 'Rating']

    # Distances from `name` to every other user, nearest first.
    distances_to_others = user_demo_cosine.loc[user_demo_cosine.index != name, name]
    most_similar_users = distances_to_others.sort_values(ascending=True).index[:n_similar]

    candidate_reviews = reviews[reviews['Reviewer Name'].isin(most_similar_users)]
    if candidate_reviews.empty:
        # Nothing to rank (e.g. n_similar == 0): return an empty, well-formed table.
        return candidate_reviews[output_columns].reset_index(drop=True)

    # Broadcast each reviewer's best rating back onto their rows and keep the ties.
    # This avoids groupby.apply, whose resulting index layout varies between
    # pandas versions and made the old 'level_1' column drop fragile.
    best_rating = candidate_reviews.groupby('Reviewer Name')['Rating'].transform('max')
    top_recs = candidate_reviews.loc[candidate_reviews['Rating'] == best_rating, output_columns]

    # A stable sort reproduces the grouped layout: reviewers in sorted order,
    # original row order preserved within each reviewer.
    return top_recs.sort_values('Reviewer Name', kind='stable').reset_index(drop=True)

In [None]:
# Example: recommendations drawn from the two users most similar to this reviewer.
# Left as the cell's last expression (instead of print) so the notebook renders
# the returned DataFrame as a rich table.
collab_filter_demographic('Adam Gibbons', 2)