In [215]:
import pandas as pd
import numpy as np
import pickle
from we_eat import (build_database, build_user_matrix, clean_database, 
                    extract_reviews, ALS_recommender, recommender)


## Import data from review scrape

In [2]:
als_df = pd.read_pickle('data/als_df.pkl')

In [3]:
als_df.reset_index(drop=True, inplace=True)

In [4]:
als_df = als_df[['user_id', 'item_id', 'rating', 'date']]

In [5]:
als_df.shape

(5216, 4)

## Spark ALS

In [6]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS

In [7]:
spark = SparkSession.builder.getOrCreate()

In [8]:
spark_als_df = spark.createDataFrame(als_df) 

In [9]:
pandas_als_df = spark_als_df.toPandas()

In [10]:
pandas_als_df.head()

Unnamed: 0,user_id,item_id,rating,date
0,1520,596,3.0,2005-08-02
1,1520,592,4.0,2005-09-14
2,1369,480,4.0,2006-05-13
3,1369,601,5.0,2006-05-19
4,1369,488,5.0,2006-05-22


In [11]:
pandas_als_df = pandas_als_df[['user_id', 'item_id', 'rating']]

In [12]:
spark_als_df = spark.createDataFrame(pandas_als_df)

In [13]:
als_model = ALS(
    itemCol='item_id',
    userCol='user_id',
    ratingCol='rating',
    nonnegative=True,    
    regParam=0.1,
    rank=10,
    coldStartStrategy = 'drop') 

In [14]:
recommender = als_model.fit(spark_als_df)

## Find the Latent Features Matrices

In [15]:
uf_df = recommender.userFactors.toPandas()

In [16]:
if_df = recommender.itemFactors.toPandas()

In [17]:
user_factors_array = np.array(uf_df['features'].tolist())

In [18]:
item_factors_array = np.array(if_df['features'].tolist())

In [190]:
#pandas_als_df.pivot_table('rating','user_id','item_id')

## Recommendations based on Minimizing Dissatisfaction

### Mess around with small example

In [19]:
user1 = 'gabe'
user2 = 'nicole'

In [20]:
preds_db = pd.DataFrame(data=[[1, 4], [5, 8], [7, 9], [3, 4], [8, 8], [7, 6], [5, 2], [4, 9]], 
                              index=['rest1', 'rest2', 'rest3', 'rest4', 'rest5', 'rest6', 'rest7', 'rest8'], 
                              columns=[user1, user2])
                              
                              

In [21]:
preds_db['mean']=preds_db.mean(axis=1)
preds_db['min']=preds_db.min(axis=1)
preds_db_sorted = preds_db.sort_values(by=['mean','min'], ascending=False)
preds_db_sorted

Unnamed: 0,gabe,nicole,mean,min
rest5,8,8,8.0,8.0
rest3,7,9,8.0,7.0
rest6,7,6,6.5,6.0
rest2,5,8,6.5,5.0
rest8,4,9,6.5,4.0
rest4,3,4,3.5,3.0
rest7,5,2,3.5,2.0
rest1,1,4,2.5,1.0


### Now use the ALSRecommender class to get actual recommendations

In [309]:
from pymongo import MongoClient
from we_eat.ALS_recommender import ALSRecommender

In [85]:
we_eat_client = MongoClient()
we_eat_database = we_eat_client['we_eat']
survey_collection = we_eat_database['surveys']
partner_collection = we_eat_database['partners']

In [110]:
#For the ALSRecommender you need: item_factors_df and inverted_alias_dict
with open('data/item_factors_df.pkl', 'rb') as f:
    item_factors = pickle.load(f)

with open('data/inv_alias_dict.pickle', 'rb') as g:
    inv_alias_dict = pickle.load(g)

In [220]:
def recommend_for_two_users(user1, user2):
    """Return a recommendation, given two usernames."""
    user1_survey = survey_collection.find_one({'user': user1})
    user2_survey = survey_collection.find_one({'user': user2})
    recommender = ALSRecommender(item_factors, inv_alias_dict)
    user1_df = recommender.user_preds_from_survey(user1_survey)
    user2_df = recommender.user_preds_from_survey(user2_survey)
    compiled_df = recommender.get_combined_preds_df(user1_df, user2_df).T
    top_ten = recommender.min_dissat_recs(user1, user2, compiled_df, n=10)
    top_ten_list = list(top_ten.index)
    return top_ten
    #return f'<h2> Try this place out!  <a href="https://www.yelp.com/biz/{random_rec.index[0]}">{random_rec.index[0]}</a></h2>'

In [251]:
we_eat_database = we_eat_database.survey_collection.delete_one({'user': 'gabe'})

In [253]:
for itm in survey_collection.find().limit(5):
    print(itm)

{'_id': ObjectId('5c0fee7691d56f3b9ac1e345'), 'user': 'gabe', 'survey': {'green-leaf-vietnamese-restaurant-seattle': 8, 'il-corvo-pasta-seattle': 9, 'mee-sum-pastry-seattle': 0}}
{'_id': ObjectId('5c0fef9591d56f3b9ac1e346'), 'user': 'jack', 'survey': {'green-leaf-vietnamese-restaurant-seattle': 3, 'il-corvo-pasta-seattle': 6, 'mee-sum-pastry-seattle': 8, 'mod-pizza-seattle-7': 9, 'ba-bar-seattle': 10}}
{'_id': ObjectId('5c10288491d56f5cc3264cc0'), 'user': '', 'survey': {}}
{'_id': ObjectId('5c104dbc91d56f73b3b3b3df'), 'user': 'ryan', 'survey': {'green-leaf-vietnamese-restaurant-seattle': 9, 'il-corvo-pasta-seattle': 7, 'mee-sum-pastry-seattle': 9, 'mod-pizza-seattle-7': 8, 'ba-bar-seattle': 10, 'pike-place-chowder-seattle': 7, 'bacco-cafe-and-bistro-seattle-2': 8, 'metropolitan-grill-seattle': 9, 'el-borracho-seattle': 7, 'specialtys-café-and-bakery-seattle-34': 6, 'maneki-seattle': 8}}
{'_id': ObjectId('5c109ba591d56f7d7ee46919'), 'user': 'gabe', 'survey': {'green-leaf-vietnamese-res

In [310]:
recommend_for_two_users('nicole', 'gabe')

Unnamed: 0,nicole,gabe,mean,min
café-frieda-seattle-3,4.7,4.6,4.6,4.6
atlantic-street-pizza-slices-seattle,3.8,5.0,4.4,3.8
daawat-grill-seattle-2,3.7,4.9,4.3,3.7
gourmet-noodle-bowl-seattle,4.3,4.1,4.2,4.1
mikes-noodle-house-seattle,3.7,4.3,4.0,3.7
amazon-go-seattle-5,4.4,3.5,4.0,3.5
delicatus-seattle-2,3.5,4.4,4.0,3.5
90-karaoke-seattle-2,3.5,4.4,4.0,3.5
cafe-pho-seattle,3.4,4.5,4.0,3.4
jimmy-johns-seattle-11,3.4,4.6,4.0,3.4


In [254]:
recommender = ALSRecommender(item_factors, inv_alias_dict)

In [256]:
alex_raw_ratings = recommender.user_preds_from_survey('alex')

TypeError: string indices must be integers

In [113]:
idxs = recommender.get_restaurant_indexes(alex_raw_ratings, item_factors)

In [114]:
item_factors_array = np.array(item_factors['features'].tolist())

In [131]:
alex_array = recommender.get_user_factors_array(item_factors_array, idxs, alex_raw_ratings)

In [136]:
recommender.new_user_predict(alex_array, item_factors_array, 'alex')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,611,612,613,614,615,616,617,618,619,620
alex,-1.086018,2.942301,0.221897,-0.122837,-4.214041,2.013611,-2.356432,1.354076,-4.005055,-1.775023,...,0.224401,-1.417741,-1.332494,1.060587,2.487265,-1.029893,1.717257,-3.571009,-3.732381,-4.593557


In [125]:
nicole_raw_ratings = recommender.get_raw_ratings_df(nicole['survey'])

In [126]:
nicole_idxs = recommender.get_restaurant_indexes(nicole_raw_ratings, item_factors)

In [128]:
nicole_array = recommender.get_user_factors_array(item_factors_array, nicole_idxs, nicole_raw_ratings)

In [181]:
nicoles_preds = recommender.user_preds_from_survey(nicole)

In [308]:
(nicoles_preds.values - nicoles_preds.values.min())/(nicoles_preds.values.max()- nicoles_preds.values.min())

array([[ 0.86412637,  0.77749903,  0.50310389,  0.46583622,  0.32538099,
         0.82411265,  0.32495538,  0.67667855,  0.47205762,  0.57266988,
         0.65312194,  0.48570731,  0.41618088,  0.52476546,  0.55642871,
         0.64063144,  0.34330369,  0.54520134,  0.61841962,  0.76710549,
         0.49689763,  0.55852394,  0.54315476,  0.41509578,  0.67797209,
         0.46513548,  0.31744437,  0.71924755,  0.43517539,  0.42035126,
         0.69925713,  0.76308577,  0.60212787,  0.43706222,  0.60424133,
         0.43325482,  0.45452089,  0.61709724,  0.72588822,  0.72904838,
         0.56639712,  0.39704483,  0.66804608,  0.66939789,  0.56751509,
         0.60179705,  0.61872207,  0.72978922,  0.5758839 ,  0.46021005,
         0.55620509,  0.7478459 ,  0.31991107,  0.74335722,  0.66422629,
         0.68013822,  0.69800883,  0.86732117,  0.73175327,  0.61680133,
         0.58640159,  0.57367176,  0.56658591,  0.63422746,  0.56311104,
         0.5501696 ,  0.55445938,  0.60824283,  0.5

In [214]:
(nicoles_preds - float(nicoles_preds.mean(axis=1))) / (float(nicoles_preds.max(axis=1) - nicoles_preds.min(axis=1))) * 5

Unnamed: 0,amazon-go-seattle-5,the-pink-door-seattle-4,nirmals-seattle,biscuit-bitch-seattle-5,damn-the-weather-seattle,bad-bishop-seattle,pike-place-chowder-seattle,tsukushinbo-seattle,taylor-shellfish-oyster-bar-seattle-2,elliotts-oyster-house-seattle-2,...,mediterranean-cuisine-seattle,ricenroll-seattle-2,union-deli-la-puget-sound-plaza-building-seattle,measure-seattle,subway-seattle-3,u-bank-cafe-seattle,subway-seattle-31,subway-seattle-66,organic-to-go-seattle-4,chez-dave-seattle-2
nicole,1.44162,1.008483,-0.363492,-0.549831,-1.252107,1.241551,-1.254235,0.504381,-0.518724,-0.015663,...,0.14641,-0.565416,0.542035,-0.095941,-0.911693,-0.362947,0.427569,-0.430471,-0.121945,-0.491615


In [197]:
((nicoles_preds.loc['nicole'] - nicoles_preds.loc['nicole'].mean()) / int(nicoles_preds.max(axis=1) - nicoles_preds.min(axis=1))) * 5

amazon-go-seattle-5                                        1.472157
the-pink-door-seattle-4                                    1.029845
nirmals-seattle                                           -0.371192
biscuit-bitch-seattle-5                                   -0.561477
damn-the-weather-seattle                                  -1.278629
bad-bishop-seattle                                         1.267850
pike-place-chowder-seattle                                -1.280802
tsukushinbo-seattle                                        0.515065
taylor-shellfish-oyster-bar-seattle-2                     -0.529712
elliotts-oyster-house-seattle-2                           -0.015994
dough-zone-dumpling-house-seattle-2                        0.394787
the-london-plane-seattle-3                                -0.460017
a-hong-kong-kitchen-seattle                               -0.815013
radiator-whiskey-seattle                                  -0.260590
sizzling-pot-king-seattle-2                     

31

In [134]:
nicole_array

array([ 1.95262511,  4.08407358,  2.79806401,  1.01134286,  0.67436665,
        1.92153539, -5.58814072, -3.27817559,  0.65906346, -6.05405171])

In [133]:
alex_array

array([-2.62355235,  0.83300545,  0.31641287, -0.31200927, -0.65874339,
        1.55980876,  0.19099403, -0.84781937,  3.43563772, -2.98570759])

In [151]:
recommend_for_two_users('nicole', 'gabe')

Unnamed: 0,nicole,gabe,mean,min
café-frieda-seattle-3,11.8,1.7,6.8,1.7
new-star-seafood-restaurant-seattle,12.0,0.2,6.1,0.2
gourmet-noodle-bowl-seattle,7.8,1.3,4.6,1.3
walla-walla-farms-seattle-3,8.1,1.1,4.6,1.1
seattle-deli-seattle,7.8,0.9,4.4,0.9
amazon-go-seattle-5,7.7,0.6,4.2,0.6
patagon-seattle-2,8.3,0.2,4.2,0.2
jasmines-grill-and-bar-seattle-3,7.7,0.5,4.1,0.5
saffron-spice-seattle,7.1,0.8,4.0,0.8
duk-li-dim-sum-seattle,6.4,1.1,3.8,1.1


In [300]:
recommender.new_user_predict()

TypeError: new_user_predict() missing 1 required positional argument: 'username'

## Discover Meaning of Latent Features

In [275]:
item_features = if_df.set_index(keys='id').rename(inv_alias_dict)

In [285]:
item_features[
    ['lf1', 'lf2', 'lf3', 'lf4', 'lf5', 
    'lf6', 'lf7', 'lf8', 'lf9', 'lf10']] = pd.DataFrame(item_features['features'].values.tolist(), index=item_features.index)

In [289]:
item_features.drop(columns='features', inplace=True)

In [293]:
item_features.max()

lf1     1.722892
lf2     1.926288
lf3     2.318008
lf4     1.924033
lf5     1.816260
lf6     1.919416
lf7     2.175644
lf8     2.341703
lf9     1.971399
lf10    2.057283
dtype: float64

In [296]:
item_features[item_features['lf1'] > 1.4]

Unnamed: 0_level_0,lf1,lf2,lf3,lf4,lf5,lf6,lf7,lf8,lf9,lf10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
edgars-cantina-seattle,1.590337,0.757475,1.168396,0.535843,0.749513,0.169594,0.0,0.204292,0.145921,0.147541
naansense-seattle,1.641578,0.598217,0.550637,0.635827,0.894305,0.379165,0.37944,0.194365,0.844273,1.399915
the-juicy-café-seattle,1.559539,0.203142,1.254338,0.647909,0.740068,0.061297,0.686022,0.286337,0.155347,0.961216
joes-grilled-gourmet-dogs-seattle,1.49918,1.103328,0.315047,0.993801,1.203587,0.438152,1.178001,0.65234,0.760003,0.734551
the-peak-cafe-seattle,1.722892,0.295996,0.10487,0.637233,0.181137,0.249644,0.02959,0.199486,0.22372,0.878193
asia-bar-b-que-seattle,1.558284,0.656032,0.0,0.0,0.962377,0.530025,0.675288,0.089884,0.0,0.0
sprigy-seattle-2,1.434318,0.420031,0.255195,0.444666,0.231898,0.221093,0.309535,1.041525,1.097485,1.304141
pho-hoa-noodle-soup-seattle-2,1.687772,0.562775,0.210102,0.236986,0.718464,1.270142,0.226702,0.754302,0.42563,0.930452


In [295]:
item_features[item_features['lf2'] > 1.5]

Unnamed: 0_level_0,lf1,lf2,lf3,lf4,lf5,lf6,lf7,lf8,lf9,lf10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
blarney-stone-pub-and-restaurant-seattle,0.402338,1.741942,0.192083,0.443123,0.678277,0.494706,0.827771,0.513743,0.380838,0.249133
nordstrom-grill-seattle-2,0.0,1.50029,0.429045,0.532105,0.360799,0.98951,0.148354,0.0,0.249636,0.107998
the-pink-door-seattle-4,0.54048,1.629279,0.939141,0.685097,0.628291,0.448695,0.597044,0.590072,0.609014,0.29231
sun-bakery-and-cafe-seattle,0.550413,1.777017,0.770815,0.785573,0.578943,0.54667,0.203654,0.212464,1.242564,0.778897
jimmy-johns-seattle-10,0.788836,1.505218,0.097764,0.092762,1.019103,0.205163,0.684783,0.078674,0.253266,0.737966
manus-tacos-seattle,0.014717,1.672771,0.83877,0.805532,0.650886,0.308722,1.203253,0.167087,0.071432,0.0
ocean-star-seattle,0.130536,1.585349,0.0,0.903809,0.603764,0.061178,0.993957,0.0,0.370874,0.509744
piroshki-on-3rd-seattle-2,0.669165,1.733265,0.443936,0.813778,0.500967,0.591918,0.627897,0.599078,0.991102,0.120128
dead-line-seattle,0.748923,1.649559,0.872527,0.43806,0.885559,0.589574,0.510882,0.863055,0.161316,0.68594
brgr-bar-seattle-seattle,0.365115,1.688619,0.923923,0.899783,0.694357,0.219938,0.445436,0.123597,1.259727,0.510624


In [298]:
item_features[item_features['lf3'] > 1.5]

Unnamed: 0_level_0,lf1,lf2,lf3,lf4,lf5,lf6,lf7,lf8,lf9,lf10
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
stella-fiore-wood-fired-pizza-seattle-3,0.011343,1.032287,1.657052,1.68634,0.0,0.033238,0.103568,0.144188,0.621215,0.0
slice-box-pizza-seattle,1.264537,0.748792,1.6631,0.210069,0.679211,0.56194,0.555554,0.563989,0.778168,0.148532
gopoké-seattle-5,0.419475,0.399,1.76684,1.077929,0.654047,0.602025,0.932534,1.025285,0.326879,0.226753
tamarind-tree-seattle,0.382479,0.544225,1.700911,0.587304,0.625644,1.003636,0.559271,0.277773,0.554376,0.20654
moore-coffee-seattle-2,0.158086,0.944315,1.681971,0.279571,1.293378,0.59743,0.653115,0.343156,0.298935,0.452331
din-tai-fung-seattle-17,0.00386,0.0,1.929113,1.060447,0.976702,0.164543,0.287002,0.002765,0.101028,0.946303
ummas-lunch-box-seattle,0.276635,0.494776,1.512918,0.928119,1.108482,0.809274,0.602978,0.834892,0.830747,0.138082
maxang-deli-and-cafe-seattle,0.687803,0.2146,1.554332,0.177377,0.388857,0.663399,1.121141,1.296088,0.552968,0.155321
off-the-rez-seattle,0.476147,1.337378,1.597466,0.974727,0.554509,0.518408,0.198163,0.451709,0.459143,0.465402
the-little-london-plane-seattle-2,0.518624,0.934102,1.98752,0.402104,0.584147,0.363166,0.81673,0.62755,0.160598,0.213647


In [27]:
#compile MongoDB
rest_db = build_database.build_database()

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950


In [28]:
#retrieve MongoDB
db = build_database.retrieve_database()

In [273]:
db.head(11)

Unnamed: 0,_id,alias,categories,coordinates,display_phone,distance,id,image_url,is_closed,location,name,phone,price,rating,review_count,transactions,url,cats
0,5bf5e06f91d56f59f158cfa4,amazon-go-seattle-5,"[{'alias': 'convenience', 'title': 'Convenienc...","{'latitude': 47.60638, 'longitude': -122.33122}",,493.410866,XDfmz7Woxx6NkZDGBtAVAQ,https://s3-media1.fl.yelpcdn.com/bphoto/fDQhxw...,False,"{'address1': '920 5th Ave', 'address2': None, ...",Amazon Go,,,5.0,7,[],https://www.yelp.com/biz/amazon-go-seattle-5?a...,"convenience,salad,sandwiches"
1,5bf5e06f91d56f59f158cfa5,the-pink-door-seattle-4,"[{'alias': 'italian', 'title': 'Italian'}, {'a...","{'latitude': 47.61028, 'longitude': -122.3425}",(206) 443-3241,1327.897033,VOPdG8llLPaga9iJxXcMuQ,https://s3-media1.fl.yelpcdn.com/bphoto/c_vzRF...,False,"{'address1': '1919 Post Alley', 'address2': ''...",The Pink Door,12064433241.0,$$,4.5,4019,[],https://www.yelp.com/biz/the-pink-door-seattle...,"italian,wine_bars,seafood"
2,5bf5e06f91d56f59f158cfa6,nirmals-seattle,"[{'alias': 'indpak', 'title': 'Indian'}]","{'latitude': 47.60147, 'longitude': -122.33262}",(206) 683-9701,228.364627,GXz21OgpWOtnCF0GDXHPhA,https://s3-media3.fl.yelpcdn.com/bphoto/LVSEXb...,False,"{'address1': '106 Occidental Ave S', 'address2...",Nirmal's,12066839701.0,$$,4.0,424,"[restaurant_reservation, pickup]",https://www.yelp.com/biz/nirmals-seattle?adjus...,indpak
3,5bf5e06f91d56f59f158cfa7,bad-bishop-seattle,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...","{'latitude': 47.60286, 'longitude': -122.33427}",(206) 623-3440,344.167932,KFMsgY5mV_wCYsa0XX_hEQ,https://s3-media1.fl.yelpcdn.com/bphoto/1GpjX0...,False,"{'address1': '704 1st Ave', 'address2': None, ...",Bad Bishop,12066233440.0,,4.5,6,[],https://www.yelp.com/biz/bad-bishop-seattle?ad...,"cocktailbars,comfortfood"
4,5bf5e06f91d56f59f158cfa8,pike-place-chowder-seattle,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...","{'latitude': 47.60939, 'longitude': -122.34112}",(206) 267-2537,1176.460907,6I28wDuMBR5WLMqfKxaoeg,https://s3-media3.fl.yelpcdn.com/bphoto/ijju-w...,False,"{'address1': '1530 Post Aly', 'address2': 'Ste...",Pike Place Chowder,12062672537.0,$$,4.5,6317,[pickup],https://www.yelp.com/biz/pike-place-chowder-se...,"seafood,soup"
5,5bf5e06f91d56f59f158cfa9,biscuit-bitch-seattle-5,"[{'alias': 'southern', 'title': 'Southern'}, {...","{'latitude': 47.60324, 'longitude': -122.3321}",(206) 623-1859,217.757088,UzeuW2PwabdLQjrS4E6_kg,https://s3-media4.fl.yelpcdn.com/bphoto/WUnigN...,False,"{'address1': '621 3rd Ave', 'address2': '', 'a...",Biscuit Bitch,12066231859.0,$,4.5,764,[],https://www.yelp.com/biz/biscuit-bitch-seattle...,"southern,breakfast_brunch,coffee"
6,5bf5e06f91d56f59f158cfaa,damn-the-weather-seattle,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...","{'latitude': 47.6011167, 'longitude': -122.334...",(206) 946-1283,342.388755,UaszJCXczQ-djWsQ0ylVAA,https://s3-media1.fl.yelpcdn.com/bphoto/DqP1nz...,False,"{'address1': '116 1st Ave S', 'address2': '', ...",Damn the Weather,12069461283.0,$$,4.0,386,[],https://www.yelp.com/biz/damn-the-weather-seat...,"cocktailbars,newamerican,tapasmallplates"
7,5bf5e06f91d56f59f158cfab,tsukushinbo-seattle,"[{'alias': 'japanese', 'title': 'Japanese'}, {...","{'latitude': 47.59985, 'longitude': -122.32681}",(206) 467-4004,335.631448,Y-OgQKqreMaajTUCPbLPKg,https://s3-media3.fl.yelpcdn.com/bphoto/r6mFft...,False,"{'address1': '515 S Main St', 'address2': '', ...",Tsukushinbo,12064674004.0,$$,4.0,788,[],https://www.yelp.com/biz/tsukushinbo-seattle?a...,"japanese,sushi,seafood"
8,5bf5e06f91d56f59f158cfac,taylor-shellfish-oyster-bar-seattle-2,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...","{'latitude': 47.59884, 'longitude': -122.33268}",(206) 501-4060,423.744327,p1TOF1XkXzLx3a-LF3lDlw,https://s3-media2.fl.yelpcdn.com/bphoto/he49IR...,False,"{'address1': '410 Occidental Ave S', 'address2...",Taylor Shellfish Oyster Bar,12065014060.0,$$,4.5,525,[],https://www.yelp.com/biz/taylor-shellfish-oyst...,"seafood,raw_food"
9,5bf5e06f91d56f59f158cfad,elliotts-oyster-house-seattle-2,"[{'alias': 'seafood', 'title': 'Seafood'}]","{'latitude': 47.6054699, 'longitude': -122.34092}",(206) 623-4340,917.494299,xqH038QcquJEMm5LIZHd5w,https://s3-media3.fl.yelpcdn.com/bphoto/Am6Aab...,False,"{'address1': '1201 Alaskan Way', 'address2': '...",Elliott's Oyster House,12066234340.0,$$$,4.0,3047,[],https://www.yelp.com/biz/elliotts-oyster-house...,seafood


In [30]:
#clean database
cleaned_db = clean_database.clean_it_all(db)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [31]:
cleaned_db.shape

(673, 170)

In [32]:
V = cleaned_db.drop(columns=['id','image_url', 'location', 'rating', 'review_count',
       'transactions', 'url', 'dist_from_galvanize', 'cats', 'popularity'])

In [33]:
V

Unnamed: 0_level_0,category_convenience,category_salad,category_sandwiches,category_italian,category_wine_bars,category_seafood,category_indpak,category_cocktailbars,category_comfortfood,category_soup,...,category_beergardens,category_gelato,category_comedyclubs,category_theater,category_flowers,category_gourmet,$,$$,$$$,$$$$
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
amazon-go-seattle-5,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
the-pink-door-seattle-4,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
nirmals-seattle,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
bad-bishop-seattle,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,1,0,0
pike-place-chowder-seattle,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
biscuit-bitch-seattle-5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
damn-the-weather-seattle,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
tsukushinbo-seattle,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
taylor-shellfish-oyster-bar-seattle-2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
elliotts-oyster-house-seattle-2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [34]:
rr = recommender.RestaurantRecommender()

AttributeError: 'ALSModel' object has no attribute 'RestaurantRecommender'

### Read in the business info df (scraped from yelp)

In [None]:
bizinfo_df = pd.read_pickle('data/bizinfo_df.pkl')

In [None]:
bizinfo_df.columns

In [None]:
bizinfo_df.T

In [None]:
bizinfo_df.replace(to_replace={'Yes': 1, 'No': -1, None: 0})

## Merge bizinfo_df and cleaned_df to get full set of restaurant features

In [None]:
full_df = pd.merge(left=cleaned_db, right=bizinfo_df, how='outer', left_index=True, right_index=True)

In [None]:
full_df.shape

In [272]:
full_df

NameError: name 'full_df' is not defined