In [147]:
import pandas as pd
import numpy as np
import pickle
from we_eat import (build_database, build_user_matrix, clean_database, 
                    extract_reviews, ALS_recommender, recommender)


## Import data from review scrape

In [2]:
als_df = pd.read_pickle('data/als_df.pkl')

In [3]:
als_df.reset_index(drop=True, inplace=True)

In [4]:
als_df = als_df[['user_id', 'item_id', 'rating', 'date']]

In [5]:
als_df.shape

(5216, 4)

## Spark ALS

In [6]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS

In [7]:
spark = SparkSession.builder.getOrCreate()

In [8]:
spark_als_df = spark.createDataFrame(als_df) 

In [9]:
pandas_als_df = spark_als_df.toPandas()

In [10]:
pandas_als_df.head()

Unnamed: 0,user_id,item_id,rating,date
0,1520,596,3.0,2005-08-02
1,1520,592,4.0,2005-09-14
2,1369,480,4.0,2006-05-13
3,1369,601,5.0,2006-05-19
4,1369,488,5.0,2006-05-22


In [11]:
pandas_als_df = pandas_als_df[['user_id', 'item_id', 'rating']]

In [12]:
spark_als_df = spark.createDataFrame(pandas_als_df)

In [13]:
als_model = ALS(
    itemCol='item_id',
    userCol='user_id',
    ratingCol='rating',
    nonnegative=True,    
    regParam=0.1,
    rank=10,
    coldStartStrategy = 'drop') 

In [14]:
recommender = als_model.fit(spark_als_df)

## Find the Latent Features Matrices

In [15]:
uf_df = recommender.userFactors.toPandas()

In [16]:
if_df = recommender.itemFactors.toPandas()

In [17]:
user_factors_array = np.array(uf_df['features'].tolist())

In [18]:
item_factors_array = np.array(if_df['features'].tolist())

In [156]:
pandas_als_df.pivot_table('rating','user_id','item_id')


item_id,0,1,2,3,4,5,6,7,8,9,...,611,612,613,614,615,616,617,618,619,620
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,4.0,,,,,,,,,,...,,,,,,,,,,
1,5.0,,,,,,,,,,...,,,,,,,,,,
2,5.0,,,,,,,,,,...,,,,,,,,,,
3,5.0,,,,,,,,,,...,,,,,,,,,,
4,5.0,,,,,,,,,,...,,,,,,,,,,
5,,4.0,,,,,,,,,...,,,,,,,,,,
6,,5.0,,,,,,,,,...,,,,,,,,,,
7,,4.0,,,,,,,,,...,,,,,,,,,,
8,,5.0,,,,,,,,,...,,,,,,,,,,
9,,4.0,,,,,,,,,...,,,,,,,,,,


## Recommendations based on Minimizing Dissatisfaction

### Mess around with small example

In [19]:
user1 = 'gabe'
user2 = 'nicole'

In [20]:
preds_db = pd.DataFrame(data=[[1, 4], [5, 8], [7, 9], [3, 4], [8, 8], [7, 6], [5, 2], [4, 9]], 
                              index=['rest1', 'rest2', 'rest3', 'rest4', 'rest5', 'rest6', 'rest7', 'rest8'], 
                              columns=[user1, user2])
                              
                              

In [21]:
preds_db['mean']=preds_db.mean(axis=1)
preds_db['min']=preds_db.min(axis=1)
preds_db_sorted = preds_db.sort_values(by=['mean','min'], ascending=False)
preds_db_sorted

Unnamed: 0,gabe,nicole,mean,min
rest5,8,8,8.0,8.0
rest3,7,9,8.0,7.0
rest6,7,6,6.5,6.0
rest2,5,8,6.5,5.0
rest8,4,9,6.5,4.0
rest4,3,4,3.5,3.0
rest7,5,2,3.5,2.0
rest1,1,4,2.5,1.0


### Now use the ALSRecommender class to get actual recommendations

In [148]:
from pymongo import MongoClient
from we_eat.ALS_recommender import ALSRecommender

In [85]:
we_eat_client = MongoClient()
we_eat_database = we_eat_client['we_eat']
survey_collection = we_eat_database['surveys']
partner_collection = we_eat_database['partners']

In [110]:
#For the ALSRecommender you need: item_factors_df and inverted_alias_dict
with open('data/item_factors_df.pkl', 'rb') as f:
    item_factors = pickle.load(f)

with open('data/inv_alias_dict.pickle', 'rb') as g:
    inv_alias_dict = pickle.load(g)

In [95]:
def recommend_for_two_users(user1, user2):
    """Return a recommendation, given two usernames."""
    user1_survey = survey_collection.find_one({'user': user1})
    user2_survey = survey_collection.find_one({'user': user2})
    recommender = ALSRecommender(item_factors, inv_alias_dict)
    user1_df = recommender.user_preds_from_survey(user1_survey)
    user2_df = recommender.user_preds_from_survey(user2_survey)
    compiled_df = recommender.get_combined_preds_df(user1_df, user2_df).T
    top_ten = recommender.min_dissat_recs(user1, user2, compiled_df, n=10)
    top_ten_list = list(top_ten.index)
    return top_ten
    #return f'<h2> Try this place out!  <a href="https://www.yelp.com/biz/{random_rec.index[0]}">{random_rec.index[0]}</a></h2>'

In [98]:
recommend_for_two_users('alex', 'nicole')

Unnamed: 0,alex,nicole,mean,min
café-frieda-seattle-3,3.1,11.8,7.4,3.1
purple-café-and-wine-bar-seattle-3,8.5,5.9,7.2,5.9
jasmines-grill-and-bar-seattle-3,4.6,7.7,6.2,4.6
new-star-seafood-restaurant-seattle,0.0,12.0,6.0,0.0
loulay-kitchen-and-bar-seattle,4.5,6.9,5.7,4.5
walla-walla-farms-seattle-3,1.9,8.1,5.0,1.9
seattle-deli-seattle,1.4,7.8,4.6,1.4
noi-thai-cuisine-seattle-3,3.4,5.5,4.4,3.4
patagon-seattle-2,0.5,8.3,4.4,0.5
bad-bishop-seattle,2.0,6.4,4.2,2.0


In [97]:
recommend_for_two_users('nicole', 'gabe')

Unnamed: 0,nicole,gabe,mean,min
café-frieda-seattle-3,11.8,1.7,6.8,1.7
new-star-seafood-restaurant-seattle,12.0,0.2,6.1,0.2
gourmet-noodle-bowl-seattle,7.8,1.3,4.6,1.3
walla-walla-farms-seattle-3,8.1,1.1,4.6,1.1
seattle-deli-seattle,7.8,0.9,4.4,0.9
amazon-go-seattle-5,7.7,0.6,4.2,0.6
patagon-seattle-2,8.3,0.2,4.2,0.2
jasmines-grill-and-bar-seattle-3,7.7,0.5,4.1,0.5
saffron-spice-seattle,7.1,0.8,4.0,0.8
duk-li-dim-sum-seattle,6.4,1.1,3.8,1.1


In [78]:
nicole = survey_collection.find_one({'user': 'nicole'})

In [79]:
nicole

{'_id': ObjectId('5c109e7091d56f7d7ee4691a'),
 'survey': {'ba-bar-seattle': 8,
  'bacco-cafe-and-bistro-seattle-2': 7,
  'el-borracho-seattle': 7,
  'green-leaf-vietnamese-restaurant-seattle': 10,
  'il-corvo-pasta-seattle': 6,
  'maneki-seattle': 6,
  'mee-sum-pastry-seattle': 7,
  'metropolitan-grill-seattle': 1,
  'mod-pizza-seattle-7': 6,
  'pike-place-chowder-seattle': 4,
  'specialtys-café-and-bakery-seattle-34': 5},
 'user': 'nicole'}

In [152]:
survey_collection.find_one({'user': 'gabe'})

{'_id': ObjectId('5c0fee7691d56f3b9ac1e345'),
 'survey': {'green-leaf-vietnamese-restaurant-seattle': 8,
  'il-corvo-pasta-seattle': 9,
  'mee-sum-pastry-seattle': 0},
 'user': 'gabe'}

In [48]:
def standardize_survey_results(user_survey):
    """Return the survey_results dictionary with the ratings on a scale of 1-5 (to make
    it the same as Yelp's scale)"""
    return {k: v / 2 for k, v in user_survey.items()} 

In [67]:
nic = standardize_survey_results(user_survey=nicole['survey'])

In [82]:
from statistics import mean, stdev
def standardize(survey):
    ave = mean(survey.values())
    sd = stdev(survey.values())
    return {k: (v-ave)/sd for k, v in survey.items()}

In [83]:
standardize(nicole['survey'])

{'ba-bar-seattle': 0.8299682133907225,
 'bacco-cafe-and-bistro-seattle-2': 0.3952229587574869,
 'el-borracho-seattle': 0.3952229587574869,
 'green-leaf-vietnamese-restaurant-seattle': 1.6994587226571938,
 'il-corvo-pasta-seattle': -0.03952229587574865,
 'maneki-seattle': -0.03952229587574865,
 'mee-sum-pastry-seattle': 0.3952229587574869,
 'metropolitan-grill-seattle': -2.2132485690419266,
 'mod-pizza-seattle-7': -0.03952229587574865,
 'pike-place-chowder-seattle': -0.9090128051422198,
 'specialtys-café-and-bakery-seattle-34': -0.4742675505089842}

In [99]:
alex = survey_collection.find_one({'user': 'alex'})

In [139]:
standardize(alex['survey'])

{'ba-bar-seattle': -1.763834207376394,
 'bacco-cafe-and-bistro-seattle-2': 0.05039526306789679,
 'green-leaf-vietnamese-restaurant-seattle': 0.5039526306789694,
 'il-corvo-pasta-seattle': 0.05039526306789679,
 'maneki-seattle': -0.8567194721542486,
 'mee-sum-pastry-seattle': -0.8567194721542486,
 'metropolitan-grill-seattle': 0.5039526306789694,
 'mod-pizza-seattle-7': 0.9575099982900421,
 'pike-place-chowder-seattle': 1.4110673659011148}

In [159]:
recommender = ALSRecommender(item_factors, inv_alias_dict)

In [107]:
alex_raw_ratings = recommender.get_raw_ratings_df(alex['survey'])

In [157]:
alex_raw_ratings

Unnamed: 0,item_id,rating
0,6,1.411067
1,29,0.503953
2,33,-0.856719
3,53,0.503953
4,126,-1.763834
5,232,0.95751
6,322,0.050395
7,338,-0.856719
8,556,0.050395


In [113]:
idxs = recommender.get_restaurant_indexes(alex_raw_ratings, item_factors)

In [114]:
item_factors_array = np.array(item_factors['features'].tolist())

In [131]:
alex_array = recommender.get_user_factors_array(item_factors_array, idxs, alex_raw_ratings)

In [136]:
recommender.new_user_predict(alex_array, item_factors_array, 'alex')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,611,612,613,614,615,616,617,618,619,620
alex,-1.086018,2.942301,0.221897,-0.122837,-4.214041,2.013611,-2.356432,1.354076,-4.005055,-1.775023,...,0.224401,-1.417741,-1.332494,1.060587,2.487265,-1.029893,1.717257,-3.571009,-3.732381,-4.593557


In [125]:
nicole_raw_ratings = recommender.get_raw_ratings_df(nicole['survey'])

In [126]:
nicole_idxs = recommender.get_restaurant_indexes(nicole_raw_ratings, item_factors)

In [128]:
nicole_array = recommender.get_user_factors_array(item_factors_array, nicole_idxs, nicole_raw_ratings)

In [181]:
nicoles_preds = recommender.user_preds_from_survey(nicole)

In [182]:
nicoles_preds

Unnamed: 0,amazon-go-seattle-5,the-pink-door-seattle-4,nirmals-seattle,biscuit-bitch-seattle-5,damn-the-weather-seattle,bad-bishop-seattle,pike-place-chowder-seattle,tsukushinbo-seattle,taylor-shellfish-oyster-bar-seattle-2,elliotts-oyster-house-seattle-2,...,mediterranean-cuisine-seattle,ricenroll-seattle-2,union-deli-la-puget-sound-plaza-building-seattle,measure-seattle,subway-seattle-3,u-bank-cafe-seattle,subway-seattle-31,subway-seattle-66,organic-to-go-seattle-4,chez-dave-seattle-2
nicole,7.707907,4.965576,-3.720854,-4.900624,-9.346965,6.441207,-9.360439,1.773937,-4.703675,-1.518628,...,-0.492494,-4.999301,2.012336,-2.026898,-7.19169,-3.7174,1.287617,-4.144915,-2.19154,-4.532042


In [179]:
((nicoles_preds.loc['nicole'] - nicoles_preds.loc['nicole'].mean()) / (nicoles_preds.max(axis=1) - nicoles_preds.min(axis=1))) * 5

In [189]:
nicoles_preds.loc['nicole'] - nicoles_preds.loc['nicole'].mean()

amazon-go-seattle-5                                        9.127371
the-pink-door-seattle-4                                    6.385039
nirmals-seattle                                           -2.301391
biscuit-bitch-seattle-5                                   -3.481160
damn-the-weather-seattle                                  -7.927502
bad-bishop-seattle                                         7.860670
pike-place-chowder-seattle                                -7.940975
tsukushinbo-seattle                                        3.193401
taylor-shellfish-oyster-bar-seattle-2                     -3.284212
elliotts-oyster-house-seattle-2                           -0.099165
dough-zone-dumpling-house-seattle-2                        2.447678
the-london-plane-seattle-3                                -2.852108
a-hong-kong-kitchen-seattle                               -5.053082
radiator-whiskey-seattle                                  -1.615658
sizzling-pot-king-seattle-2                     

In [134]:
nicole_array

array([ 1.95262511,  4.08407358,  2.79806401,  1.01134286,  0.67436665,
        1.92153539, -5.58814072, -3.27817559,  0.65906346, -6.05405171])

In [133]:
alex_array

array([-2.62355235,  0.83300545,  0.31641287, -0.31200927, -0.65874339,
        1.55980876,  0.19099403, -0.84781937,  3.43563772, -2.98570759])

In [151]:
recommend_for_two_users('nicole', 'gabe')

Unnamed: 0,nicole,gabe,mean,min
café-frieda-seattle-3,11.8,1.7,6.8,1.7
new-star-seafood-restaurant-seattle,12.0,0.2,6.1,0.2
gourmet-noodle-bowl-seattle,7.8,1.3,4.6,1.3
walla-walla-farms-seattle-3,8.1,1.1,4.6,1.1
seattle-deli-seattle,7.8,0.9,4.4,0.9
amazon-go-seattle-5,7.7,0.6,4.2,0.6
patagon-seattle-2,8.3,0.2,4.2,0.2
jasmines-grill-and-bar-seattle-3,7.7,0.5,4.1,0.5
saffron-spice-seattle,7.1,0.8,4.0,0.8
duk-li-dim-sum-seattle,6.4,1.1,3.8,1.1


## Discover Meaning of Latent Features

In [27]:
#compile MongoDB
rest_db = build_database.build_database()

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950


In [28]:
#retrieve MongoDB
db = build_database.retrieve_database()

In [29]:
db.head()

Unnamed: 0,_id,alias,categories,coordinates,display_phone,distance,id,image_url,is_closed,location,name,phone,price,rating,review_count,transactions,url
0,5bf5e06f91d56f59f158cfa4,amazon-go-seattle-5,"[{'alias': 'convenience', 'title': 'Convenienc...","{'latitude': 47.60638, 'longitude': -122.33122}",,493.410866,XDfmz7Woxx6NkZDGBtAVAQ,https://s3-media1.fl.yelpcdn.com/bphoto/fDQhxw...,False,"{'address1': '920 5th Ave', 'address2': None, ...",Amazon Go,,,5.0,7,[],https://www.yelp.com/biz/amazon-go-seattle-5?a...
1,5bf5e06f91d56f59f158cfa5,the-pink-door-seattle-4,"[{'alias': 'italian', 'title': 'Italian'}, {'a...","{'latitude': 47.61028, 'longitude': -122.3425}",(206) 443-3241,1327.897033,VOPdG8llLPaga9iJxXcMuQ,https://s3-media1.fl.yelpcdn.com/bphoto/c_vzRF...,False,"{'address1': '1919 Post Alley', 'address2': ''...",The Pink Door,12064433241.0,$$,4.5,4019,[],https://www.yelp.com/biz/the-pink-door-seattle...
2,5bf5e06f91d56f59f158cfa6,nirmals-seattle,"[{'alias': 'indpak', 'title': 'Indian'}]","{'latitude': 47.60147, 'longitude': -122.33262}",(206) 683-9701,228.364627,GXz21OgpWOtnCF0GDXHPhA,https://s3-media3.fl.yelpcdn.com/bphoto/LVSEXb...,False,"{'address1': '106 Occidental Ave S', 'address2...",Nirmal's,12066839701.0,$$,4.0,424,"[restaurant_reservation, pickup]",https://www.yelp.com/biz/nirmals-seattle?adjus...
3,5bf5e06f91d56f59f158cfa7,bad-bishop-seattle,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...","{'latitude': 47.60286, 'longitude': -122.33427}",(206) 623-3440,344.167932,KFMsgY5mV_wCYsa0XX_hEQ,https://s3-media1.fl.yelpcdn.com/bphoto/1GpjX0...,False,"{'address1': '704 1st Ave', 'address2': None, ...",Bad Bishop,12066233440.0,,4.5,6,[],https://www.yelp.com/biz/bad-bishop-seattle?ad...
4,5bf5e06f91d56f59f158cfa8,pike-place-chowder-seattle,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...","{'latitude': 47.60939, 'longitude': -122.34112}",(206) 267-2537,1176.460907,6I28wDuMBR5WLMqfKxaoeg,https://s3-media3.fl.yelpcdn.com/bphoto/ijju-w...,False,"{'address1': '1530 Post Aly', 'address2': 'Ste...",Pike Place Chowder,12062672537.0,$$,4.5,6317,[pickup],https://www.yelp.com/biz/pike-place-chowder-se...


In [30]:
#clean database
cleaned_db = clean_database.clean_it_all(db)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [31]:
cleaned_db.shape

(673, 170)

In [32]:
V = cleaned_db.drop(columns=['id','image_url', 'location', 'rating', 'review_count',
       'transactions', 'url', 'dist_from_galvanize', 'cats', 'popularity'])

In [33]:
V

Unnamed: 0_level_0,category_convenience,category_salad,category_sandwiches,category_italian,category_wine_bars,category_seafood,category_indpak,category_cocktailbars,category_comfortfood,category_soup,...,category_beergardens,category_gelato,category_comedyclubs,category_theater,category_flowers,category_gourmet,$,$$,$$$,$$$$
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
amazon-go-seattle-5,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
the-pink-door-seattle-4,0,0,0,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
nirmals-seattle,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
bad-bishop-seattle,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,1,0,0
pike-place-chowder-seattle,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
biscuit-bitch-seattle-5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
damn-the-weather-seattle,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
tsukushinbo-seattle,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
taylor-shellfish-oyster-bar-seattle-2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
elliotts-oyster-house-seattle-2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [34]:
rr = recommender.RestaurantRecommender()

AttributeError: 'ALSModel' object has no attribute 'RestaurantRecommender'

### Read in the business info df (scraped from yelp)

In [None]:
bizinfo_df = pd.read_pickle('data/bizinfo_df.pkl')

In [None]:
bizinfo_df.columns

In [None]:
bizinfo_df.T

In [None]:
bizinfo_df.replace(to_replace={'Yes': 1, 'No': -1, None: 0})

## Merge bizinfo_df and cleaned_df to get full set of restaurant features

In [None]:
full_df = pd.merge(left=cleaned_db, right=bizinfo_df, how='outer', left_index=True, right_index=True)

In [None]:
full_df.shape

In [None]:
full_df