In [4]:
import sys; sys.path.append('../util')
from load_yelp_data import load_yelp_dataframe, restaurants_and_bars_in, train_test_split_reviews
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import graphlab as gl
import sklearn.metrics
%matplotlib inline

In [5]:
# Load the three Yelp tables, run them through `restaurants_and_bars_in` for
# 'Phoenix', and split the resulting reviews into train and test sets.
businesses, reviews, users = (
    load_yelp_dataframe(table) for table in ('businesses', 'reviews', 'users'))
phoenix_restaurants, phoenix_reviews, phoenix_users = restaurants_and_bars_in(
    'Phoenix', businesses, reviews, users)
reviews_train, reviews_test = train_test_split_reviews(phoenix_reviews)

In [6]:
# Wrap the id and star-rating columns of each split in a GraphLab SFrame.
rating_columns = ['business_id', 'user_id', 'stars']
train_sframe_full = gl.SFrame(reviews_train[rating_columns])
test_sframe_full = gl.SFrame(reviews_test[rating_columns])

## Matrix Factorization Library Comparisons

We're going to try:
- [GraphLab Create](https://turi.com/products/create/)'s [FactorizationRecommender](https://turi.com/products/create/docs/generated/graphlab.recommender.factorization_recommender.FactorizationRecommender.html),
- [Surprise](http://surpriselib.com/)'s [SVD++](http://surprise.readthedocs.io/en/latest/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVDpp), and
- [Vowpal Wabbit](https://github.com/JohnLangford/vowpal_wabbit)'s [matrix factorization mode](https://github.com/JohnLangford/vowpal_wabbit/wiki/Matrix-factorization-example)

In [51]:
# GraphLab: fit a factorization recommender directly on the star ratings and
# report its RMSE on the training and test reviews.

fac_model = gl.factorization_recommender.create(
    train_sframe_full, item_id='business_id', target='stars', verbose=False,
    num_factors=5, regularization=0.0001, linear_regularization=0.00001)

# Single-argument print(...) with str.format parses under both Python 2 and
# Python 3 (the bare `print a, b` statement is a SyntaxError on Python 3) and
# matches the print(...) call style used in the Vowpal Wabbit cell. The text
# printed is the same as before: the label, one space, then the value.
print('train RMSE: {}'.format(
    fac_model.evaluate_rmse(train_sframe_full, target='stars')['rmse_overall']))
print(' test RMSE: {}'.format(
    fac_model.evaluate_rmse(test_sframe_full, target='stars')['rmse_overall']))

train RMSE: 1.07958617612
 test RMSE: 1.22519726836


In [55]:
# Surprise

import surprise

def dump_to_file(revs, fname):
    """Write one tab-separated `user_id`, `business_id`, `stars` line per row.

    `revs` must provide `itertuples()` whose rows expose those three
    attributes; `fname` is opened in text write mode and overwritten.
    """
    row_template = '{}\t{}\t{}\n'
    with open(fname, 'w') as out:
        out.writelines(
            row_template.format(row.user_id, row.business_id, row.stars)
            for row in revs.itertuples())
# Write both splits to disk in the tab-separated layout the Reader below is
# configured for, then evaluate SVD++ on that single train/test fold.
train_path, test_path = 'phoenix_reviews.data', 'phoenix_reviews.test'
dump_to_file(reviews_train, train_path)
dump_to_file(reviews_test, test_path)

reader = surprise.Reader(sep='\t', line_format='user item rating')
data = surprise.Dataset.load_from_folds([(train_path, test_path)], reader=reader)

svd_pp = surprise.prediction_algorithms.matrix_factorization.SVDpp()
surprise.evaluate(svd_pp, data)

Evaluating RMSE, MAE of algorithm SVDpp.

------------
Fold 1
RMSE: 1.2344
MAE:  0.9943
------------
------------
Mean RMSE: 1.2344
Mean MAE : 0.9943
------------
------------


CaseInsensitiveDefaultDict(list,
                           {u'mae': [0.99434851054475049],
                            u'rmse': [1.2344357910538006]})

In [9]:
# Vowpal Wabbit

import matrix_factorization_recommender as mfr

def X_and_y_of(df):
    """Return `(X, y)`: the `user_id`/`business_id` values and the `stars` values of `df`."""
    features = df[['user_id', 'business_id']].values
    targets = df['stars'].values
    return features, targets
# Build (X, y) pairs for both splits, fit the recommender on the training
# pair, and print the RMSE it reports for the test pair.
(X_train, y_train), (X_test, y_test) = map(X_and_y_of, (reviews_train, reviews_test))

mf = mfr.MatrixFactorizationRecommender()
mf.fit(X_train, y_train)
test_rmse = mf.rmse(X_test, y_test)
print(test_rmse)

1.23617982193


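It looks like GraphLab performed the best (test RMSE of about 1.2252, versus 1.2344 for Surprise and 1.2362 for Vowpal Wabbit), but I also spent the most time tweaking its regularization parameters, to which all of these models are sensitive, so this isn't an entirely even comparison :)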

Let's see if we can do any better using baselines:

In [10]:
from baseline_calculator import *
# Fit the decoupled regularized baselines on the training reviews, then apply
# the transform to both splits under the 'baseline_stars' key (later cells
# read that column back from both frames, so the transform is presumably
# in-place -- confirm in baseline_calculator).
drbc = DecoupledRegularizedBaselineCalculator()
drbc.fit(reviews_train, user_reg_strength=5.25, busi_reg_strength=2.75)
for split in (reviews_train, reviews_test):
    drbc.transform(split, key='baseline_stars')

In [11]:
# Evaluate the fitted baseline calculator's `baseline_rmse` on the test
# reviews; the value is displayed as this cell's output.
drbc.baseline_rmse(reviews_test)

1.2247480610519876

In [74]:
# Factorization on top of the decoupled baselines: train GraphLab on the
# 'baseline_stars' column, map its test predictions back through
# `drbc.inverse_transform`, and score them against the true star ratings.
baseline_columns = ['business_id', 'user_id', 'baseline_stars']
train_sframe_base = gl.SFrame(reviews_train[baseline_columns])
test_sframe_base = gl.SFrame(reviews_test[baseline_columns])

fac_model2 = gl.factorization_recommender.create(
    train_sframe_base,
    item_id='business_id',
    target='baseline_stars',
    num_factors=10,
    regularization=0.002,
    linear_regularization=0.0001,
    verbose=False)

predictions = fac_model2.predict(test_sframe_base)
predicted_stars = drbc.inverse_transform(reviews_test, predictions)
np.sqrt(sklearn.metrics.mean_squared_error(reviews_test.stars, predicted_stars))

1.2242945186960277

So we get a lower test RMSE using baselines + factorization (about 1.2243) than we do from baselines alone (1.2247) or factorization alone (1.2252) -- though not by much, and we had to tweak the regularization parameters a lot. Let's try it using our frequency-based regularization:

In [14]:
# Fit the MoreThanOneReviewBiz baseline calculator on the training reviews
# with the hand-tuned optimizer settings below.
mrbc_fit_options = dict(
    l2_penalty=4.67,
    tol=1,
    maxiters=1000,
    learning_rate=0.00001,
    verbose=False)
mrbc = MoreThanOneReviewBizBaselineCalculator()
mrbc.fit(reviews_train, **mrbc_fit_options)

In [19]:
# Apply the fitted transform to both splits under the 'starz' key, then show
# the calculator's `baseline_rmse` for the test reviews as the cell output.
for split in (reviews_train, reviews_test):
    mrbc.transform(split, key='starz')
mrbc.baseline_rmse(reviews_test)

1.2238666738910313

In [44]:
# Factorization on top of the `mrbc` baselines: train GraphLab on the 'starz'
# column, map its test predictions back through `mrbc.inverse_transform`, and
# score them against the true star ratings.
starz_columns = ['business_id', 'user_id', 'starz']
train_sframe_base = gl.SFrame(reviews_train[starz_columns])
test_sframe_base = gl.SFrame(reviews_test[starz_columns])

fac_model2 = gl.factorization_recommender.create(
    train_sframe_base,
    item_id='business_id',
    target='starz',
    num_factors=10,
    regularization=0.0015,
    linear_regularization=0.0025,
    verbose=False)

predictions = fac_model2.predict(test_sframe_base)
predicted_stars = mrbc.inverse_transform(reviews_test, predictions)
np.sqrt(sklearn.metrics.mean_squared_error(reviews_test.stars, predicted_stars))

1.223557843800396

So, again we do best with factorization + baselines (test RMSE of about 1.2236, versus 1.2239 for these baselines alone) after iterating through parameters, but again not by much.