In [1]:
# package imports
# standard library
import random

# basics
import numpy as np
import pandas as pd

# Surprise package installation

Using pip:
pip install scikit-surprise

Using Conda:
conda install -c conda-forge scikit-surprise

For more information: https://github.com/NicolasHug/Surprise 

In [2]:
# surprise packages: Install using above commands!
from surprise import SVDpp
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader

In [3]:
# Read the Yelp reviews into a pandas DataFrame. Only the review data is
# needed to build the recommendation system.
review_csv_path = "data/yelp_review.csv"
reviews = pd.read_csv(review_csv_path)

# Evaluation Metrics

#### RMSE: Root Mean Squared Error

#### MAE: Mean Absolute Error

#### FCP: Fraction of Concordant Pairs

1. Measures the fraction of concordant pairs; it generalizes the well-known AUC metric to non-binary, ordered outcomes

2. Treats the predictions of the collaborative filtering (CF) algorithm as ordinal values

3. Read more: http://www.ijcai.org/Proceedings/13/Papers/449.pdf

In [4]:
# Seed both the stdlib and NumPy random number generators before anything
# stochastic runs, so the cross-validation folds and the SVD++ training are
# repeatable after a kernel restart (the approach given in the Surprise FAQ).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Set the reader for the custom dataset we have loaded: ratings are on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))

# The columns needed for the algorithm must be given in the order
# (user, item, rating); select the equivalent columns of the reviews frame.
data = Dataset.load_from_df(reviews[['user_id', 'business_id', 'stars']], reader)

# The CF algorithm to use: SVD++ with one learning rate (lr_all) and one
# regularisation term (reg_all) for all parameters. verbose=True logs each
# training epoch.
algo = SVDpp(lr_all=0.002, reg_all=0.06, verbose=True)

# Run 5-fold cross-validation and print results. The returned dict is bound to
# a name so later cells can reuse the scores without repeating the (very slow)
# fit; the bare name on the last line keeps the dict as the cell's output.
cv_results = cross_validate(algo, data, measures=['RMSE', 'MAE', 'FCP'], cv=5, verbose=True)
cv_results

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8


{'fit_time': (2512.735143661499,
  2508.2623755931854,
  2440.5247116088867,
  2347.500657081604,
  2449.694218635559),
 'test_fcp': array([0.57835188, 0.58100226, 0.58243106, 0.5798982 , 0.5802495 ]),
 'test_mae': array([1.0393247 , 1.03849194, 1.03995578, 1.03982831, 1.03976696]),
 'test_rmse': array([1.27613359, 1.2756702 , 1.27664698, 1.27678444, 1.27682756]),
 'test_time': (86.37213134765625,
  79.20862555503845,
  65.99044275283813,
  62.323349714279175,
  65.28745818138123)}