# Recommender System Model Selection
In this notebook, we implement several popular algorithms to build a recommender system.

The whole notebook is based on the Python package [Surprise](http://surpriselib.com/).

In [None]:
import warnings
import time
import numpy as np
import pandas as pd

from surprise import NormalPredictor
from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
from surprise import Dataset
from surprise import evaluate
from surprise import print_perf
from surprise import Reader

# Ignore every warning from here on (no category or message filter is given),
# presumably to keep the cell output uncluttered.
warnings.filterwarnings('ignore')
# IPython magic (not plain Python): draw matplotlib figures inline in the notebook.
# NOTE(review): no plotting is visible in this part of the notebook - confirm it is still needed.
%matplotlib inline

In [None]:
# Start of the algorithm: remember the wall-clock time (time.time(), seconds
# since the epoch) so the final cell can report the total running time.
t_start = time.time()

# Build recommender system and make predictions

In [None]:
# Describe the layout of the ratings file: four fields per line
# (user, item, rating, timestamp) separated by a double colon.
reader = Reader(
    line_format='user item rating timestamp',
    sep=r'::',
)

# Load the ratings from disk using that layout.
data = Dataset.load_from_file(
    './data/ratings.dat',
    reader=reader,
)

In [None]:
# Split into 5 folds for cross-validation.
#
# Seed BOTH random number generators so that runs are reproducible:
#   * Python's `random` module - per the Surprise FAQ, the shuffling of the
#     ratings behind `data.split(shuffle=True)` draws from it, so seeding
#     NumPy alone leaves the folds different on every run;
#   * NumPy's RNG - used by some algorithms for their random initialisation.
import random

random.seed(2017)
np.random.seed(2017)
data.split(n_folds=5, shuffle=True)

# 1. [Normal Predictor Algorithm](http://surprise.readthedocs.io/en/stable/basic_algorithms.html#surprise.prediction_algorithms.random_pred.NormalPredictor)

In [None]:
# NormalPredictor, constructed with its default settings.
algo1 = NormalPredictor()

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf1 = evaluate(
    algo1,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf1)
print('Mean RMSE is:\t', np.mean(perf1['rmse']))

# 2. [Baseline Only Algorithm](http://surprise.readthedocs.io/en/stable/basic_algorithms.html#surprise.prediction_algorithms.baseline_only.BaselineOnly)

In [None]:
# Baseline-estimate settings, handed to BaselineOnly unchanged.
bsl_options = dict(
    method='als',
    n_epochs=10,
    reg_i=10,
    reg_u=15,
)

# Instantiate the baseline-only predictor with those settings.
algo2 = BaselineOnly(bsl_options=bsl_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf2 = evaluate(
    algo2,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf2)
print('Mean RMSE is:\t', np.mean(perf2['rmse']))

# 3. [KNN Basic Algorithm](http://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBasic)

In [None]:
# Similarity settings: cosine measure, user_based=True.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='cosine',
    user_based=True,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: cosine measure, user_based=False.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='cosine',
    user_based=False,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: MSD measure, user_based=True.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='msd',
    user_based=True,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: MSD measure, user_based=False.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='msd',
    user_based=False,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: Pearson measure, user_based=True.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='pearson',
    user_based=True,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: Pearson measure, user_based=False.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='pearson',
    user_based=False,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: baseline-centred Pearson measure with a shrinkage
# of 100, user_based=True.
sim_options = dict(
    name='pearson_baseline',
    user_based=True,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

In [None]:
# Similarity settings: baseline-centred Pearson measure with a shrinkage
# of 100, user_based=False.
sim_options = dict(
    name='pearson_baseline',
    user_based=False,
    shrinkage=100,
)

# Basic k-NN predictor: up to 40 neighbours, at least 1.
algo3 = KNNBasic(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf3 = evaluate(
    algo3,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf3)
print('Mean RMSE is:\t', np.mean(perf3['rmse']))

# 4. [KNN with Means Algorithm](http://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNWithMeans)

In [None]:
# Similarity settings: MSD measure, user_based=True.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='msd',
    user_based=True,
    shrinkage=100,
)

# Mean-centred k-NN predictor: up to 40 neighbours, at least 1.
algo4 = KNNWithMeans(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf4 = evaluate(
    algo4,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf4)
print('Mean RMSE is:\t', np.mean(perf4['rmse']))

In [None]:
# Similarity settings: MSD measure, user_based=False.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='msd',
    user_based=False,
    shrinkage=100,
)

# Mean-centred k-NN predictor: up to 40 neighbours, at least 1.
algo4 = KNNWithMeans(k=40, min_k=1, sim_options=sim_options)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf4 = evaluate(
    algo4,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf4)
print('Mean RMSE is:\t', np.mean(perf4['rmse']))

# 5. [KNN Baseline Algorithm](http://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline)

In [None]:
# Similarity settings: MSD measure, user_based=True.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='msd',
    user_based=True,
    shrinkage=100,
)

# Baseline-estimate settings (ALS, 10 epochs, item/user regularisation 10/15).
bsl_options = dict(
    method='als',
    n_epochs=10,
    reg_i=10,
    reg_u=15,
)

# k-NN predictor that also takes the baseline settings:
# up to 40 neighbours, at least 1.
algo5 = KNNBaseline(
    k=40,
    min_k=1,
    sim_options=sim_options,
    bsl_options=bsl_options,
)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf5 = evaluate(
    algo5,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf5)
print('Mean RMSE is:\t', np.mean(perf5['rmse']))

In [None]:
# Similarity settings: MSD measure, user_based=False.
# NOTE(review): per the Surprise docs 'shrinkage' is only consulted by the
# 'pearson_baseline' measure - confirm it is intentionally kept here.
sim_options = dict(
    name='msd',
    user_based=False,
    shrinkage=100,
)

# Baseline-estimate settings (ALS, 10 epochs, item/user regularisation 10/15).
bsl_options = dict(
    method='als',
    n_epochs=10,
    reg_i=10,
    reg_u=15,
)

# k-NN predictor that also takes the baseline settings:
# up to 40 neighbours, at least 1.
algo5 = KNNBaseline(
    k=40,
    min_k=1,
    sim_options=sim_options,
    bsl_options=bsl_options,
)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf5 = evaluate(
    algo5,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf5)
print('Mean RMSE is:\t', np.mean(perf5['rmse']))

# 6. [SVD Algorithm](http://surprise.readthedocs.io/en/stable/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD)

In [None]:
# SVD model with every hyper-parameter spelled out, one per line:
# 100 factors, 20 epochs, biased, one global learning rate / regularisation
# term and no per-parameter overrides (all left as None).
algo6 = SVD(
    n_factors=100,
    n_epochs=20,
    biased=True,
    init_mean=0,
    init_std_dev=.1,
    lr_all=.005,
    reg_all=.02,
    lr_bu=None,
    lr_bi=None,
    lr_pu=None,
    lr_qi=None,
    reg_bu=None,
    reg_bi=None,
    reg_pu=None,
    reg_qi=None,
    verbose=False,
)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf6 = evaluate(
    algo6,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf6)
print('Mean RMSE is:\t', np.mean(perf6['rmse']))

# 7. [SVD++ Algorithm](http://surprise.readthedocs.io/en/stable/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVDpp)

In [None]:
# SVD++ model with every hyper-parameter spelled out, one per line:
# 20 factors, 20 epochs, one global learning rate / regularisation term
# and no per-parameter overrides (all left as None).
algo7 = SVDpp(
    n_factors=20,
    n_epochs=20,
    init_mean=0,
    init_std_dev=.1,
    lr_all=.007,
    reg_all=.02,
    lr_bu=None,
    lr_bi=None,
    lr_pu=None,
    lr_qi=None,
    lr_yj=None,
    reg_bu=None,
    reg_bi=None,
    reg_pu=None,
    reg_qi=None,
    reg_yj=None,
    verbose=False,
)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf7 = evaluate(
    algo7,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf7)
print('Mean RMSE is:\t', np.mean(perf7['rmse']))

# 8. [NMF Algorithm](http://surprise.readthedocs.io/en/stable/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.NMF)

In [None]:
# NMF model with every hyper-parameter spelled out, one per line:
# 15 factors, 50 epochs, unbiased, factors initialised in [0, 1].
algo8 = NMF(
    n_factors=15,
    n_epochs=50,
    biased=False,
    reg_pu=.06,
    reg_qi=.06,
    reg_bu=.02,
    reg_bi=.02,
    lr_bu=.005,
    lr_bi=.005,
    init_low=0,
    init_high=1,
    verbose=False,
)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf8 = evaluate(
    algo8,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf8)
print('Mean RMSE is:\t', np.mean(perf8['rmse']))

# 9. [Slope One Algorithm](http://surprise.readthedocs.io/en/stable/slope_one.html#surprise.prediction_algorithms.slope_one.SlopeOne)

In [None]:
# SlopeOne, constructed without any arguments.
algo9 = SlopeOne()

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf9 = evaluate(
    algo9,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf9)
print('Mean RMSE is:\t', np.mean(perf9['rmse']))

# 10. [Co-Clustering Algorithm](http://surprise.readthedocs.io/en/stable/co_clustering.html#surprise.prediction_algorithms.co_clustering.CoClustering)

In [None]:
# Co-clustering model: 3 user clusters, 3 item clusters, 20 epochs.
algo10 = CoClustering(
    n_cltr_u=3,
    n_cltr_i=3,
    n_epochs=20,
    verbose=False,
)

# Run the evaluation on `data` (RMSE only, non-verbose), print the
# performance summary, then the mean of the collected RMSE values.
perf10 = evaluate(
    algo10,
    data,
    measures=['RMSE'],
    verbose=False,
)
print_perf(perf10)
print('Mean RMSE is:\t', np.mean(perf10['rmse']))

# End of the program

In [None]:
# Report how long the notebook took: take the stop timestamp, then print the
# start/stop epoch times and their difference in a single write.
t_end = time.time()
print("Program running time\n")
print('\n'.join([
    'Start time:\t{0:15.2f}'.format(t_start),
    'Stop  time:\t{0:15.2f}'.format(t_end),
    'Total time:\t{0:15.2f} seconds'.format(t_end - t_start),
]))