# Recommendations using surprise library models

This notebook uses the surprise library to build a model using surprise library and generate ratings for the test data.

**Models:** SVD-based and KNN-based collaborative filtering are explored. Hyperparameter tuning for the models is performed.

**Evaluation:** The test data ratings are stored for further evaluation such as RMSE and MAE. The recommendations are evaluated using precision@5, recall@5, NDCG and overall accuracy. 

In [1]:
from surprise import SVD, SVDpp
from surprise.prediction_algorithms import KNNBasic, KNNWithMeans
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import accuracy
from surprise.model_selection import train_test_split

In [2]:
import math
from collections import defaultdict
import csv
from sklearn.metrics import ndcg_score
import numpy as np
import pandas as pd
import time

In [3]:
def convert_traintest_dataframe_forsurprise(training_dataframe, testing_dataframe):
    reader = Reader(rating_scale=(0, 5))
    trainset = Dataset.load_from_df(training_dataframe[['userId', 'tmdbId', 'rating']], reader)
    testset = Dataset.load_from_df(testing_dataframe[['userId', 'tmdbId', 'rating']], reader)
    trainset = trainset.construct_trainset(trainset.raw_ratings)
    testset = testset.construct_testset(testset.raw_ratings)
    return trainset, testset

In [4]:
file_path_train = '../0_data/processed/training_data.csv'
file_path_test = '../0_data/processed/testing_data.csv'
traindf = pd.read_csv(file_path_train)
testdf = pd.read_csv(file_path_test)
trainset, testset = convert_traintest_dataframe_forsurprise(traindf, testdf)

In [5]:
def get_top_n(predictions, n):
    # First map the predictions to each user.
    top_n = defaultdict(list)
    org_ratings = defaultdict(list)

    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
        org_ratings[uid].append((iid, true_r))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n, org_ratings

In [6]:
def dcg_at_k(scores):
    return scores[0] + sum(sc/math.log(ind, 2) for sc, ind in zip(scores[1:], range(2, len(scores) + 1)))

def ndcg_at_k(scores):
    idcg = dcg_at_k(sorted(scores, reverse=True))
    return (dcg_at_k(scores)/idcg) if idcg > 0.0 else 0.0

In [7]:
def precision_recall_at_k(predictions, k=5, threshold=3.5):
    '''Return precision and recall at k metrics for each user.'''

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])

        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1

    precision = (sum(prec for prec in precisions.values()) / len(precisions))
    recall = (sum(rec for rec in recalls.values()) / len(recalls))

    return precision, recall

In [8]:
def recommendation(algo, trainset, testset):
  # Train the algorithm on the trainset, and predict ratings for the testset
  start_fit = time.time()
  algo.fit(trainset)
  end_fit = time.time()
  fit_time = end_fit - start_fit

  # Predictions on testing set
  start_test = time.time()
  test_predictions = algo.test(testset)
  end_test = time.time()
  test_time = end_test - start_test

  test_rmse = accuracy.rmse(test_predictions)
  test_mae = accuracy.mae(test_predictions)

  top_n, org_ratings = get_top_n(test_predictions, 5)

  precision, recall = precision_recall_at_k(test_predictions)

  f_measure = (2*precision*recall)/(precision+recall)

  ndcg_scores = dict()
  for uid, user_ratings in top_n.items():
    scores = []
    for iid, est_r in user_ratings:
        iid_found = False
        org_user_ratings = org_ratings[uid]
        for i, r in org_user_ratings:
            if iid == i:
                scores.append(r)
                iid_found = True
                break
        if not iid_found:
            scores.append(0)
    ndcg_scores[uid] = ndcg_at_k(scores)
  ndcg_score = sum(ndcg for ndcg in ndcg_scores.values())/len(ndcg_scores)

  return (test_rmse, test_mae, fit_time, test_time, precision, recall, f_measure, ndcg_score,test_predictions)

## Basic algorithms

In [9]:
surprise_df = pd.DataFrame(columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])

In [11]:
# Iterate over all algorithms
for algorithm in [KNNBasic(), KNNWithMeans(), SVD(), SVDpp()]:
    results = recommendation(algorithm,trainset,testset) 
    
    name =str(algorithm).split(' ')[0].split('.')[-1]
    print("Algorithm:", name)
    df = pd.DataFrame([[name, results[0], results[1], results[2], results[3], results[4], results[5], results[6], results[7]]], columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])
    surprise_df = pd.concat([df, surprise_df], ignore_index=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9642
MAE:  0.7417
Algorithm: KNNBasic
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9146
MAE:  0.6987
Algorithm: KNNWithMeans
RMSE: 0.8929
MAE:  0.6875
Algorithm: SVD
RMSE: 0.8852
MAE:  0.6784
Algorithm: SVDpp


In [19]:
results = surprise_df.drop(index=[6,2,1])
results

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
0,SVDpp,0.885218,0.678399,298.373213,3.735361,0.824938,0.401856,0.540444,0.962812
3,KNNBasic,0.9642,0.741681,0.072833,0.856049,0.791307,0.442862,0.567896,0.964024
4,SVD,0.893231,0.688007,3.229208,0.111426,0.819548,0.401022,0.53853,0.961726
5,KNNWithMeans,0.914592,0.698736,0.077076,0.927917,0.816692,0.394222,0.53176,0.960656


In [20]:
results.sort_values(by='test_rmse', ascending = False)

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
3,KNNBasic,0.9642,0.741681,0.072833,0.856049,0.791307,0.442862,0.567896,0.964024
5,KNNWithMeans,0.914592,0.698736,0.077076,0.927917,0.816692,0.394222,0.53176,0.960656
4,SVD,0.893231,0.688007,3.229208,0.111426,0.819548,0.401022,0.53853,0.961726
0,SVDpp,0.885218,0.678399,298.373213,3.735361,0.824938,0.401856,0.540444,0.962812


In [21]:
results.sort_values(by='Precision') 

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
3,KNNBasic,0.9642,0.741681,0.072833,0.856049,0.791307,0.442862,0.567896,0.964024
5,KNNWithMeans,0.914592,0.698736,0.077076,0.927917,0.816692,0.394222,0.53176,0.960656
4,SVD,0.893231,0.688007,3.229208,0.111426,0.819548,0.401022,0.53853,0.961726
0,SVDpp,0.885218,0.678399,298.373213,3.735361,0.824938,0.401856,0.540444,0.962812


In [23]:
results.sort_values(by='Recall', ascending=False) 

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
3,KNNBasic,0.9642,0.741681,0.072833,0.856049,0.791307,0.442862,0.567896,0.964024
0,SVDpp,0.885218,0.678399,298.373213,3.735361,0.824938,0.401856,0.540444,0.962812
4,SVD,0.893231,0.688007,3.229208,0.111426,0.819548,0.401022,0.53853,0.961726
5,KNNWithMeans,0.914592,0.698736,0.077076,0.927917,0.816692,0.394222,0.53176,0.960656


In [24]:
results.sort_values(by='NDCG', ascending=False)

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
3,KNNBasic,0.9642,0.741681,0.072833,0.856049,0.791307,0.442862,0.567896,0.964024
0,SVDpp,0.885218,0.678399,298.373213,3.735361,0.824938,0.401856,0.540444,0.962812
4,SVD,0.893231,0.688007,3.229208,0.111426,0.819548,0.401022,0.53853,0.961726
5,KNNWithMeans,0.914592,0.698736,0.077076,0.927917,0.816692,0.394222,0.53176,0.960656


In [25]:
results.sort_values(by='Algorithm')

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
3,KNNBasic,0.9642,0.741681,0.072833,0.856049,0.791307,0.442862,0.567896,0.964024
5,KNNWithMeans,0.914592,0.698736,0.077076,0.927917,0.816692,0.394222,0.53176,0.960656
4,SVD,0.893231,0.688007,3.229208,0.111426,0.819548,0.401022,0.53853,0.961726
0,SVDpp,0.885218,0.678399,298.373213,3.735361,0.824938,0.401856,0.540444,0.962812


In [27]:
results.to_csv('../2_results/2_collab_filtering@surprise_recs_result.csv', index = False)

In [29]:
results[['Algorithm', 'test_rmse', 'test_mae', 'Precision', 'Recall', 'NDCG']].sort_values(by='Algorithm')

Unnamed: 0,Algorithm,test_rmse,test_mae,Precision,Recall,NDCG
3,KNNBasic,0.9642,0.741681,0.791307,0.442862,0.964024
5,KNNWithMeans,0.914592,0.698736,0.816692,0.394222,0.960656
4,SVD,0.893231,0.688007,0.819548,0.401022,0.961726
0,SVDpp,0.885218,0.678399,0.824938,0.401856,0.962812


## Tuned algorithms

In [42]:
tuned_df = pd.DataFrame(columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])

In [31]:
sim_options = {'name': 'pearson_baseline',
               'user_based': False  # compute  similarities between items
               }
knnwm_tuned = KNNWithMeans(k=50, sim_options=sim_options)

In [34]:
svd_tuned = SVD(n_factors= 25, n_epochs=10, lr_all=0.01, reg_all=0.02)

In [37]:
svdpp_tuned = SVDpp(n_factors= 25, n_epochs=10, lr_all=0.01, reg_all=0.02)

In [43]:
# Iterate over all algorithms
for algorithm in [knnwm_tuned, svd_tuned, svdpp_tuned]:
    results = recommendation(algorithm,trainset,testset) 
    
    name =str(algorithm).split(' ')[0].split('.')[-1]
    print("Algorithm:", name)
    df = pd.DataFrame([[name, results[0], results[1], results[2], results[3], results[4], results[5], results[6], results[7]]], columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])
    tuned_df = pd.concat([df, tuned_df], ignore_index=True)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.9049
MAE:  0.6858
Algorithm: KNNWithMeans
RMSE: 0.8935
MAE:  0.6871
Algorithm: SVD
RMSE: 0.8880
MAE:  0.6819
Algorithm: SVDpp


In [44]:
tuned_df.sort_values(by='Algorithm')

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
2,KNNWithMeans,0.9049,0.685833,5.045171,3.738278,0.815499,0.417155,0.551962,0.963588
1,SVD,0.893531,0.687096,0.660161,0.07266,0.831321,0.402914,0.542767,0.962539
0,SVDpp,0.88796,0.681865,174.732694,3.919472,0.832464,0.401128,0.541385,0.963916


In [45]:
results_tuned.to_csv('../2_results/2_collab_filtering@surprise_tuned_recs_result.csv', index = False)

In [46]:
tuned_df[['Algorithm', 'test_rmse', 'test_mae', 'Precision', 'Recall', 'NDCG']]

Unnamed: 0,Algorithm,test_rmse,test_mae,Precision,Recall,NDCG
0,SVDpp,0.88796,0.681865,0.832464,0.401128,0.963916
1,SVD,0.893531,0.687096,0.831321,0.402914,0.962539
2,KNNWithMeans,0.9049,0.685833,0.815499,0.417155,0.963588
