In [10]:
#import libraries
from collections import defaultdict

from surprise import Reader, Dataset, SVD, accuracy
from surprise.model_selection import cross_validate, KFold
import pandas as pd
import numpy as np

In [11]:
#load data
#ratings = pd.read_csv('ratings.csv', sep='\t', encoding='latin-1', usecols=['user_id', 'movie_id', 'rating', 'timestamp'])
#reader = Reader()
#data = Dataset.load_from_df(ratings[['user_id', 'movie_id', 'rating']], reader)
data = Dataset.load_builtin('ml-100k')

In [12]:
# Calculate precision and recall for all users for a given threshold
# source: https://surprise.readthedocs.io/en/stable/FAQ.html#precision-recall-at-k-py
def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Return precision and recall at k metrics for each user"""

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(
            ((true_r >= threshold) and (est >= threshold))
            for (est, true_r) in user_ratings[:k]
        )

        # Precision@K: Proportion of recommended items that are relevant
        # When n_rec_k is 0, Precision is undefined. We here set it to 0.

        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        # Recall@K: Proportion of relevant items that are recommended
        # When n_rel is 0, Recall is undefined. We here set it to 0.

        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0
        
    return precisions, recalls

In [13]:
n_splits = 5
kf = KFold(n_splits=n_splits)
algo = SVD()

precision_per_threshold = []
recall_per_threshold = []
rmse_per_threshold = []

thresholds = [0.0, 2.5, 3.0, 3.5, 4.0, 4.5]
for threshold in thresholds:
    precision = []
    recall = []
    rmse = []
    current_fold = 1
    
    for trainset, testset in kf.split(data):
        algo.fit(trainset)
        predictions = algo.test(testset)
        precisions_fold, recalls_fold = precision_recall_at_k(predictions, k=20, threshold=threshold)
    
        avg_precision_fold = sum(prec for prec in precisions_fold.values()) / len(precisions_fold)
        avg_recall_fold = sum(rec for rec in recalls_fold.values()) / len(recalls_fold)
        
        #print(f' Fold {current_fold}')
        rmse_fold = accuracy.rmse(predictions, verbose=False)
        #print(f'RMSE: {round(rmse_fold, 4)}')
        #print(f'Precision: {round(avg_precision_fold, 4)}')
        #print(f'Recall: {round(avg_recall_fold, 4)}')
        #print('---------')
        precision.append(avg_precision_fold)
        recall.append(avg_recall_fold)
        rmse.append(rmse_fold)
        current_fold += 1

    avg_rmse = np.mean(np.array(rmse))
    avg_precision = np.mean(np.array(precision))
    avg_recall = np.mean(np.array(recall))

    rmse_per_threshold.append(avg_rmse)
    precision_per_threshold.append(avg_precision)
    recall_per_threshold.append(avg_recall)
        
    #print(f'Average RMSE: {round(avg_rmse, 4)}')
    #print(f'Average precision: {round(avg_precision, 4)}')
    #print(f'Average recall: {round(avg_recall, 4)}')

In [14]:
for i, threshold in enumerate(thresholds):
    print(f"Threshold = {threshold}")
    print(f"RMSE:\t\t  {rmse_per_threshold[i]}")
    print(f"Precision@20:\t  {precision_per_threshold[i]}")
    print(f"Recall@20:\t  {recall_per_threshold[i]}")
    print("--------------")

#print(precision_per_threshold)
#print(recall_per_threshold)
#print(rmse_per_threshold)

Threshold = 0.0
RMSE:		  0.9361121801884493
Precision@20:	  1.0
Recall@20:	  0.8386957433487373
--------------
Threshold = 2.5
RMSE:		  0.9360857335865557
Precision@20:	  0.8723142748472528
Recall@20:	  0.837493031471689
--------------
Threshold = 3.0
RMSE:		  0.9374030472903655
Precision@20:	  0.8862942616825729
Recall@20:	  0.7680618217290611
--------------
Threshold = 3.5
RMSE:		  0.9369399216617673
Precision@20:	  0.680459552045243
Recall@20:	  0.6300397458720297
--------------
Threshold = 4.0
RMSE:		  0.9369850341186285
Precision@20:	  0.6156015752187629
Recall@20:	  0.3169518057543163
--------------
Threshold = 4.5
RMSE:		  0.9369229271740924
Precision@20:	  0.2136317493120874
Recall@20:	  0.0961145255636427
--------------
