In [1]:
#GETTING TOP-N RECOMMANDATIONS FOR EACH USER
from collections import defaultdict
from surprise import SVD
from surprise import Dataset

def get_top_n(predictions, n=10):
    top_n = defaultdict(list)
    # first map the predictions to each user
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    #then sort the predictions for each user and retrieve the k higest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n             


In [2]:
#first train an svd algo on data set
data = Dataset.load_builtin()


In [3]:
# then split the data set into training dataset and test dataset
# split dataset
trainset = data.build_full_trainset()
#train the algorithm SVD on given dataset
#algorithm loaded
algo = SVD()
#train the algo
algo.fit(trainset)
#training is done

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1e410c90b88>

In [4]:
# split dataset into testset
testset = trainset.build_anti_testset()
# than predict ratings for all pairs (u,i) that are not int he training set
predictions = algo.test(testset)

top_n = get_top_n(predictions, n=10)

#print the recommended items for each user

for uid, user_ratings in top_n.items():
    print('user id ',uid, 'recommended item id',[iid for (iid,_) in user_ratings])

318', '192']
user id  751 recommended item id ['408', '318', '114', '169', '427', '191', '921', '528', '12', '1039']
user id  756 recommended item id ['408', '483', '114', '318', '64', '480', '127', '515', '187', '513']
user id  757 recommended item id ['603', '169', '178', '483', '12', '511', '114', '408', '357', '480']
user id  752 recommended item id ['89', '192', '603', '178', '98', '408', '604', '659', '114', '185']
user id  758 recommended item id ['408', '357', '246', '169', '530', '498', '647', '648', '178', '198']
user id  732 recommended item id ['603', '318', '64', '427', '191', '173', '496', '515', '12', '408']
user id  762 recommended item id ['50', '318', '114', '474', '963', '272', '313', '923', '169', '12']
user id  744 recommended item id ['169', '173', '408', '272', '313', '318', '134', '98', '199', '641']
user id  754 recommended item id ['318', '357', '178', '513', '408', '50', '528', '169', '174', '483']
user id  753 recommended item id ['479', '318', '178', '429',

In [5]:
#Precision@k = |{Recommend items that are relevant}|/|{Recommended items}|
#Recall@k = |{Recommended items are relevant}|/|{Relevant items}|
#An it is consideered relevant if its true rating rui is greater than a given threshold
#An item is considered recommended if its estiamted rating r'ui is greater than the threshold,
# # and if it is among hte k highest estimated ratigns

In [6]:
from collections import defaultdict
from surprise import Dataset
from surprise import SVD
from surprise.model_selection import KFold

In [18]:
def precision_recall_at_k(predictions, k=10, threshold = 3.5):
    '''return precision and recall at k metrics for each user'''
    #First map the predicions to each user
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in(predictions):
        user_est_true[uid].append((est,true_r))
    precisions = dict()
    recalls = dict()

    for uid, user_ratings, in user_est_true.items():
        # sort he user rating by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        #Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        #Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est,_) in user_ratings[:k])
    
        #Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@k: Proportion of recomended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        #recall@k: propottion of relevant items that are relevant
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
    return precisions, recalls 

In [23]:
data = Dataset.load_builtin()
kf = KFold(n_splits=5)
algo = SVD()

for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4)

    #precisions and recall can then be averaged over all users
    print('precisions',sum (prec for prec in precisions.values()) / len(precisions))
    print('recalls', sum(rec for rec in recalls.values()) / len(recalls))

precisions 0.874380750176928
recalls 0.2697210847829672
precisions 0.8853326256192495
recalls 0.2599267587059262
precisions 0.8771762208067929
recalls 0.2414060123947289
precisions 0.8866242038216554
recalls 0.25965411639378333
precisions 0.8739207360226465
recalls 0.26025250636546043
