In [2]:
from surprise import KNNBasic
from surprise import Dataset
from surprise import accuracy
import time
from surprise.model_selection import train_test_split


data = Dataset.load_builtin('ml-100k')
# sample random trainset and testset
# test set is made of 30% of the ratings.
trainset, testset = train_test_split(data, test_size=.30)

# We'll use the KNN algorithm.
algo = KNNBasic()

# Train the algorithm on the trainset, and predict ratings for the testset
start_fit = time.time()
algo.fit(trainset)

fit_time = time.time() - start_fit
start_test = time.time()
predictions = algo.test(testset)


#print(predictions)
test_time = time.time() - start_test
# Compute the metrics
accuracy.rmse(predictions)
accuracy.mae(predictions)
print("fit-time: " + str(fit_time))
print("test-time: " + str(test_time))

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9298
MAE:  0.7332
fit-time: 22.23573613166809
test-time: 142.27688431739807


In [7]:
#get NDCG
from sklearn.metrics import ndcg_score

def get_ndcg(surprise_predictions, k_highest_scores=None):
    """ 
    Calculates the ndcg (normalized discounted cumulative gain) from surprise predictions, using sklearn.metrics.ndcg_score and scipy.sparse
  
    Parameters: 
    surprise_predictions (List of surprise.prediction_algorithms.predictions.Prediction): list of predictions
    k_highest_scores (positive integer): Only consider the highest k scores in the ranking. If None, use all. 
  
    Returns: 
    float in [0., 1.]: The averaged NDCG scores over all recommendations
  
    """
    from sklearn.metrics import ndcg_score
    from scipy import sparse
    
    uids = [int(p.uid) for p in surprise_predictions ]
    iids = [int(p.iid) for p in surprise_predictions ]
    r_uis = [p.r_ui for p in surprise_predictions ]
    ests = [p.est for p in surprise_predictions ]
    
    assert(len(uids) == len(iids) == len(r_uis) == len(ests) )    
    
    sparse_preds = sparse.coo_matrix( (ests, (uids , iids )) )
    sparse_vals = sparse.coo_matrix( (r_uis, (uids , iids )) )
    
    dense_preds = sparse_preds.toarray()
    dense_vals = sparse_vals.toarray()
    
    return ndcg_score(y_true= dense_vals , y_score= dense_preds, k=k_highest_scores)

get_ndcg(predictions, k_highest_scores = 10)

0.8991846217009473