### Evaluations of Algorithms:

This notebook uses the surprise library to build a model using surprise library and generate ratings for the test data. \\
**Models:** SVD, Baseline estimation, item-item based KNN collaborative filtering etc are explored. \\
**Evaluation:** The test data ratings are stored for further evaluation such as RMSE and MAE. The recommendations are evaluated using precision@5, recall@5, NDCG and overall accuracy. 

In [1]:
from surprise import SVD, BaselineOnly, SVDpp, NMF, SlopeOne, CoClustering, Reader
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import KNNBaseline, KNNBasic, KNNWithMeans, KNNWithZScore
from surprise import accuracy
from surprise.model_selection import train_test_split

In [2]:
import math
from collections import defaultdict
import csv
from sklearn.metrics import ndcg_score
import numpy as np
import pandas as pd
import time

In [3]:
def convert_traintest_dataframe_forsurprise(training_dataframe, testing_dataframe):
    reader = Reader(rating_scale=(0, 5))
    trainset = Dataset.load_from_df(training_dataframe[['userId', 'movieId', 'rating']], reader)
    testset = Dataset.load_from_df(testing_dataframe[['userId', 'movieId', 'rating']], reader)
    trainset = trainset.construct_trainset(trainset.raw_ratings)
    testset = testset.construct_testset(testset.raw_ratings)
    return trainset, testset

In [4]:
data = pd.read_csv('C:/Users/prtyagi/Desktop/New folder/archive1/ratings_small.csv')
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(data, test_size=0.2)
trainset, testset = convert_traintest_dataframe_forsurprise(train_data, test_data)

In [5]:
def get_top_n(predictions, n):
    # First map the predictions to each user.
    top_n = defaultdict(list)
    org_ratings = defaultdict(list)

    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
        org_ratings[uid].append((iid, true_r))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n, org_ratings

In [6]:
def dcg_at_k(scores):
    return scores[0] + sum(sc/math.log(ind, 2) for sc, ind in zip(scores[1:], range(2, len(scores) + 1)))

def ndcg_at_k(scores):
    idcg = dcg_at_k(sorted(scores, reverse=True))
    return (dcg_at_k(scores)/idcg) if idcg > 0.0 else 0.0

In [7]:
def precision_recall_at_k(predictions, k=5, threshold=3.5):
    '''Return precision and recall at k metrics for each user.'''

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])

        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1

    precision = (sum(prec for prec in precisions.values()) / len(precisions))
    recall = (sum(rec for rec in recalls.values()) / len(recalls))

    return precision, recall

In [8]:
def recommendation(algo, trainset, testset):
  # Train the algorithm on the trainset, and predict ratings for the testset
  start_fit = time.time()
  algo.fit(trainset)
  end_fit = time.time()
  fit_time = end_fit - start_fit

  # Predictions on testing set
  start_test = time.time()
  test_predictions = algo.test(testset)
  end_test = time.time()
  test_time = end_test - start_test

  test_rmse = accuracy.rmse(test_predictions)
  test_mae = accuracy.mae(test_predictions)

  top_n, org_ratings = get_top_n(test_predictions, 5)

  precision, recall = precision_recall_at_k(test_predictions)

  f_measure = (2*precision*recall)/(precision+recall)

  ndcg_scores = dict()
  for uid, user_ratings in top_n.items():
    scores = []
    for iid, est_r in user_ratings:
        iid_found = False
        org_user_ratings = org_ratings[uid]
        for i, r in org_user_ratings:
            if iid == i:
                scores.append(r)
                iid_found = True
                break
        if not iid_found:
            scores.append(0)
    ndcg_scores[uid] = ndcg_at_k(scores)
  ndcg_score = sum(ndcg for ndcg in ndcg_scores.values())/len(ndcg_scores)

  return (test_rmse, test_mae, fit_time, test_time, precision, recall, f_measure, ndcg_score,test_predictions)

#### Basic algorithm (Baseline approach):

In [9]:
# basic collaborative filtering algorithm taking into account a baseline rating.
sim_options = {'name': 'pearson_baseline',
               'user_based': False  # compute  similarities between items
               }
algo = KNNBaseline(sim_options=sim_options)

results = recommendation(algo,trainset,testset)
print(results[0])
print(results[1])
print(results[2])
print(results[3])
print(results[4])
print(results[5])
print(results[6])
print(results[7])

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.8694
MAE:  0.6649
0.8694262344242538
0.664860973744312
9.501438856124878
7.5373358726501465
0.8238450074515673
0.40798143702392775
0.5457156266330915
0.9638498598505412


In [10]:
algo = CoClustering(2,5,50)

test_rmse, test_mae, test_predictions, fit_time, test_time, precision, recall, f_measure, ndcg_score = recommendation(algo,trainset,testset)
print(test_rmse)
print(test_mae)
print(fit_time)
print(test_time)
print(precision)
print(recall)
print(f_measure)
print(ndcg_score)

RMSE: 0.9652
MAE:  0.7477
0.9652212974622101
0.7476611664694023
0.07810473442077637
0.7938648782911104
0.38584669021297546
0.5192966551194597
0.9566135879897293
[Prediction(uid=73, iid=1911, r_ui=3.0, est=2.6176535153635774, details={'was_impossible': False}), Prediction(uid=338, iid=590, r_ui=3.0, est=3.7489156582384258, details={'was_impossible': False}), Prediction(uid=463, iid=1676, r_ui=2.0, est=2.8587575437464086, details={'was_impossible': False}), Prediction(uid=275, iid=6796, r_ui=4.5, est=4.750822795161998, details={'was_impossible': False}), Prediction(uid=306, iid=2857, r_ui=3.0, est=3.8557737411800534, details={'was_impossible': False}), Prediction(uid=481, iid=50912, r_ui=4.0, est=4.047785961432363, details={'was_impossible': False}), Prediction(uid=614, iid=2849, r_ui=2.0, est=3.5441795932652527, details={'was_impossible': False}), Prediction(uid=75, iid=908, r_ui=4.5, est=3.660254041474108, details={'was_impossible': False}), Prediction(uid=405, iid=6166, r_ui=4.0, est=

In [11]:
surprise_df = pd.DataFrame(columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])

In [12]:
# Iterate over all algorithms
for algorithm in [KNNBasic(), SVD(), SVDpp(), SlopeOne(), NMF(), KNNBaseline(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
    results = recommendation(algorithm,trainset,testset) 
    
    name =str(algorithm).split(' ')[0].split('.')[-1]
    print("Algorithm:", name)
    df = pd.DataFrame([[name, results[0], results[1], results[2], results[3], results[4], results[5], results[6], results[7]]], columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])
    surprise_df = pd.concat([df, surprise_df], ignore_index=True)
surprise_df.sort_values(by='test_rmse', ascending=False) 

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9656
MAE:  0.7459
Algorithm: KNNBasic
RMSE: 0.8945
MAE:  0.6914
Algorithm: SVD
RMSE: 0.8777
MAE:  0.6751
Algorithm: SVDpp
RMSE: 0.9306
MAE:  0.7142
Algorithm: SlopeOne
RMSE: 0.9424
MAE:  0.7237
Algorithm: NMF
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8979
MAE:  0.6890
Algorithm: KNNBaseline
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9207
MAE:  0.7053
Algorithm: KNNWithMeans
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9181
MAE:  0.6993
Algorithm: KNNWithZScore
Estimating biases using als...
RMSE: 0.8882
MAE:  0.6881
Algorithm: BaselineOnly
RMSE: 0.9650
MAE:  0.7466
Algorithm: CoClustering


Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
9,KNNBasic,0.965593,0.74587,0.192364,1.495094,0.782067,0.430068,0.554958,0.962812
0,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
5,NMF,0.942432,0.723734,7.385584,0.218729,0.783731,0.383268,0.514789,0.95817
6,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
3,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793
2,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
4,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
8,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561
1,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
7,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548


In [13]:
surprise_df.sort_values(by='test_rmse') 

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
7,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548
1,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
8,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561
4,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
2,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
3,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793
6,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
5,NMF,0.942432,0.723734,7.385584,0.218729,0.783731,0.383268,0.514789,0.95817
0,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
9,KNNBasic,0.965593,0.74587,0.192364,1.495094,0.782067,0.430068,0.554958,0.962812


In [14]:
surprise_df.sort_values(by='F-measure', ascending=False) 

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
9,KNNBasic,0.965593,0.74587,0.192364,1.495094,0.782067,0.430068,0.554958,0.962812
4,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
7,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548
1,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
6,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
3,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793
0,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
2,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
8,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561
5,NMF,0.942432,0.723734,7.385584,0.218729,0.783731,0.383268,0.514789,0.95817


In [15]:
surprise_df.sort_values(by='NDCG', ascending=False)

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
8,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561
9,KNNBasic,0.965593,0.74587,0.192364,1.495094,0.782067,0.430068,0.554958,0.962812
7,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548
1,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
4,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
6,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
2,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
0,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
5,NMF,0.942432,0.723734,7.385584,0.218729,0.783731,0.383268,0.514789,0.95817
3,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793


In [16]:
sim_options = {'name': 'pearson_baseline',
               'user_based': False  # compute  similarities between items
               }
algo = KNNBaseline(sim_options=sim_options)

results = recommendation(algo,trainset,testset)
df = pd.DataFrame([['KNNBaseline (pearson_baseline)', results[0], results[1], results[2], results[3], results[4], results[5], results[6], results[7]]], columns= ['Algorithm', 'test_rmse', 'test_mae', 'fit_time', 'test_time', 'Precision', 'Recall', 'F-measure', 'NDCG'])
surprise_df = pd.concat([df, surprise_df], ignore_index=True)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 0.8694
MAE:  0.6649


In [17]:
surprise_df.head()

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
0,KNNBaseline (pearson_baseline),0.869426,0.664861,9.473596,7.614614,0.823845,0.407981,0.545716,0.96385
1,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
2,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
3,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
4,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793


In [18]:
surprise_df.sort_values(by='test_rmse') 

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
0,KNNBaseline (pearson_baseline),0.869426,0.664861,9.473596,7.614614,0.823845,0.407981,0.545716,0.96385
8,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548
2,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
9,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561
5,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
3,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
4,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793
7,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
6,NMF,0.942432,0.723734,7.385584,0.218729,0.783731,0.383268,0.514789,0.95817
1,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582


In [19]:
surprise_df.sort_values(by='F-measure', ascending=False) 

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
10,KNNBasic,0.965593,0.74587,0.192364,1.495094,0.782067,0.430068,0.554958,0.962812
0,KNNBaseline (pearson_baseline),0.869426,0.664861,9.473596,7.614614,0.823845,0.407981,0.545716,0.96385
5,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
8,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548
2,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
7,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
4,KNNWithMeans,0.920654,0.705341,0.236329,1.74718,0.803428,0.385767,0.521254,0.957793
1,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
3,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
9,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561


In [20]:
surprise_df.sort_values(by='NDCG', ascending=False)

Unnamed: 0,Algorithm,test_rmse,test_mae,fit_time,test_time,Precision,Recall,F-measure,NDCG
0,KNNBaseline (pearson_baseline),0.869426,0.664861,9.473596,7.614614,0.823845,0.407981,0.545716,0.96385
9,SVD,0.894477,0.691417,3.908932,0.143245,0.803924,0.382077,0.517978,0.963561
10,KNNBasic,0.965593,0.74587,0.192364,1.495094,0.782067,0.430068,0.554958,0.962812
8,SVDpp,0.877694,0.67512,437.387003,8.984456,0.828142,0.396382,0.536144,0.962548
2,BaselineOnly,0.888169,0.68815,0.265554,0.090422,0.802161,0.402565,0.536092,0.961945
5,KNNBaseline,0.897889,0.688994,0.509017,2.950768,0.795852,0.410523,0.541648,0.959545
7,SlopeOne,0.930613,0.714216,4.252706,7.492599,0.791058,0.39803,0.52959,0.958895
3,KNNWithZScore,0.91814,0.699338,0.286582,1.860059,0.8,0.384616,0.519481,0.958851
1,CoClustering,0.964952,0.746588,2.835403,0.140607,0.80077,0.384882,0.519886,0.9582
6,NMF,0.942432,0.723734,7.385584,0.218729,0.783731,0.383268,0.514789,0.95817
