In [None]:
!pip install surprise
!pip install recommenders
from surprise import SVD, SVDpp, Reader
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import KNNBasic
from surprise import accuracy
from surprise.model_selection import train_test_split, GridSearchCV 
import math
import numpy as np
import pandas as pd
import time
from recommenders.evaluation.python_evaluation import ndcg_at_k

In [None]:
def convert_traintest_dataframe_forsurprise(training_dataframe, testing_dataframe):
    """Turn train/test rating DataFrames into Surprise train and test sets.

    Args:
        training_dataframe: DataFrame with ``userId``, ``movieId`` and
            ``rating`` columns used to build the trainset.
        testing_dataframe: DataFrame with the same three columns used to
            build the testset.

    Returns:
        A ``(trainset, testset)`` tuple: the result of ``construct_trainset``
        on the training ratings and of ``construct_testset`` on the testing
        ratings. Both frames are read with a ``Reader`` whose rating scale
        is ``(0, 5)``.
    """
    rating_columns = ['userId', 'movieId', 'rating']
    reader = Reader(rating_scale=(0, 5))

    # Load both frames first, then build the Surprise structures from the
    # raw ratings each Dataset holds.
    train_data = Dataset.load_from_df(training_dataframe[rating_columns], reader)
    test_data = Dataset.load_from_df(testing_dataframe[rating_columns], reader)

    return (
        train_data.construct_trainset(train_data.raw_ratings),
        test_data.construct_testset(test_data.raw_ratings),
    )

In [None]:
# Paths of the training and testing ratings files, relative to the working directory.
file_path_train = 'train_20.csv'
file_path_test = 'test_20.csv'

# Read both CSV files with pandas' default parsing options.
traindf, testdf = (
    pd.read_csv(csv_path) for csv_path in (file_path_train, file_path_test)
)


In [None]:
# Rename the id columns in place to the names the Surprise conversion helper selects.
column_aliases = {'user': 'userId', 'movie': 'movieId'}
for ratings_frame in (traindf, testdf):
    ratings_frame.rename(columns=column_aliases, inplace=True)

In [None]:
# Build the Surprise trainset/testset pair from the renamed DataFrames.
trainset, testset = convert_traintest_dataframe_forsurprise(
    training_dataframe=traindf,
    testing_dataframe=testdf,
)

In [None]:
# SVD initialises its factors randomly; pin the seed so that a fresh
# "Restart & Run All" trains the same model and reproduces the same metrics.
# NOTE(review): outputs already stored in this notebook presumably came from
# an unseeded run and may differ slightly until the notebook is re-executed.
SVD_RANDOM_STATE = 42

model = SVD(
    n_factors=80,
    reg_all=0.06,
    n_epochs=30,
    lr_all=0.01,
    random_state=SVD_RANDOM_STATE,
)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fb031f17e50>

In [None]:
# Score every (user, item, rating) entry of the testset with the fitted model.
preds = model.test(testset=testset)

In [None]:
# One record per prediction; the fifth field of each prediction tuple is not needed.
prediction_dict = [
    {"user": uid, "movie": iid, "rating": actual, "prediction": estimate}
    for uid, iid, actual, estimate, _ in preds
]

In [None]:
# Tabular view of the predictions: columns user, movie, rating, prediction.
prediction_df = pd.DataFrame(data=prediction_dict)

In [None]:
# For each user keep the (up to) 10 test rows with the highest true rating.
top_n_test_data = (
    prediction_df
    .groupby("user", as_index=False)
    .apply(lambda user_rows: user_rows.nlargest(n=10, columns="rating"))
    .reset_index(drop=True)
)

In [None]:
# Root-mean-squared error of the test predictions (printed and returned).
accuracy.rmse(predictions=preds)

RMSE: 0.8540


0.8539855907318256

In [None]:
# Mean absolute error of the test predictions (printed and returned).
accuracy.mae(predictions=preds)

MAE:  0.6557


0.6557282795500964

In [None]:
# NDCG of the predicted scores, using each user's top-rated test rows as ground truth.
ndcg_at_k(
    rating_true=top_n_test_data,
    rating_pred=prediction_df,
    col_user="user",
    col_item="movie",
    col_rating="rating",
    col_prediction="prediction",
    relevancy_method="top_k",
)

0.7174899837901276

In [None]:
# Precision@K and Recall@K per user, averaged over every user in the training data.
#   relevant    : true rating      >= RELEVANCE_THRESHOLD
#   recommended : predicted rating >= RELEVANCE_THRESHOLD, among the K highest predictions
TOP_K = 10
RELEVANCE_THRESHOLD = 4.0

# Group the predictions once up front instead of re-filtering the whole
# prediction frame for every user (which is O(users x rows)).
ratings_by_user = {
    user: list(zip(user_rows["rating"], user_rows["prediction"]))
    for user, user_rows in prediction_df.groupby("user", sort=False)
}

train_users = traindf["userId"].unique()

# Running sums; they are divided by the number of users when printed.
mean_precision = 0
mean_recall = 0
for user in train_users:
  # Users with no test predictions get an empty list and contribute 0 to both sums.
  ratings_list = sorted(ratings_by_user.get(user, []), key=lambda x: x[1], reverse=True)
  top_k = ratings_list[:TOP_K]

  total_relevant = sum(rat >= RELEVANCE_THRESHOLD for rat, pred in ratings_list)
  total_recommended = sum(pred >= RELEVANCE_THRESHOLD for rat, pred in top_k)
  total_rec_rel = sum(
      rat >= RELEVANCE_THRESHOLD and pred >= RELEVANCE_THRESHOLD for rat, pred in top_k
  )

  mean_precision += (total_rec_rel/total_recommended) if total_recommended > 0 else 0
  mean_recall += (total_rec_rel/total_relevant) if total_relevant > 0 else 0

# Average over exactly the users that were iterated (the same "userId" column),
# and avoid a ZeroDivisionError when there are no users at all.
n_users = len(train_users)
print(f"Precision: {mean_precision/n_users if n_users else 0.0}")
print(f"Recall: {mean_recall/n_users if n_users else 0.0}")

Precision: 0.6299310434556336
Recall: 0.31090367944666286
