In [1]:
# Matrix Factorization Model Training & Evaluation

import sys
import os
sys.path.append(os.path.abspath(".."))

import pandas as pd
from surprise import Reader, Dataset
from src.models.mf_model import SVDRecommender
from src.utils.metrics import batch_eval

# --- Load the sample train / test splits -------------------------------
train = pd.read_csv("../data/sample/train.csv")
test = pd.read_csv("../data/sample/test.csv")

# --- Build a Surprise trainset from the training ratings ---------------
# The rating scale is derived from the observed min/max rating in `train`
# rather than hard-coded.
ratings = train["rating"]
reader = Reader(rating_scale=(ratings.min(), ratings.max()))
data = Dataset.load_from_df(train[["userId", "movieId", "rating"]], reader)
trainset = data.build_full_trainset()

# --- Fit the SVD recommender and persist it ----------------------------
algo = SVDRecommender()
algo.fit(trainset)
algo.save_model("../model_artifacts/svd_model.pkl")

# --- Collect per-user ground truth from the test split -----------------
users = test["userId"].unique()
# Maps each test userId to the list of movieIds that user has in `test`.
movies_by_user = test.groupby("userId")["movieId"]
test_dict = movies_by_user.apply(list).to_dict()

# --- Generate recommendations for every test user ----------------------
# NOTE(review): `train` is presumably passed so the recommender can skip
# items the user has already rated, and `n` is presumably the list length
# -- confirm against SVDRecommender.recommend.
top_n = {uid: algo.recommend(uid, train, n=10) for uid in users}

# --- Score the recommendations and report ------------------------------
prec, recall, ndcg = batch_eval(users, test_dict, top_n, k=10)
print(
    f"SVD model: Precision@10={prec:.4f} "
    f"Recall@10={recall:.4f} "
    f"NDCG@10={ndcg:.4f}"
)


Model saved to ../model_artifacts/svd_model.pkl
SVD model: Precision@10=0.0327 Recall@10=0.0118 NDCG@10=0.0307
