This notebook evaluates the BPR-Opt model on the MovieLens dataset using the Precision@k, Recall@k, and NDCG@k metrics.

The results are at the end of the notebook.

Additional description and explanation are in the report.

In [1]:
import sys
from pathlib import Path

# The parent of the current working directory is put on the import path
# (presumably the folder that contains the `src` package — confirm the notebook's location).
PROJECT_ROOT = Path.cwd().parent
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

In [2]:
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

In [3]:
from src.data_reading import read_ratings_file
from src.evaluation import temporal_split, evaluate_precision_at_k, evaluate_recall_at_k, evaluate_ndcg_at_k
from src.models.bpr_opt import BPR_Opt

# Data Preparation

In [4]:
# BPR-Opt only needs the movie ratings file; movie metadata and user features are not used here.

ratings = read_ratings_file()

In [5]:
# Map raw user and movie ids onto contiguous 0-based indices,
# numbered in the order in which each id first appears in `ratings`.

user_map = {raw_user: idx for idx, raw_user in enumerate(ratings['user_id'].unique())}
item_map = {raw_movie: idx for idx, raw_movie in enumerate(ratings['movie_id'].unique())}

# Store the reindexed ids next to the originals as columns 'u' (user) and 'i' (item).
ratings['u'] = ratings.user_id.map(user_map)
ratings['i'] = ratings.movie_id.map(item_map)

In [6]:
# Split into train and test sets by date. Per the output printed below, the train timeframe
# ends before the test timeframe begins, and test_ratio=0.1 yields roughly a 90/10 split.

train, test = temporal_split(ratings, test_ratio=0.1)

Train set size: (900188, 6)
Test set size: (100021, 6)
Train timeframe: 2000-04-25 23:05:32 - 2000-12-29 23:42:47
Test timeframe: 2000-12-29 23:43:34 - 2003-02-28 17:49:50


In [8]:
# Ratings >= 4 count as positive feedback; lower ratings and unseen items are treated as negative feedback.
MIN_POSITIVE_RATING = 4

# Per-user sets of positively rated items, keyed by the reindexed user id 'u'.
train_pos_df = train[train.rating >= MIN_POSITIVE_RATING]
train_dict = train_pos_df.groupby('u')['i'].apply(set).to_dict()

test_pos_df = test[test.rating >= MIN_POSITIVE_RATING]

# BPR-Opt doesn't support cold-start, so the test ground truth keeps only users AND movies
# that also occur in the train set. (Previously only users were filtered, contradicting this intent.)

test_users = np.intersect1d(test.user_id.unique(), train.user_id.unique())
test_pos_seen_df = test_pos_df[
    test_pos_df['user_id'].isin(test_users)
    & test_pos_df['movie_id'].isin(train['movie_id'].unique())
]
test_dict = test_pos_seen_df.groupby('u')['i'].apply(set).to_dict()

# Model Training

In [9]:
# Seed NumPy's global RNG so the stochastic training can be reproduced on Restart & Run All.
# NOTE(review): this only helps if BPR_Opt samples through np.random — confirm in src/models/bpr_opt.py.
SEED = 42
np.random.seed(SEED)

# One user/item slot per reindexed id; hyperparameters: factors=32, lr=0.01, reg=0.001.
model = BPR_Opt(
    n_users=len(user_map),
    n_items=len(item_map),
    factors=32,
    lr=0.01,
    reg=0.001,
)
# Fit on the per-user sets of positively rated train items for 10 epochs.
model.fit(train_dict, n_epochs=10)

Epoch 1: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:56<00:00, 4455.27it/s]
Epoch 2: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:54<00:00, 4565.59it/s]
Epoch 3: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:54<00:00, 4549.15it/s]
Epoch 4: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:54<00:00, 4565.11it/s]
Epoch 5: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:53<00:00, 4578.21it/s]
Epoch 6: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:54<00:00, 4561.18it/s]
Epoch 7: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:53<00:00, 4581.71it/s]
Epoch 8: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:53<00:00, 4570.31it/s]
Epoch 9: 100%|███████████████████████████████████████████████████████| 520829/520829 [01:54<00:00, 4553.

# Model Evaluation

In [10]:
# Evaluate only on test ratings whose user also appears in the train set.
test_ = test[test.user_id.isin(test_users)]

# Precision@10 of the trained model's recommendations.
# train_dict is passed along — presumably so already-seen train items can be excluded; confirm in src.evaluation.

evaluate_precision_at_k(
    test=test_,
    train_dict=train_dict,
    recommend_k_fn=model.recommend,
    user_map=user_map, item_map=item_map,
    k=10,
)

0.16245762711864406

In [11]:
# Recall@10 on the same filtered test set and trained model as the other metric cells.

evaluate_recall_at_k(
    test=test_,
    train_dict=train_dict,
    recommend_k_fn=model.recommend,
    user_map=user_map, item_map=item_map,
    k=10,
)

0.0487580201653174

In [12]:
# NDCG@10 on the same filtered test set and trained model as the other metric cells.

evaluate_ndcg_at_k(
    test=test_,
    train_dict=train_dict,
    recommend_k_fn=model.recommend,
    user_map=user_map, item_map=item_map,
    k=10,
)

np.float64(0.21161167638418008)