# BPR Model Training and Evaluation

Bayesian Personalized Ranking

In [1]:
import sys
import os

# Resolve the project root (the parent of the directory the kernel starts in)
# only once per kernel session. The cell ends with os.chdir(project_root), so
# recomputing "cwd/.." on a re-run would climb one directory higher each time
# and break relative paths such as 'configs/bpr.yaml'. Caching the first result
# makes the cell safe to re-execute.
if '_PROJECT_ROOT' not in globals():
    _PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
project_root = _PROJECT_ROOT

# Make project-local packages importable and relative paths resolve from the root.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
os.chdir(project_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

import cornac
from recommenders.utils.timer import Timer
from recommenders.evaluation.python_evaluation import map, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.datasets.python_splitters import python_stratified_split

from tecd_retail_recsys.data import DataPreprocessor
from tecd_retail_recsys.metrics import calculate_metrics

# Record the interpreter and key library versions alongside the results
# so the run can be reproduced later.
print(
    f"System version: {sys.version}",
    f"Pandas version: {pd.__version__}",
    f"Numpy version: {np.__version__}",
    f"Cornac version: {cornac.__version__}",
    sep="\n",
)

System version: 3.11.14 (main, Oct 28 2025, 12:11:54) [Clang 20.1.4 ]
Pandas version: 2.3.3
Numpy version: 1.26.4
Cornac version: 2.3.5


In [32]:
# Read the experiment configuration (path is relative to the project root).
with open('configs/bpr.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Evaluation / training-loop settings.
train_cfg = config['train']
TOP_K = train_cfg['top_k']
BATCH_SIZE = train_cfg['batch_size']

# BPR hyperparameters.
model_cfg = config['model']
K = model_cfg['k']
MAX_ITER = model_cfg['max_iter']
LEARNING_RATE = model_cfg['learning_rate']
LAMBDA_REG = model_cfg['lambda_reg']

# Output location and random seed (the seed is the recommenders default).
MODEL_DIR = config['info']['MODEL_DIR']
SEED = DEFAULT_SEED

os.makedirs(MODEL_DIR, exist_ok=True)

# Echo the effective settings so they are stored with the notebook output.
print(
    f"Model: BPR (Bayesian Personalized Ranking)",
    f"Latent factors (k): {K}",
    f"Max iterations: {MAX_ITER}",
    f"Learning rate: {LEARNING_RATE}",
    f"Lambda (regularization): {LAMBDA_REG}",
    f"Batch size: {BATCH_SIZE}",
    sep='\n',
)

Model: BPR (Bayesian Personalized Ranking)
Latent factors (k): 8192
Max iterations: 2000
Learning rate: 0.001
Lambda (regularization): 0.001
Batch size: 1000


## Data Preparation

In [3]:
# Build the train / validation / test frames with the project preprocessor.
dp = DataPreprocessor(
    day_begin=1082,
    day_end=1308,
    val_days=20,
    test_days=20,
    min_user_interactions=1,
    min_item_interactions=20,
)
train_df, val_df, test_df = dp.preprocess()

# Implicit feedback: every validation interaction gets a constant rating.
val_df['rating'] = 1.0

# Snapshot the frames while they still carry the original column names
# (user_id / item_id). val_orig already includes the 'rating' column added above.
train_orig, val_orig = train_df.copy(), val_df.copy()

# Switch train/val to the userID / itemID naming used by the rest of the notebook.
# The rename is done in place on purpose, so the same frame objects are modified;
# test_df keeps its original column names.
for frame in (train_df, val_df):
    frame.rename(columns={'user_id': 'userID', 'item_id': 'itemID'}, inplace=True)

print(
    f"Train shape: {train_df.shape}",
    f"Val shape: {val_df.shape}",
    f"Test shape: {test_df.shape}",
    f"Number of users: {train_df['userID'].nunique()}",
    f"Number of items: {train_df['itemID'].nunique()}",
    sep='\n',
)

Starting data preprocessing...
Loading events from t_ecd_small_partial/dataset/small/retail/events
Loaded 236,479,226 total events
Loading items data from t_ecd_small_partial/dataset/small/retail/items.pq
Loaded 250,171 items with features: ['item_id', 'item_brand_id', 'item_category', 'item_subcategory', 'item_price', 'item_embedding']
Merged item features. Data shape: (236479226, 12)
Filtered to 3,758,762 events with action_type='added-to-cart'
After filtering (min_user_interactions=1, min_item_interactions=20): 3,249,972 events, 84,944 users, 30,954 items
Created mappings: 84944 users, 30954 items
Temporal split - Train: days < 1269 (902,543 events), Val: days 1269-1288 (228,339 events), Test: days >= 1289 (223,395 events)
Users in each part (train, val, test) - 7425
Train shape: (902543, 12)
Val shape: (228339, 13)
Test shape: (223395, 12)
Number of users: 7425
Number of items: 30751


In [4]:
# Prepare data for Cornac, which expects (user_id, item_id, rating) tuples.
# IDs are passed as strings and every interaction gets an implicit rating of 1.0.
# The two ID columns are iterated directly instead of using DataFrame.iterrows():
# iterrows builds a Series per row (very slow on ~1M rows) and converts values
# according to the row's common dtype, whereas zipping the columns keeps each ID
# in its own column dtype before it is stringified.
train_data = [
    (str(user_id), str(item_id), 1.0)
    for user_id, item_id in zip(train_df['userID'], train_df['itemID'])
]
val_data = [
    (str(user_id), str(item_id), 1.0)
    for user_id, item_id in zip(val_df['userID'], val_df['itemID'])
]

# Create the Cornac training dataset; its uid_map / iid_map translate the string
# IDs into the internal indices the model works with.
train_set = cornac.data.Dataset.from_uir(train_data, seed=SEED)
print(f"Train set: {train_set.num_users} users, {train_set.num_items} items, {train_set.num_ratings} interactions")

# Build a validation dataset from the validation interactions.
# NOTE(review): this is a separate Dataset built independently of train_set, so its
# internal indices presumably do not line up with the training ones -- confirm
# before feeding it to the trained model.
val_set = cornac.data.Dataset.from_uir(val_data, seed=SEED)
print(f"Val set: {val_set.num_users} users, {val_set.num_items} items, {val_set.num_ratings} interactions")

Train set: 7425 users, 30751 items, 630733 interactions
Val set: 7425 users, 26716 items, 198681 interactions


## Model Training

In [33]:
# Gather the BPR hyperparameters from the config cell in one place.
# BATCH_SIZE is read from the config but is not passed to the model here.
bpr_params = dict(
    k=K,
    max_iter=MAX_ITER,
    learning_rate=LEARNING_RATE,
    lambda_reg=LAMBDA_REG,
    seed=SEED,
    verbose=True,
)

# Instantiate the (still untrained) Cornac BPR model.
model = cornac.models.BPR(**bpr_params)

print("Model initialized successfully")

Model initialized successfully


In [34]:
# Train the model
# Fit BPR on the Cornac training set and measure wall-clock time with the
# recommenders Timer context manager. This is the expensive cell of the
# notebook: the recorded run with the current config took about 5400 seconds.
with Timer() as train_time:
    model.fit(train_set)

# train_time.interval holds the elapsed seconds once the `with` block has exited.
print(f"Training took {train_time.interval:.2f} seconds")

  0%|          | 0/2000 [00:00<?, ?it/s]

Optimization finished!
Training took 5402.44 seconds


## Generate Recommendations

In [35]:
# Generate top-K recommendations for every training user.
#
# The model is asked for the scores of ALL items in a single vectorised call per
# user (`model.score(user_idx)`), instead of one Python-level call per
# (user, item) pair. Ranking and tie order are unchanged: a stable sort on the
# negated scores keeps equal-scored items in item-map order, exactly like a
# stable descending sort over the item list.
# Items the user already interacted with in training are NOT filtered out.
from tqdm import tqdm

unique_users = train_df['userID'].unique()
print(f"Generating recommendations for {len(unique_users)} users...")

# Loop-invariant lookup tables, built once:
#   item_indices[j] -> internal Cornac index of the j-th item in the item map
#   item_ids[j]     -> the same item's original integer ID
item_id_strs = list(train_set.iid_map.keys())
item_indices = np.fromiter(
    (train_set.iid_map[item_id_str] for item_id_str in item_id_strs),
    dtype=np.int64,
    count=len(item_id_strs),
)
item_ids = np.array([int(item_id_str) for item_id_str in item_id_strs], dtype=np.int64)

user_parts, item_parts, score_parts = [], [], []
for user_id in tqdm(unique_users):
    user_id_str = str(user_id)

    # Skip users the trained model does not know about.
    if user_id_str not in train_set.uid_map:
        continue

    user_idx = train_set.uid_map[user_id_str]

    # Scores for every known item, re-ordered to follow the item-map order.
    scores = np.asarray(model.score(user_idx))[item_indices]

    # Positions of the TOP_K highest scores, best first (stable on ties).
    top = np.argsort(-scores, kind='stable')[:TOP_K]

    user_parts.append(np.full(len(top), user_id))
    item_parts.append(item_ids[top])
    score_parts.append(scores[top])

# Assemble the flat (userID, itemID, prediction) table in one go.
if user_parts:
    topk_scores = pd.DataFrame({
        'userID': np.concatenate(user_parts),
        'itemID': np.concatenate(item_parts),
        'prediction': np.concatenate(score_parts),
    })
else:
    # No user could be scored: keep the expected schema so later cells still work.
    topk_scores = pd.DataFrame(columns=['userID', 'itemID', 'prediction'])

print(f"Generated {len(topk_scores)} recommendations")
topk_scores.head()

Generating recommendations for 7425 users...


100%|██████████| 7425/7425 [08:36<00:00, 14.37it/s]


Generated 742500 recommendations


Unnamed: 0,userID,itemID,prediction
0,79038,11413,6.941745
1,79038,18172,6.864316
2,79038,8476,6.49744
3,79038,8670,6.31594
4,79038,16450,6.281305


In [36]:
# Collapse the flat top-K table into one row per user whose 'itemID' cell is the
# list of recommended items, ordered from highest to lowest predicted score.
ranked = topk_scores.sort_values(by=['userID', 'prediction'], ascending=[True, False])
recs = ranked.groupby('userID', as_index=False)['itemID'].apply(list)
recs.head()

Unnamed: 0,userID,itemID
0,11,"[18856, 11413, 8670, 30249, 368, 16561, 20587,..."
1,14,"[17934, 7982, 5631, 16668, 29228, 25997, 1683,..."
2,21,"[17934, 7982, 5631, 25997, 1683, 17396, 29228,..."
3,29,"[17934, 16790, 12315, 21895, 7982, 1245, 8589,..."
4,39,"[12315, 17750, 27827, 8589, 4691, 25061, 9442,..."


## Evaluation

In [37]:
# Score the top-K predictions against the validation interactions using the
# ranking metrics from `recommenders`.
# Note: `map` here is the recommenders metric imported at the top of the notebook;
# it shadows the Python builtin of the same name.
eval_map = map(val_df, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(val_df, topk_scores, k=TOP_K)
eval_precision = precision_at_k(val_df, topk_scores, k=TOP_K)
eval_recall = recall_at_k(val_df, topk_scores, k=TOP_K)

# One metric per line.
report_lines = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report_lines, sep='\n')

MAP:	0.034490
NDCG:	0.163820
Precision@K:	0.047146
Recall@K:	0.136948


## Save Model

In [27]:
# Save the trained model.
# Cornac's `save` treats its argument as a *directory*: the recorded run wrote the
# pickle to <model_path>/BPR/<timestamp>.pkl, not to <model_path> itself. The save
# location is left unchanged, but the message now reports the file that was
# actually written (the path returned by `save`) instead of the directory name.
model_path = os.path.join(MODEL_DIR, "bpr_model.pkl")
saved_model_file = model.save(model_path) or model_path  # fall back if no path is returned
print(f"Model saved to {saved_model_file}")

BPR model is saved to ./models/bpr/bpr_model.pkl/BPR/2026-02-23_16-40-10-174156.pkl
Model saved to ./models/bpr/bpr_model.pkl


In [30]:
# save recommendations
# Persist the per-user recommendation lists (`recs`: one row per userID with a
# list-valued 'itemID' column) as a Parquet file under MODEL_DIR.
# `recs_path` is read back by the final-evaluation cell, so that cell evaluates
# whatever was last written here.
recs_path = os.path.join(MODEL_DIR, "recommendations.parquet")
recs.to_parquet(recs_path)
print(f"Recommendations saved to {recs_path}")

Recommendations saved to ./models/bpr/recommendations.parquet


In [50]:
# Final evaluation of the best model with the project's own metric suite.
# NOTE(review): the recommendations are read back from `recs_path` on disk, so this
# scores whatever was last saved there, which is not necessarily the model
# currently in memory.
joined = dp.get_grouped_data(train_orig, val_orig, test_df)
# Combined train + validation history per user (not used by the call below).
joined['train_val_interactions'] = joined['train_interactions'] + joined['val_interactions']

# Reload the saved lists, normalise item IDs to int64, and rebuild one list per user.
recs = (
    pd.read_parquet(recs_path)
    .explode('itemID')
    .astype({'itemID': np.int64})
    .groupby('userID', as_index=False)['itemID']
    .apply(list)
)

# Attach each user's recommendations to the grouped interactions
# (default inner merge: only users present on both sides remain).
joined = joined.merge(recs, left_on='user_id', right_on='userID')
joined = joined.rename(columns={'itemID': 'bpr_recs'})

metrics = calculate_metrics(
    joined,
    train_col='train_interactions',
    gt_col='val_interactions',
    model_preds='bpr_recs',
    verbose=True,
)

[Metrics debug] resolved gt_col='val_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (228339, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=22 pred_count=100 overlap=5
  user_id=14 gt_count=5 pred_count=100 overlap=0
    [ID spaces] gt sample=[9341, 16732, 17585, 28024, 30789] range=[9341, 30789] | rec sample=[182, 394, 1245, 1683, 1809] range=[182, 30825]
  user_id=21 gt_count=47 pred_count=100 overlap=8

At k=10:
  MAP@10       = 0.0944
  NDCG@10      = 0.2421
  Precision@10 = 0.1269
  Recall@10    = 0.0382

At k=100:
  MAP@100       = 0.0434
  NDCG@100      = 0.1956
  Precision@100 = 0.0587
  Recall@100    = 0.1718

Other Metrics:
  MRR                 = 0.2382
  Catalog Coverage    = 0.6973
  Diversity     = 0.9953  [0=same recs for all, 1=unique recs]
  Novelty             = 0.7642
  Serendipity    

<!DOCTYPE html>
<html>
<head>
    <style>
        table {
            border-collapse: collapse;
            width: 100%;
            font-family: Arial, sans-serif;
            margin: 20px 0;
        }
        th {
            background-color: #4CAF50;
            color: white;
            padding: 12px;
            text-align: left;
            border: 1px solid #ddd;
        }
        td {
            padding: 10px;
            border: 1px solid #ddd;
            text-align: left;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        tr:hover {
            background-color: #ddd;
        }
        .best {
            background-color: #c8e6c9 !important;
            font-weight: bold;
        }
        .worst {
            background-color: #ffcdd2 !important;
        }
    </style>
</head>
<body>
    <h2>BPR: эксперименты</h2>
    <table>
        <thead>
            <tr>
                <th>Номер эксперимента</th>
                <th>k (latent factors)</th>
                <th>max_iter</th>
                <th>learning_rate</th>
                <th>lambda_reg</th>
                <th>batch_size</th>
                <th>NDCG@100</th>
            </tr>
        </thead>
        <tbody>
            <tr class="worst">
                <td>1</td>
                <td>200</td>
                <td>100</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>1000</td>
                <td>0.087829</td>
            </tr>
            <tr>
                <td>2</td>
                <td>400</td>
                <td>500</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>1000</td>
                <td>0.097784</td>
            </tr>
            <tr>
                <td>3</td>
                <td>512</td>
                <td>500</td>
                <td>0.001</td>
                <td>0.0001</td>
                <td>1000</td>
                <td>0.094424</td>
            </tr>
            <tr>
                <td>4</td>
                <td>1024</td>
                <td>1000</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>1000</td>
                <td>0.142955</td>
            </tr>
            <tr>
                <td>5</td>
                <td>2048</td>
                <td>2000</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>1000</td>
                <td>0.170215</td>
            </tr>
            <tr class="best">
                <td>6</td>
                <td>4096</td>
                <td>3000</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>1000</td>
                <td><strong>0.195564</strong></td>
            </tr>
            <tr>
                <td>7</td>
                <td>8192</td>
                <td>2000</td>
                <td>0.001</td>
                <td>0.001</td>
                <td>1000</td>
                <td>0.163820</td>
            </tr>
        </tbody>
    </table>
</body>
</html>

`Наилучшая конфигурация (эксперимент 6: k = 4096, max_iter = 3000) смогла добиться NDCG@100 = 0.1956`