### Reciprocal Rank Fusion (RRF)

In [1]:
import sys
import os

# Remember the directory the kernel started in. os.chdir() below changes what
# os.getcwd() returns, so deriving the root from getcwd() on every run would
# climb one more level each time this cell is re-executed.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()

# The project root is the parent of the notebook's start directory.
project_root = os.path.abspath(os.path.join(_NOTEBOOK_START_DIR, '..'))
if project_root not in sys.path:
    # Put the root first on the import path (presumably so the local
    # `tecd_retail_recsys` package resolves from the checkout).
    sys.path.insert(0, project_root)
# Relative paths used by later cells are resolved against the project root.
os.chdir(project_root)

import pandas as pd
import polars as pl
from tecd_retail_recsys.data import DataPreprocessor
from tecd_retail_recsys.metrics import calculate_metrics

In [3]:
# Build the preprocessor and produce the temporal train / val / test split.
# Per the preprocessing log, the last `test_days` days form the test set and
# the `val_days` days before them form the validation set.
dp = DataPreprocessor(
    day_begin=1082,  # presumably the first day kept — confirm in DataPreprocessor
    day_end=1308,  # presumably the last day kept — confirm in DataPreprocessor
    val_days=20,  # length of the validation window, in days
    test_days=20,  # length of the test window, in days
    min_user_interactions=1,  # filter threshold reported in the preprocessing log
    min_item_interactions=20  # filter threshold reported in the preprocessing log
)
train_df, val_df, test_df = dp.preprocess()
# Train and validation events stacked into one frame with a fresh index.
trainval_df = pd.concat([train_df, val_df], ignore_index=True)

# Frame with 'train_interactions', 'val_interactions' and 'test_interactions'
# columns, later merged with recommendations on 'user_id'.
joined = dp.get_grouped_data(train_df, val_df, test_df)
# History passed as `train_col` to calculate_metrics in the evaluation cells.
# presumably `+` concatenates the two per-user lists — verify the column type
joined['train_val_interactions'] = joined['train_interactions'] + joined['val_interactions']

Starting data preprocessing...
Loading events from t_ecd_small_partial/dataset/small/retail/events
Loaded 236,479,226 total events
Loading items data from t_ecd_small_partial/dataset/small/retail/items.pq
Loaded 250,171 items with features: ['item_id', 'item_brand_id', 'item_category', 'item_subcategory', 'item_price', 'item_embedding']
Merged item features. Data shape: (236479226, 12)
Filtered to 3,758,762 events with action_type='added-to-cart'
After filtering (min_user_interactions=1, min_item_interactions=20): 3,249,972 events, 84,944 users, 30,954 items
Created mappings: 84944 users, 30954 items
Temporal split - Train: days < 1269 (902,543 events), Val: days 1269-1288 (228,339 events), Test: days >= 1289 (223,395 events)
Users in each part (train, val, test) - 7425


In [4]:
import ast  # required by ast.literal_eval in the parsing loop below

# Recommendations of every base model, stored in columns named '<model>_recs'.
all_recs = pd.read_parquet("models_test/data/all_recs_test.parquet")
model_columns = all_recs.columns[all_recs.columns.str.endswith('_recs')].tolist()
for col in model_columns:
    # Lists that were serialised as strings are parsed back into Python
    # objects; values that are already sequences are left untouched.
    if all_recs[col].dtype == 'object':
        all_recs[col] = all_recs[col].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )

In [5]:
from tecd_retail_recsys.utils import calculate_avg_prices, calculate_overall_avg_price, get_avg_recs_price, get_item_to_price
# Passed to get_avg_recs_price in later cells.
# presumably a mapping item -> price built from the preprocessor — verify in tecd_retail_recsys.utils
item_to_price = get_item_to_price(dp)

#### variant 1 - standard RRF

In [7]:
from tqdm import tqdm

def reciprocal_rank_fusion(all_recs, model_columns, k=20, top_n=100):
    """
    Fuse several per-model rankings into one list per user with RRF.

    RRF_score(item) = sum_over_models(1 / (k + rank_model(item)))

    Ranks are 1-based positions inside each model's list. Items are ordered
    by descending fused score; equal scores keep first-seen order because the
    sort is stable.

    Args:
        all_recs: DataFrame with a 'user_id' column and one column of
            recommendation sequences per model.
        model_columns: names of the recommendation columns to fuse; names
            that are not columns of all_recs are ignored.
        k: smoothing constant added to every rank.
        top_n: maximum number of fused recommendations kept per user.

    Returns:
        DataFrame with columns 'user_id' and 'rrf_recs' (list of item ids),
        one row per row of all_recs. Both columns exist even when all_recs
        is empty.
    """
    print(f"Computing Reciprocal Rank Fusion with k={k}...")

    # The usable columns are the same for every row, so resolve them once.
    active_columns = [col for col in model_columns if col in all_recs.columns]

    rrf_recommendations = []

    for _, row in tqdm(all_recs.iterrows(), total=len(all_recs), desc="RRF"):
        item_scores = {}

        for model_col in active_columns:
            recs = row[model_col]

            # A missing cell (None / NaN / pd.NA) means the model produced
            # nothing for this user; it contributes no score instead of
            # crashing in enumerate().
            if recs is None or recs is pd.NA or isinstance(recs, float):
                continue

            for rank, item_id in enumerate(recs, start=1):
                item_scores[item_id] = item_scores.get(item_id, 0.0) + 1.0 / (k + rank)

        # Dicts preserve insertion order, so ties resolve to first-seen order.
        top_items = sorted(item_scores, key=item_scores.get, reverse=True)[:top_n]

        rrf_recommendations.append({
            'user_id': row['user_id'],
            'rrf_recs': top_items
        })

    # Explicit columns keep the schema stable for an empty input, so a later
    # merge on 'user_id' still works.
    rrf_df = pd.DataFrame(rrf_recommendations, columns=['user_id', 'rrf_recs'])
    print(f"RRF recommendations generated for {len(rrf_df)} users")

    return rrf_df


# Fuse every model's ranking with plain (unweighted) RRF.
rrf_recs = reciprocal_rank_fusion(all_recs, model_columns, k=20, top_n=100)

# The left join keeps every row of `joined`; users without a fused list end
# up with a non-list value, which is replaced by an empty list.
evaluation_df_rrf = joined.merge(rrf_recs, on='user_id', how='left')
evaluation_df_rrf['rrf_recs'] = evaluation_df_rrf['rrf_recs'].map(
    lambda recs: recs if isinstance(recs, list) else []
)

# Score the fused list against the test interactions.
metrics_rrf = calculate_metrics(
    evaluation_df_rrf,
    train_col='train_val_interactions', gt_col='test_interactions',
    model_preds='rrf_recs', verbose=True,
)

Computing Reciprocal Rank Fusion with k=20...


RRF: 100%|██████████| 7425/7425 [00:01<00:00, 6171.30it/s]


RRF recommendations generated for 7425 users
[Metrics debug] resolved gt_col='test_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (223395, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=9 pred_count=100 overlap=2
  user_id=14 gt_count=56 pred_count=100 overlap=7
  user_id=21 gt_count=43 pred_count=100 overlap=14

At k=10:
  MAP@10       = 0.1927
  NDCG@10      = 0.4620
  Precision@10 = 0.1970
  Recall@10    = 0.0637

At k=100:
  MAP@100       = 0.0906
  NDCG@100      = 0.3161
  Precision@100 = 0.0850
  Recall@100    = 0.2623

Other Metrics:
  MRR                 = 0.3078
  Catalog Coverage    = 0.9210
  Diversity     = 0.9965  [0=same recs for all, 1=unique recs]
  Novelty             = 0.7259
  Serendipity         = 0.0351


In [8]:
# Average price of the fused (v1) recommendations.
# NOTE(review): the message says "на валидации" (on validation), but
# evaluation_df_rrf holds recs that are scored against 'test_interactions' —
# confirm which split the label should name.
avg_rrf_recs_price_val = get_avg_recs_price(evaluation_df_rrf, item_to_price, 'rrf_recs')
print(f'Средняя цена рекомендаций модели RRF (v1) на валидации: {avg_rrf_recs_price_val:.2f}')

Средняя цена рекомендаций модели RRF (v1) на валидации: -3.85


#### variant 2 - WEIGHTED RRF

In [9]:
# === WEIGHTED RRF ===

def weighted_rrf(all_recs, model_columns, model_weights=None, k=20, top_n=100):
    """
    Reciprocal Rank Fusion with a per-model weight.

    score(item) = sum_over_models(weight_model / (k + rank_model(item)))

    Ranks are 1-based positions inside each model's list. Items are ordered
    by descending fused score; equal scores keep first-seen order because the
    sort is stable.

    Args:
        all_recs: DataFrame with a 'user_id' column and one column of
            recommendation sequences per model.
        model_columns: names of the recommendation columns to fuse; names
            that are not columns of all_recs are ignored.
        model_weights: mapping column name -> weight. Columns missing from
            the mapping get weight 1.0; None means weight 1.0 for all.
        k: smoothing constant added to every rank.
        top_n: maximum number of fused recommendations kept per user.

    Returns:
        DataFrame with columns 'user_id' and 'rrf_weighted_recs' (list of
        item ids), one row per row of all_recs. Both columns exist even when
        all_recs is empty.
    """
    if model_weights is None:
        model_weights = {col: 1.0 for col in model_columns}

    print(f"Computing Weighted RRF with k={k}...")
    print(f"Model weights: {model_weights}")

    # Usable columns and their weights are the same for every row, so
    # resolve them once instead of inside the per-user loop.
    weighted_columns = [
        (col, model_weights.get(col, 1.0))
        for col in model_columns
        if col in all_recs.columns
    ]

    rrf_recommendations = []

    for _, row in tqdm(all_recs.iterrows(), total=len(all_recs), desc="Weighted RRF"):
        item_scores = {}

        for model_col, weight in weighted_columns:
            recs = row[model_col]

            # A missing cell (None / NaN / pd.NA) means the model produced
            # nothing for this user; it contributes no score instead of
            # crashing in enumerate().
            if recs is None or recs is pd.NA or isinstance(recs, float):
                continue

            for rank, item_id in enumerate(recs, start=1):
                # Weighted RRF contribution of this model for this item.
                item_scores[item_id] = item_scores.get(item_id, 0.0) + weight / (k + rank)

        # Sort by fused score and keep the top-N; ties keep first-seen order.
        top_items = sorted(item_scores, key=item_scores.get, reverse=True)[:top_n]

        rrf_recommendations.append({
            'user_id': row['user_id'],
            'rrf_weighted_recs': top_items
        })

    # Explicit columns keep the schema stable for an empty input, so a later
    # merge on 'user_id' still works.
    return pd.DataFrame(rrf_recommendations, columns=['user_id', 'rrf_weighted_recs'])


# arbitrary weights
model_weights = {
    'bert4rec_recs': 2.0,
    'sasrec_recs': 2.0,
    'lightfm_recs': 1.5,
    'ials_recs': 1.2,
    'bivae_recs': 1.0,
    'bpr_recs': 1.0,
    'tifuknn_recs': 0.8,
    'toppersonal_recs': 0.5,
    'toppopular_recs': 0.3,
}

# Fuse with the weights above; k and top_n match the unweighted RRF run.
rrf_weighted = weighted_rrf(all_recs, model_columns, model_weights, k=20, top_n=100)

Computing Weighted RRF with k=20...
Model weights: {'bert4rec_recs': 2.0, 'sasrec_recs': 2.0, 'lightfm_recs': 1.5, 'ials_recs': 1.2, 'bivae_recs': 1.0, 'bpr_recs': 1.0, 'tifuknn_recs': 0.8, 'toppersonal_recs': 0.5, 'toppopular_recs': 0.3}


Weighted RRF: 100%|██████████| 7425/7425 [00:01<00:00, 4921.43it/s]


In [10]:
# The left join keeps every row of `joined`; users without a fused list end
# up with a non-list value, which is replaced by an empty list.
evaluation_df_rrf = joined.merge(rrf_weighted, on='user_id', how='left')
evaluation_df_rrf['rrf_weighted_recs'] = evaluation_df_rrf['rrf_weighted_recs'].map(
    lambda recs: recs if isinstance(recs, list) else []
)

# Score the weighted fusion against the test interactions.
metrics_rrf = calculate_metrics(
    evaluation_df_rrf,
    train_col='train_val_interactions', gt_col='test_interactions',
    model_preds='rrf_weighted_recs', verbose=True,
)

[Metrics debug] resolved gt_col='test_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (223395, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=9 pred_count=100 overlap=2
  user_id=14 gt_count=56 pred_count=100 overlap=6
  user_id=21 gt_count=43 pred_count=100 overlap=14

At k=10:
  MAP@10       = 0.2342
  NDCG@10      = 0.5307
  Precision@10 = 0.2166
  Recall@10    = 0.0724

At k=100:
  MAP@100       = 0.0968
  NDCG@100      = 0.3194
  Precision@100 = 0.0793
  Recall@100    = 0.2473

Other Metrics:
  MRR                 = 0.3220
  Catalog Coverage    = 0.8963
  Diversity     = 0.9964  [0=same recs for all, 1=unique recs]
  Novelty             = 0.7932
  Serendipity         = 0.0306


In [11]:
# set proper weights
# model weight == NDCG@100 of that model evaluated on its own on validation
correct_model_weights = {
    'bert4rec_recs': 0.2774,
    'sasrec_recs': 0.1590,
    'lightfm_recs': 0.2218,
    'ials_recs': 0.0645,
    'bivae_recs': 0.162,
    'bpr_recs': 0.1956,
    'tifuknn_recs': 0.0752,
    'toppersonal_recs': 0.3455,
    'toppopular_recs': 0.0891,
}

# Re-run the weighted fusion with the metric-derived weights; this overwrites
# the `rrf_weighted` produced with the arbitrary weights.
rrf_weighted = weighted_rrf(all_recs, model_columns, correct_model_weights, k=20, top_n=100)

Computing Weighted RRF with k=20...
Model weights: {'bert4rec_recs': 0.2774, 'sasrec_recs': 0.159, 'lightfm_recs': 0.2218, 'ials_recs': 0.0645, 'bivae_recs': 0.162, 'bpr_recs': 0.1956, 'tifuknn_recs': 0.0752, 'toppersonal_recs': 0.3455, 'toppopular_recs': 0.0891}


Weighted RRF: 100%|██████████| 7425/7425 [00:01<00:00, 6142.90it/s]


In [12]:
# The left join keeps every row of `joined`; users without a fused list end
# up with a non-list value, which is replaced by an empty list.
evaluation_df_rrf = joined.merge(rrf_weighted, on='user_id', how='left')
evaluation_df_rrf['rrf_weighted_recs'] = evaluation_df_rrf['rrf_weighted_recs'].map(
    lambda recs: recs if isinstance(recs, list) else []
)

# Score the current weighted fusion against the test interactions.
metrics_rrf = calculate_metrics(
    evaluation_df_rrf,
    train_col='train_val_interactions', gt_col='test_interactions',
    model_preds='rrf_weighted_recs', verbose=True,
)

[Metrics debug] resolved gt_col='test_interactions' item_id_index=0
[Metrics debug] ratings_true shape: (223395, 3) ratings_pred shape: (742500, 3)
  ratings_true dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  ratings_pred dtypes: {'user_id': dtype('int64'), 'item_id': dtype('int64')}
  user_id=11 gt_count=9 pred_count=100 overlap=2
  user_id=14 gt_count=56 pred_count=100 overlap=6
  user_id=21 gt_count=43 pred_count=100 overlap=12

At k=10:
  MAP@10       = 0.3491
  NDCG@10      = 0.7018
  Precision@10 = 0.2824
  Recall@10    = 0.0927

At k=100:
  MAP@100       = 0.1330
  NDCG@100      = 0.3779
  Precision@100 = 0.0902
  Recall@100    = 0.2727

Other Metrics:
  MRR                 = 0.3832
  Catalog Coverage    = 0.9701
  Diversity     = 0.9967  [0=same recs for all, 1=unique recs]
  Novelty             = 0.7657
  Serendipity         = 0.0214


In [13]:
# Average price of the weighted (v2) recommendations.
# NOTE(review): the message says "на валидации" (on validation), but
# evaluation_df_rrf holds recs that are scored against 'test_interactions' —
# confirm which split the label should name.
avg_rrf_recs_price_val = get_avg_recs_price(evaluation_df_rrf, item_to_price, 'rrf_weighted_recs')
print(f'Средняя цена рекомендаций модели RRF (v2) на валидации: {avg_rrf_recs_price_val:.2f}')

Средняя цена рекомендаций модели RRF (v2) на валидации: -3.85


```
Удалось наконец-то побить результат модели TopPersonal на тестовой выборке (0.3752)!
NDCG@100 с использованием взвешенного RRF = 0.3779.

Novelty=0.77 говорит о том, что есть хороший баланс между популярными товарами и новизной.
Serendipity в 0.02 говорит о том, что 2% рекомендаций оказываются неожиданными и релевантными, в то время как у модели TopPersonal Serendipity равняется нулю.

Средняя цена рекомендаций модели при этом увеличилась незначительно (на 0.01).
```