# Task 4

Для экспериментов с бейзлайнами будем использовать библиотеку cornac, т к там есть готовые функции для метрик, моделей, обучения и теста.

In [1]:
!pip install -q cornac

In [2]:
import pandas as pd
import numpy as np
from scipy import sparse
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import cornac as cn
from cornac.data.dataset import Dataset
from cornac.experiment.experiment import Experiment
from sklearn.preprocessing import MinMaxScaler

import random
import datetime

Метрики  
Для персонализированных рекомендаций товаров в интернет-магазине наиболее подходящими метриками являются те, которые отражают долю релевантных товаров среди рекомендованных и их ранжирование по релевантности. Такими метриками являются:  


*   Recall@k - доля релевантных элементов, которые были рекомендованы в топ-k рекомендациях из всех релевантных элементов для пользователя. Высокий Recall важен, чтобы не пропустить максимум подходящих рекомендаций.
*   Precision@k - доля релевантных элементов среди топ-k рекомендаций. Высокий Precision означает, что большинство рекомендованных элементов действительно интересны пользователю.
*   NDCG - метрика, которая учитывает как релевантность рекомендованных элементов, так и их позицию в списке рекомендаций. Более релевантные элементы на верхних позициях вносят больший вклад в значение NDCG. Отражает юзабилити рекомендаций.
*   HitRatio - доля пользователей, для которых хотя бы одна рекомендация была релевантной. Высокий Hit Ratio означает, что система полезна для большинства пользователей.
*   MAP - усредненная по всем пользователям средняя точность рекомендаций. Сочетает оценку точности для разных значений k (средняя по всем возможным k).

Эти метрики позволяют всесторонне оценить качество рекомендательной системы. Импортируем их из библиотеки, зададим k = 10

In [3]:
K = 20

precision_k = cn.metrics.Precision(k=K)
recall_k = cn.metrics.Recall(k=K)
ndcg_k = cn.metrics.NDCG(k=K)
hr_k = cn.metrics.HitRatio(k=K)
map_k = cn.metrics.MAP()

metrics = [
    precision_k,
    recall_k,
    ndcg_k,
    hr_k,
    map_k
]

Импортируем несколько моделей, на которых будем экспериментировать

In [4]:
from cornac.models.mf.recom_mf import MF
from cornac.models.svd.recom_svd import SVD
from cornac.models.pmf.recom_pmf import PMF
from cornac.models.nmf.recom_nmf import NMF
from cornac.models.bpr.recom_bpr import BPR

In [None]:
models = [
    SVD(),
    MF(),
    NMF(),
    PMF(),
    BPR()
]

Приведём данные к нужному виду

In [4]:
train_df = pd.read_csv('/content/drive/MyDrive/train_df.csv')
val_df = pd.read_csv('/content/drive/MyDrive/val_df.csv')
test_df = pd.read_csv('/content/drive/MyDrive/test_df.csv')
train_df.head()

Unnamed: 0,user_id,item_id,order_ts,order_date,time_diff
0,284402,3278,2023-01-01 00:00:05.000000,2023-01-01,
1,463218,2597,2023-01-01 00:00:08.000000,2023-01-01,
2,543,120,2023-01-01 00:00:12.000000,2023-01-01,
3,161063,516,2023-01-01 00:00:13.000000,2023-01-01,
4,43025,1285,2023-01-01 00:00:16.000000,2023-01-01,


In [27]:
# Количество уникальных пользователей и товаров
users = set(train_df['user_id'].unique()).union(set(val_df['user_id'].unique()), set(test_df['user_id'].unique()))
items = set(train_df['item_id'].unique()).union(set(val_df['item_id'].unique()), set(test_df['item_id'].unique()))

In [5]:
# Преобразуем строки столбца order_ts в дату-время
train_df['order_ts'] = pd.to_datetime(train_df['order_ts'])
val_df['order_ts'] = pd.to_datetime(val_df['order_ts'])
test_df['order_ts'] = pd.to_datetime(test_df['order_ts'])

Разделим валидационную и тестовую выборки на тёплых и холодных пользователей, сделаем для них отдельные бейзлайны

In [6]:
train_users = train_df['user_id'].unique()
cold_val_df = val_df[~val_df['user_id'].isin(train_users)]
warm_val_df = val_df[val_df['user_id'].isin(train_users)]
cold_test_df = test_df[~test_df['user_id'].isin(train_users)]
warm_test_df = test_df[test_df['user_id'].isin(train_users)]

In [13]:
# Количество пользователей и товаров
num_users = max(users) + 1
num_items = max(items) + 1

# Делаем маппинг, где id равны самим себе, потому что датасет был предварительно обработан и они равны индексам в списках уникальных товаров и пользователей
uid_map = {idx: idx for idx in users}
iid_map = {idx: idx for idx in items}

In [None]:
# Подсчитываем количество покупок каждого товара каждым пользователем
order_counts = train_df.groupby(['user_id', 'item_id']).size().reset_index(name='order_count')

# Создаем массивы индексов пользователей и товаров
user_indices = order_counts['user_id'].map(uid_map).values
item_indices = order_counts['item_id'].map(iid_map).values

# Создаем массив количества покупок
order_counts_arr = order_counts['order_count'].values
mask = order_counts_arr > 0
order_counts_arr[mask] = 1

# Создаем кортеж из трех массивов
uir_tuple = (user_indices, item_indices, order_counts_arr)

In [None]:
train_dataset = Dataset(num_users, num_items, uid_map, iid_map, uir_tuple)

In [None]:
# Подсчитываем количество покупок каждого товара каждым пользователем
order_counts = warm_val_df.groupby(['user_id', 'item_id']).size().reset_index(name='order_count')

# Создаем массивы индексов пользователей и товаров
user_indices = order_counts['user_id'].map(uid_map).values
item_indices = order_counts['item_id'].map(iid_map).values

# Создаем массив количества покупок
order_counts_arr = order_counts['order_count'].values
mask = order_counts_arr > 0
order_counts_arr[mask] = 1

# Создаем кортеж из трех массивов
uir_tuple = (user_indices, item_indices, order_counts_arr)

In [None]:
val_dataset = Dataset(num_users, num_items, uid_map, iid_map, uir_tuple)

In [None]:
# Подсчитываем количество покупок каждого товара каждым пользователем
order_counts = warm_test_df.groupby(['user_id', 'item_id']).size().reset_index(name='order_count')

# Создаем массивы индексов пользователей и товаров
user_indices = order_counts['user_id'].map(uid_map).values
item_indices = order_counts['item_id'].map(iid_map).values

# Создаем массив нормализованного количества покупок
order_counts_arr = order_counts['order_count'].values
mask = order_counts_arr > 0
order_counts_arr[mask] = 1

# Создаем кортеж из трех массивов
uir_tuple = (user_indices, item_indices, order_counts_arr)

In [None]:
test_dataset = Dataset(num_users, num_items, uid_map, iid_map, uir_tuple)

In [None]:
bm = cn.eval_methods.base_method.BaseMethod(data=train_dataset, rating_threshold=0.5)

bm.train_set = train_dataset
bm.val_set = val_dataset
bm.test_set = test_dataset

In [None]:
Experiment(eval_method=bm, models=models, metrics=metrics).run()


VALIDATION:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 |  Time (s)
--- + ----------- + ------ + ------- + ------------ + --------- + ---------
SVD |      0.0004 | 0.0012 |  0.0000 |       0.0000 |    0.0001 | 1602.0800
MF  |      0.0004 | 0.0013 |  0.0000 |       0.0000 |    0.0001 | 1610.4329
NMF |      0.0303 | 0.0038 |  0.0040 |       0.0015 |    0.0079 | 1629.9507
PMF |      0.5131 | 0.0713 |  0.1206 |       0.0449 |    0.1898 | 1755.7690
BPR |      0.5473 | 0.0880 |  0.1434 |       0.0533 |    0.2130 | 1513.7022

TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
SVD |      0.0003 | 0.0009 |  0.0000 |       0.0000 |    0.0001 |    7.0087 | 1226.0307
MF  |      0.0003 | 0.0010 |  0.0000 |       0.0000 |    0.0001 |    5.4543 | 1234.2082
NMF |      0.0197 | 0.0031 |  0.0030 |       0.0010 |    0.0067 |   58.9628 | 1239.5722


Лучше всего себя показали модели PMF, BPR, попробуем для них ещё задать параметр user_based=True (сначала вычисляется средняя производительность для каждого пользователя, затем полученные значения усредняются для получения окончательного результата)

In [None]:
Experiment(eval_method=bm, models=[PMF(), BPR()], metrics=metrics, user_based=True).run()


VALIDATION:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 |  Time (s)
--- + ----------- + ------ + ------- + ------------ + --------- + ---------
PMF |      0.5131 | 0.0713 |  0.1206 |       0.0449 |    0.1898 | 1867.4256
BPR |      0.5454 | 0.0861 |  0.1422 |       0.0531 |    0.2116 | 1648.6163

TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.4255 | 0.0648 |  0.1086 |       0.0320 |    0.1932 |  265.6671 | 1438.8638
BPR |      0.4641 | 0.0823 |  0.1332 |       0.0381 |    0.2195 |  448.1003 | 1322.8598



Результаты не изменились. Лучшее качество показала модель BPR, будем далее оптимизировать её. Подберём гиперпараметры для неё и объединим с другой моделью, предположительно - SR.

Для холодных пользователей за бейзлайн возьмём предсказание самых популярных за последние 2 недели товаров

In [None]:
cold_df = pd.concat([cold_val_df, cold_test_df], ignore_index=True)
date_start = cold_val_df.order_ts.min()

val_data_last = train_df[(train_df.order_ts >= date_start - datetime.timedelta(days=14)) & (train_df.order_ts < date_start)]
val_pop_rec = val_data_last[["user_id", "item_id"]].drop_duplicates().groupby("item_id").size()
# Составим список из 100 самых популярных товаров
val_pop_rec = val_pop_rec.sort_values()[-100:].index[::-1].to_numpy()

In [None]:
def precision_k(y_true, y_pred, k):
  TP = len(set(y_true) & set(y_pred))
  return TP / k

def recall_k(y_true, y_pred, k):
  TP = len(set(y_true) & set(y_pred))
  return TP / len(y_true)

In [None]:
k = 20

In [None]:
cold_df = cold_df.groupby('user_id')['item_id'].agg(lambda x: list(set(x))).reset_index()
cold_df.head()

Unnamed: 0,user_id,item_id
0,771376,"[817, 1217, 383, 471]"
1,771484,"[1066, 454, 447]"
2,773470,[24]
3,773560,"[5, 138, 144, 4631, 549, 1708, 179, 184, 1208,..."
4,773561,[341]


In [None]:
popular_recommendations = {}

precision_popular = []
recall_popular = []

for index, row in cold_df.iterrows():
  # Произвольно сэмплируем k популярных товаров из 100 самых популярных
  cur_rec = np.random.choice(val_pop_rec, k)

  cur_precision = precision_k(row['item_id'], cur_rec, k)
  cur_recall = recall_k(row['item_id'], cur_rec, k)

  precision_popular.append(cur_precision)
  recall_popular.append(cur_recall)

In [None]:
print(f'Precision@{k}: {np.mean(precision_popular)}, Recall@{k}: {np.mean(recall_popular)}')

Precision@20: 0.00997182887379764, Recall@20: 0.07593476900490265


Показатели метрик низкие, как и стоило ожидать, но и не нулевые, что хорошо

# Task 5

Попробуем обучить BSARec

In [None]:
!git clone -q https://github.com/LuckyHorseshoe-chan/BSARec.git

fatal: destination path 'BSARec' already exists and is not an empty directory.


In [None]:
%cd /content/BSARec/src

/content/BSARec/src


In [None]:
!python main.py  --data_name user_sequences \
                --lr 0.0005 \
                --alpha 0.7 \
                --c 5 \
                --num_attention_heads 1 \
                --train_name BSARec_user_sequences

2024-05-20 09:36:49,585 - Namespace(data_dir='./data/', output_dir='output/', data_name='user_sequences', do_eval=False, load_model=None, train_name='BSARec_user_sequences', num_items=10, num_users=508697, lr=0.0005, batch_size=256, epochs=200, no_cuda=False, log_freq=1, patience=10, num_workers=4, seed=42, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, gpu_id='0', variance=5, model_type='BSARec', max_seq_length=50, hidden_size=64, num_hidden_layers=2, hidden_act='gelu', num_attention_heads=1, attention_probs_dropout_prob=0.5, hidden_dropout_prob=0.5, initializer_range=0.02, c=5, alpha=0.7, cuda_condition=True, checkpoint_path='output/BSARec_user_sequences.pt', same_target_path='./data/user_sequences_same_target.npy', data_file='./data/test_user_sequences.txt', item_size=5026)
2024-05-20 09:36:49,585 - ./data/test_user_sequences.txt
2024-05-20 09:36:49,659 - BSARecModel(
  (item_embeddings): Embedding(5026, 64, padding_idx=0)
  (position_embeddings): Embedding(50, 64)
  (LayerNorm

Метрики изначально относительно неплохие, но за 30 эпох модель ничуть не обучилась, что нехорошо. Данное решение не подойдёт для улучшения бейзлайна

Подберём гиперпараметры для BPR. Для ускорения процесса тестировать качество будем на валидационной выборке

In [None]:
bpr = BPR(k=20, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5473 | 0.0879 |  0.1434 |       0.0534 |    0.2131 |  463.0999 | 1549.9049



In [None]:
bpr = BPR(k=20, learning_rate=0.05, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5612 | 0.0946 |  0.1500 |       0.0552 |    0.2232 |  495.4791 | 1554.4016



In [None]:
bpr = BPR(k=20, learning_rate=0.01, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5573 | 0.0903 |  0.1480 |       0.0555 |    0.2191 |  473.5401 | 1569.4845



In [None]:
bpr = BPR(k=20, learning_rate=0.1, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5393 | 0.0866 |  0.1377 |       0.0513 |    0.2091 |  469.1838 | 1591.2965



In [None]:
bpr = BPR(k=20, learning_rate=0.05, lambda_reg=0.001, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5593 | 0.0933 |  0.1482 |       0.0538 |    0.2228 |  470.5128 | 1576.8489



In [None]:
bpr = BPR(k=20, learning_rate=0.05, lambda_reg=0.1, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5275 | 0.0717 |  0.1222 |       0.0494 |    0.1976 |  474.9377 | 1594.7675



In [None]:
bpr = BPR(k=20, learning_rate=0.05, lambda_reg=0.05, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.5440 | 0.0812 |  0.1336 |       0.0517 |    0.2076 |  467.1473 | 1570.6540



Науличшее качество получилось при learning_rate=0.05 и дефолтном значении lambda_reg=0.01. Попробуем ещё немного поиграться с параметрами на RandomSearch сетке.

In [None]:
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

bpr = BPR(k=20, learning_rate=0.05, seed=123)
hr = cn.metrics.HitRatio(k=20)

# Random Search
rs_bpr = RandomSearch(
    model=bpr,
    space=[
        Continuous(name="learning_rate", low=0.01, high=0.1),
        Continuous(name="lambda_reg", low=0.001, high=0.01)
    ],
    metric=hr,
    eval_method=bm,
    n_trails=5,
)

In [None]:
cn.Experiment(
    eval_method=bm,
    models=[rs_bpr],
    metrics=[hr]
).run()


VALIDATION:
...
                 | HitRatio@20 | Time (s)
---------------- + ----------- + --------
RandomSearch_BPR |      0.5678 | 604.9820

TEST:
...
                 | HitRatio@20 | Train (s) | Test (s)
---------------- + ----------- + --------- + --------
RandomSearch_BPR |      0.4805 | 5415.4855 | 500.3786



In [None]:
rs_bpr.best_params

{'lambda_reg': 0.007268222670380756, 'learning_rate': 0.035752540145534153}

HR получилось немного выше (в доле тысячных), будем использовать эти параметры

In [None]:
bpr = BPR(k=20, lambda_reg=0.0073, learning_rate=0.0358, seed=123)
Experiment(eval_method=bm, models=[bpr], metrics=metrics, save_dir='/content/saved_models').run()


VALIDATION:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 |  Time (s)
--- + ----------- + ------ + ------- + ------------ + --------- + ---------
BPR |      0.5677 | 0.0970 |  0.1536 |       0.0561 |    0.2269 | 1814.0369

TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
BPR |      0.4805 | 0.0905 |  0.1412 |       0.0398 |    0.2301 |  596.2820 | 1391.5113



Подберём гиперпараметры для PMF

In [None]:
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

pmf = PMF(k=20, seed=123)
hr = cn.metrics.HitRatio(k=20)

# Random Search
rs_pmf = RandomSearch(
    model=pmf,
    space=[
        Continuous(name="learning_rate", low=0.01, high=0.1),
        Continuous(name="lambda_reg", low=0.001, high=0.1)
    ],
    metric=hr,
    eval_method=bm,
    n_trails=5,
)

In [None]:
cn.Experiment(
    eval_method=bm,
    models=[rs_pmf],
    metrics=[hr]
).run()


VALIDATION:
...
                 | HitRatio@20 | Time (s)
---------------- + ----------- + --------
RandomSearch_PMF |      0.0150 | 764.7602

TEST:
...
                 | HitRatio@20 | Train (s) | Test (s)
---------------- + ----------- + --------- + --------
RandomSearch_PMF |      0.0106 | 8673.3622 | 626.7428



Рандомная сетка дала плохие показатели, потестим вручную разные значения learning_rate

In [None]:
pmf = PMF(k=20, seed=123)
Experiment(eval_method=bm, models=[pmf], metrics=metrics).run()


VALIDATION:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 |  Time (s)
--- + ----------- + ------ + ------- + ------------ + --------- + ---------
PMF |      0.4840 | 0.0658 |  0.1088 |       0.0398 |    0.1708 | 1942.8851

TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.3954 | 0.0594 |  0.0972 |       0.0283 |    0.1734 |  949.2011 | 1464.7798



In [None]:
pmf = PMF(k=20, learning_rate=0.005, seed=123)
Experiment(eval_method=bm, models=[pmf], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.3184 | 0.0312 |  0.0507 |       0.0207 |    0.0899 |  931.2264 | 1939.2056



In [None]:
pmf = PMF(k=20, learning_rate=0.0005, seed=123)
Experiment(eval_method=bm, models=[pmf], metrics=metrics, save_dir='/content/saved_models').run()


VALIDATION:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 |  Time (s)
--- + ----------- + ------ + ------- + ------------ + --------- + ---------
PMF |      0.5131 | 0.0713 |  0.1206 |       0.0449 |    0.1898 | 1621.5927

TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.4255 | 0.0648 |  0.1086 |       0.0320 |    0.1932 |  822.3540 | 1262.4644



In [None]:
pmf = PMF(k=20, learning_rate=0.05, seed=123)
Experiment(eval_method=bm, models=[pmf], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.0048 | 0.0019 |  0.0009 |       0.0002 |    0.0013 |  989.4950 | 2125.0852



In [None]:
pmf = PMF(k=20, learning_rate=0.01, seed=123)
Experiment(eval_method=bm, models=[pmf], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.4377 | 0.0485 |  0.0881 |       0.0331 |    0.1432 |  974.2437 | 2093.5363



In [None]:
pmf = PMF(k=20, learning_rate=0.1, seed=123)
Experiment(eval_method=bm, models=[pmf], metrics=metrics).run()


TEST:
...
    | HitRatio@20 |    MAP | NDCG@20 | Precision@20 | Recall@20 | Train (s) |  Test (s)
--- + ----------- + ------ + ------- + ------------ + --------- + --------- + ---------
PMF |      0.0036 | 0.0014 |  0.0005 |       0.0002 |    0.0009 |  969.8029 | 2123.7696



Наилучшим качество метрик получилось при learning_rate=0.0005. Поиграемся с lambda_reg на рандомной сетке.

In [None]:
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

pmf = PMF(k=20, seed=123)
hr = cn.metrics.HitRatio(k=20)

# Random Search
rs_pmf = RandomSearch(
    model=pmf,
    space=[
        Continuous(name="lambda_reg", low=0.001, high=0.1)
    ],
    metric=hr,
    eval_method=bm,
    n_trails=5,
)

In [None]:
cn.Experiment(
    eval_method=bm,
    models=[rs_pmf],
    metrics=[hr]
).run()


VALIDATION:
...
                 | HitRatio@20 | Time (s)
---------------- + ----------- + --------
RandomSearch_PMF |      0.4092 | 673.3587

TEST:
...
                 | HitRatio@20 | Train (s) | Test (s)
---------------- + ----------- + --------- + --------
RandomSearch_PMF |      0.3257 | 8021.7246 | 562.3227



Показатели не улучшились, так что будем использовать learning_rate=0.0005 и дефолтный lambda_reg=0.001

Возьмём код из cornac и объединим модели по формуле $score = \lambda \cdot score_{bpr} + (1 - \lambda) \cdot score_{pmf}$

In [6]:
def rank(model1, model2, user_idx, item_indices, k=-1, **kwargs):
        """Rank all test items for a given user.

        Parameters
        ----------
        user_idx: int, required
            The index of the user for whom to perform item raking.

        item_indices: 1d array, optional, default: None
            A list of candidate item indices to be ranked by the user.
            If `None`, list of ranked known item indices and their scores will be returned.

        k: int, required
            Cut-off length for recommendations, k=-1 will return ranked list of all items.
            This is more important for ANN to know the limit to avoid exhaustive ranking.

        Returns
        -------
        (ranked_items, item_scores): tuple
            `ranked_items` contains item indices being ranked by their scores.
            `item_scores` contains scores of items corresponding to index in `item_indices` input.

        """
        # obtain item scores from the model
        try:
            all_item_scores1 = model1.score(user_idx, **kwargs)
            all_item_scores2 = model2.score(user_idx, **kwargs)
            all_item_scores = 0.9 * all_item_scores1 + 0.1 * all_item_scores2
        except:
            all_item_scores = np.ones(model1.total_items) * model1.default_score()

        item_scores = all_item_scores[item_indices]

        if k != -1:  # O(n + k log k), faster for small k which is usually the case
            partitioned_idx = np.argpartition(item_scores, -k)
            top_k_idx = partitioned_idx[-k:]
            sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
            partitioned_idx[-k:] = sorted_top_k_idx
            ranked_items = item_indices[partitioned_idx[::-1]]
        else:  # O(n log n)
            ranked_items = item_indices[item_scores.argsort()[::-1]]

        return ranked_items, item_scores

In [None]:
def ranking_eval(
    model1,
    model2,
    metrics,
    train_set,
    test_set,
    val_set=None,
    rating_threshold=1.0,
    exclude_unknowns=True,
    verbose=False,
):
    """Evaluate model on provided ranking metrics.

    Parameters
    ----------
    model: :obj:`cornac.models.Recommender`, required
        Recommender model to be evaluated.

    metrics: :obj:`iterable`, required
        List of rating metrics :obj:`cornac.metrics.RankingMetric`.

    train_set: :obj:`cornac.data.Dataset`, required
        Dataset to be used for model training. This will be used to exclude
        observations already appeared during training.

    test_set: :obj:`cornac.data.Dataset`, required
        Dataset to be used for evaluation.

    val_set: :obj:`cornac.data.Dataset`, optional, default: None
        Dataset to be used for model selection. This will be used to exclude
        observations already appeared during validation.

    rating_threshold: float, optional, default: 1.0
        The threshold to convert ratings into positive or negative feedback.

    exclude_unknowns: bool, optional, default: True
        Ignore unknown users and items during evaluation.

    verbose: bool, optional, default: False
        Output evaluation progress.

    Returns
    -------
    res: (List, List)
        Tuple of two lists:
         - average result for each of the metrics
         - average result per user for each of the metrics

    """

    if len(metrics) == 0:
        return [], []

    max_k = max(m.k for m in metrics)

    avg_results = []
    user_results = [{} for _ in enumerate(metrics)]

    test_mat = test_set.csr_matrix
    train_mat = train_set.csr_matrix
    val_mat = None if val_set is None else val_set.csr_matrix

    def pos_items(csr_row):
        return [
            item_idx
            for (item_idx, rating) in zip(csr_row.indices, csr_row.data)
            if rating >= rating_threshold
        ]

    test_user_indices = set(test_set.uir_tuple[0])
    for user_idx in tqdm(
        test_user_indices, desc="Ranking", disable=not verbose, miniters=100
    ):
        test_pos_items = pos_items(test_mat.getrow(user_idx))
        if len(test_pos_items) == 0:
            continue

        # binary mask for ground-truth positive items
        u_gt_pos_mask = np.zeros(test_set.num_items, dtype="int")
        u_gt_pos_mask[test_pos_items] = 1

        val_pos_items = [] if val_mat is None else pos_items(val_mat.getrow(user_idx))
        train_pos_items = (
            pos_items(train_mat.getrow(user_idx))
            if user_idx < train_mat.shape[0]
            else []
        )

        # binary mask for ground-truth negative items, removing all positive items
        u_gt_neg_mask = np.ones(test_set.num_items, dtype="int")
        u_gt_neg_mask[test_pos_items + val_pos_items + train_pos_items] = 0

        # filter items being considered for evaluation
        if exclude_unknowns:
            u_gt_pos_mask = u_gt_pos_mask[: train_set.num_items]
            u_gt_neg_mask = u_gt_neg_mask[: train_set.num_items]

        item_indices = np.nonzero(u_gt_pos_mask + u_gt_neg_mask)[0]
        u_gt_pos_items = np.nonzero(u_gt_pos_mask)[0]
        u_gt_neg_items = np.nonzero(u_gt_neg_mask)[0]

        item_rank, item_scores = rank(
            model1, model2, user_idx=user_idx, item_indices=item_indices, k=max_k
        )

        for i, mt in enumerate(metrics):
            mt_score = mt.compute(
                gt_pos=u_gt_pos_items,
                gt_neg=u_gt_neg_items,
                pd_rank=item_rank,
                pd_scores=item_scores,
                item_indices=item_indices,
            )
            user_results[i][user_idx] = mt_score

    # avg results of ranking metrics
    for i, mt in enumerate(metrics):
        avg_results.append(sum(user_results[i].values()) / len(user_results[i]))

    return avg_results, user_results

Поэкспериментируем с разными значениями lamdda

In [None]:
from collections import OrderedDict

# lamdda = 0.5

bpr = BPR.load("saved_models/BPR/")
pmf = PMF.load("saved_models/PMF/")
avg_results, user_results = ranking_eval(
    bpr,
    pmf,
    metrics,
    train_dataset,
    test_dataset,
    val_dataset,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=False,
)
metric_avg_results = OrderedDict()
metric_user_results = OrderedDict()
for i, mt in enumerate(metrics):
  metric_avg_results[mt.name] = avg_results[i]
  metric_user_results[mt.name] = user_results[i]
metric_avg_results

OrderedDict([('Precision@20', 0.036820791844880095),
             ('Recall@20', 0.21803919593366988),
             ('NDCG@20', 0.12943697007593605),
             ('HitRatio@20', 0.46710897008537816),
             ('MAP', 0.08166661930321538)])

In [None]:
from collections import OrderedDict

# lamdda = 0.7

bpr = BPR.load("/content/drive/MyDrive/saved_models/BPR/")
pmf = PMF.load("/content/drive/MyDrive/saved_models/PMF/")
avg_results, user_results = ranking_eval(
    bpr,
    pmf,
    metrics,
    train_dataset,
    test_dataset,
    val_dataset,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=False,
)
metric_avg_results = OrderedDict()
metric_user_results = OrderedDict()
for i, mt in enumerate(metrics):
  metric_avg_results[mt.name] = avg_results[i]
  metric_user_results[mt.name] = user_results[i]
metric_avg_results

OrderedDict([('Precision@20', 0.04004193638560071),
             ('Recall@20', 0.23424738886463292),
             ('NDCG@20', 0.14335617839364692),
             ('HitRatio@20', 0.491264328265124),
             ('MAP', 0.09118997880649016)])

In [None]:
from collections import OrderedDict

# lamdda = 0.8

bpr = BPR.load("/content/drive/MyDrive/saved_models/BPR/")
pmf = PMF.load("/content/drive/MyDrive/saved_models/PMF/")
avg_results, user_results = ranking_eval(
    bpr,
    pmf,
    metrics,
    train_dataset,
    test_dataset,
    val_dataset,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=False,
)
metric_avg_results = OrderedDict()
metric_user_results = OrderedDict()
for i, mt in enumerate(metrics):
  metric_avg_results[mt.name] = avg_results[i]
  metric_user_results[mt.name] = user_results[i]
metric_avg_results

OrderedDict([('Precision@20', 0.040887653497791804),
             ('Recall@20', 0.23744733179916816),
             ('NDCG@20', 0.1475151361357626),
             ('HitRatio@20', 0.4948149422568227),
             ('MAP', 0.09437564165201619)])

In [None]:
from collections import OrderedDict

# lamdda = 0.9

bpr = BPR.load("/content/drive/MyDrive/saved_models/BPR/")
pmf = PMF.load("/content/drive/MyDrive/saved_models/PMF/")
avg_results, user_results = ranking_eval(
    bpr,
    pmf,
    metrics,
    train_dataset,
    test_dataset,
    val_dataset,
    rating_threshold=0.5,
    exclude_unknowns=True,
    verbose=False,
)
metric_avg_results = OrderedDict()
metric_user_results = OrderedDict()
for i, mt in enumerate(metrics):
  metric_avg_results[mt.name] = avg_results[i]
  metric_user_results[mt.name] = user_results[i]
metric_avg_results

OrderedDict([('Precision@20', 0.04088808361456937),
             ('Recall@20', 0.23659542066127975),
             ('NDCG@20', 0.147157620116962),
             ('HitRatio@20', 0.4920428396309598),
             ('MAP', 0.09437119410218098)])

Лучше всего метрики получились при lambda = 0.9, немного лучше, чем до объединения моделей.  
Итоговая таблица:

In [3]:
import pandas as pd

# Создаем данные для таблицы
data = {
    'Model': ['BPR', 'BPR', 'PMF', 'PMF', 'BPR+PMF', 'Cold start'],
    'Learning rate': ['0.001', '0.0358', '0.001', '0.0005', '-', '-'],
    'Lambda reg': ['0.01', '0.0073', '0.001', '0.001', '-', '-'],
    'HitRatio@20': ['0.4657', '0.4805', '0.3954', '0.4255', '0.4920', '-'],
    'MAP': ['0.0823', '0.0905', '0.0594', '0.0648', '0.0944', '-'],
    'NDCG@20': ['0.1327', '0.1412', '0.0972', '0.1086', '0.1472', '-'],
    'Precision@20': ['0.0382', '0.0398', '0.0283', '0.0320', '0.0409', '0.0100'],
    'Recall@20': ['0.2206', '0.2301', '0.1734', '0.1932', '0.2366', '0.0760']
}

# Создаем DataFrame из данных
df = pd.DataFrame(data)

# Отображаем таблицу
display(df)


Unnamed: 0,Model,Learning rate,Lambda reg,HitRatio@20,MAP,NDCG@20,Precision@20,Recall@20
0,BPR,0.001,0.01,0.4657,0.0823,0.1327,0.0382,0.2206
1,BPR,0.0358,0.0073,0.4805,0.0905,0.1412,0.0398,0.2301
2,PMF,0.001,0.001,0.3954,0.0594,0.0972,0.0283,0.1734
3,PMF,0.0005,0.001,0.4255,0.0648,0.1086,0.032,0.1932
4,BPR+PMF,-,-,0.4920,0.0944,0.1472,0.0409,0.2366
5,Cold start,-,-,-,-,-,0.01,0.076
