In [None]:
# расскоментируйте код ниже, чтобы установить все зависимости
# !pip install -q \
#     pyarrow==12.0.1 \
#     polars==0.18.6 \
#     pandas==2.0.3 \
#     optuna==3.3.0 \
#     tqdm==4.65.0 \
#     numpy==1.24.3 \
#     redis==4.6.0 \
#     gensim==4.3.2

In [None]:
# раскоментируйте код ниже, чтобы скачать данные
# !wget -q https://files.grouplens.org/datasets/movielens/ml-100k.zip
# !unzip -q ml-100k.zip

In [1]:
import uuid
import redis
import optuna
import random

import polars as pl
import pandas as pd
import numpy as np
from tqdm import tqdm
from gensim.models import Word2Vec

from typing import List, Any

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ratings = pd.read_csv(
    'ml-100k/u.data', sep='\t',
    names=['user_id', 'item_id', 'rating', 'timestamp']
)
ratings = pl.from_pandas(ratings).filter(pl.col('rating') >= 4)
ratings

user_id,item_id,rating,timestamp
i64,i64,i64,i64
298,474,4,884182806
253,465,5,891628467
286,1014,5,879781125
200,222,5,876042340
122,387,5,879270459
291,1042,4,874834944
119,392,4,886176814
167,486,4,892738452
299,144,4,877881320
308,1,4,887736532


In [3]:
grouped_df = (
    ratings
    .groupby('user_id')
    .agg([
        pl.col('item_id').apply(lambda x: x[:-3]).alias('train_ids'),
        pl.col('rating').apply(lambda x: x[:-3]).alias('train_ratings'),
        pl.col('item_id').apply(lambda x: x[-3:]).alias('test_ids'),
        pl.col('rating').apply(lambda x: x[-3:]).alias('test_ratings'),
    ])
)
grouped_df

user_id,train_ids,train_ratings,test_ids,test_ratings
i64,list[i64],list[i64],list[i64],list[i64]
192,"[1061, 1160, … 100]","[4, 4, … 5]","[289, 9, 287]","[4, 5, 4]"
336,"[1047, 1057, … 1118]","[4, 4, … 4]","[273, 50, 173]","[5, 4, 5]"
160,"[234, 174, … 250]","[5, 5, … 4]","[488, 952, 127]","[5, 4, 5]"
256,"[452, 781, … 21]","[4, 5, … 4]","[829, 597, 230]","[4, 4, 4]"
928,"[8, 98, … 168]","[5, 5, … 5]","[191, 135, 9]","[5, 4, 5]"
384,"[272, 355, … 327]","[5, 4, … 4]","[879, 258, 286]","[4, 4, 4]"
32,"[1012, 249, … 7]","[4, 4, … 4]","[628, 50, 181]","[4, 4, 4]"
528,"[239, 58, … 202]","[5, 5, … 5]","[79, 213, 410]","[5, 4, 4]"
64,"[381, 736, … 732]","[4, 4, … 4]","[651, 503, 184]","[4, 4, 4]"
224,"[77, 69, … 570]","[4, 4, … 4]","[526, 378, 731]","[4, 4, 4]"


In [4]:
TOP_K = 10


def user_intersection(y_rel: List[Any], y_rec: List[Any], k: int = 10) -> int:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: number of items in intersection of y_rel and y_rec (truncated to top-K)
    """
    return len(set(y_rec[:k]).intersection(set(y_rel)))


def user_hitrate(y_rel: List[Any], y_rec: List[Any], k: int = 10) -> int:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: 1 if top-k recommendations contains at lease one relevant item
    """
    return int(user_intersection(y_rel, y_rec, k) > 0)


def user_precision(y_rel: List[Any], y_rec: List[Any], k: int = 10) -> float:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: percentage of relevant items through recommendations
    """
    return user_intersection(y_rel, y_rec, k) / k


def user_ap(y_rel: List[Any], y_rec: List[Any], k: int = 10) -> float:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: average precision metric for user recommendations
    """
    return np.sum([
        user_precision(y_rel, y_rec, idx + 1)
        for idx, item in enumerate(y_rec[:k]) if item in y_rel
    ]) / k

## Применим алгоритм w2v

Внутри gensim написан эффективный метод `predict_output_word`, позволяющий получать следующие возможные токены (в нашем случае объекты для рекомендации), перед фильтрацией просмотренного попросим его сгенерировать на `len(train_ids)` токенов больше (в худшем случае, нам придется все отфильтровать)

In [5]:
def evaluate_model(model):
    ap_list = []
    hitrate_list = []
    for train_ids, y_rel in grouped_df.select('train_ids', 'test_ids').rows():
        model_preds = model.predict_output_word(
            train_ids, topn=(TOP_K + len(train_ids))
        )
        if model_preds is None:
            hitrate_list.append(0)
            continue

        y_rec = [pred[0] for pred in model_preds if pred[0] not in train_ids]
        ap_list.append(user_ap(y_rel, y_rec))
        hitrate_list.append(user_hitrate(y_rel, y_rec))
    return np.mean(ap_list), np.mean(hitrate_list)

# обучим w2v с параметрами по умолчанию
model = Word2Vec(grouped_df['train_ids'].to_list())
mean_ap, mean_hitrate = evaluate_model(model)
print(f'MAP@{TOP_K} = {mean_ap:.4f} Hitrate@{TOP_K} = {mean_hitrate:.4f}')

MAP@10 = 0.0026 Hitrate@10 = 0.1083


## Подберем самые оптимальные гиперпараметры с помощью optuna

Дла алгоритма [W2V](https://radimrehurek.com/gensim/models/word2vec.html#gensim.models.word2vec.Word2Vec) рассмотрим следующие параметры:

- `sg` – 1, если использовать skip-gram, иначе cbow
- `window` – размер окна для обучения алгоритма w2v
- `ns_exponent` – степень популярности объектов, которая будет использована для negative sampling
- `negative` – количество негативных примеров для сэмплирования
- `min_count` – минимальное число взаимодействий, нужное для 
- `vector_size` – размерность эмбеддингов

In [6]:
SEED = 42

def set_seed(seed):
    np.random.seed(seed)
    random.seed(seed)

def objective(trial):
    sg = trial.suggest_categorical('sg', [0, 1])
    window = trial.suggest_int('window', 1, 10)
    ns_exponent = trial.suggest_float('ns_exponent', -3, 3)
    negative = trial.suggest_int('negative', 3, 20)
    min_count = trial.suggest_int('min_count', 0, 20)
    vector_size = trial.suggest_categorical('vector_size', [16, 32, 64, 128])
    
    print({
        'sg': sg,
        'window_len': window,
        'ns_exponent': ns_exponent,
        'negative': negative,
        'min_count': min_count,
        'vector_size': vector_size,
    })
    
    set_seed(SEED)
    model = Word2Vec(
        grouped_df['train_ids'].to_list(),
        window=window,
        sg=sg,
        hs=0,
        min_count=min_count,
        vector_size=vector_size,
        negative=negative,
        ns_exponent=ns_exponent,
        seed=SEED,
        epochs=10,
    )
    
    mean_ap, mean_hitrate = evaluate_model(model)
    print(f'MAP@{TOP_K} = {mean_ap:.4f} Hitrate@{TOP_K} = {mean_hitrate:.4f}')
    return mean_ap
    
    
study = optuna.create_study(directions=('maximize',))
study.optimize(objective, n_trials=100)

study.best_params

[I 2023-10-21 10:33:00,127] A new study created in memory with name: no-name-62dfb982-8983-4bce-9c11-da16b4298e3c


{'sg': 1, 'window_len': 4, 'ns_exponent': 2.7581226625078106, 'negative': 5, 'min_count': 3, 'vector_size': 32}


[I 2023-10-21 10:33:01,143] Trial 0 finished with value: 0.004109464707868963 and parameters: {'sg': 1, 'window': 4, 'ns_exponent': 2.7581226625078106, 'negative': 5, 'min_count': 3, 'vector_size': 32}. Best is trial 0 with value: 0.004109464707868963.


MAP@10 = 0.0041 Hitrate@10 = 0.1062
{'sg': 0, 'window_len': 2, 'ns_exponent': -1.3562907906098454, 'negative': 15, 'min_count': 5, 'vector_size': 64}


[I 2023-10-21 10:33:01,898] Trial 1 finished with value: 0.00873032759202972 and parameters: {'sg': 0, 'window': 2, 'ns_exponent': -1.3562907906098454, 'negative': 15, 'min_count': 5, 'vector_size': 64}. Best is trial 1 with value: 0.00873032759202972.


MAP@10 = 0.0087 Hitrate@10 = 0.1985
{'sg': 0, 'window_len': 9, 'ns_exponent': -1.581304645235897, 'negative': 14, 'min_count': 13, 'vector_size': 128}


[I 2023-10-21 10:33:02,823] Trial 2 finished with value: 0.011347180006754476 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -1.581304645235897, 'negative': 14, 'min_count': 13, 'vector_size': 128}. Best is trial 2 with value: 0.011347180006754476.


MAP@10 = 0.0113 Hitrate@10 = 0.2686
{'sg': 0, 'window_len': 7, 'ns_exponent': 1.5455542880572635, 'negative': 15, 'min_count': 11, 'vector_size': 32}


[I 2023-10-21 10:33:03,626] Trial 3 finished with value: 0.008971209050996286 and parameters: {'sg': 0, 'window': 7, 'ns_exponent': 1.5455542880572635, 'negative': 15, 'min_count': 11, 'vector_size': 32}. Best is trial 2 with value: 0.011347180006754476.


MAP@10 = 0.0090 Hitrate@10 = 0.2081
{'sg': 0, 'window_len': 8, 'ns_exponent': 1.165370535415807, 'negative': 18, 'min_count': 0, 'vector_size': 32}


[I 2023-10-21 10:33:04,552] Trial 4 finished with value: 0.004952803107058426 and parameters: {'sg': 0, 'window': 8, 'ns_exponent': 1.165370535415807, 'negative': 18, 'min_count': 0, 'vector_size': 32}. Best is trial 2 with value: 0.011347180006754476.


MAP@10 = 0.0050 Hitrate@10 = 0.1210
{'sg': 0, 'window_len': 7, 'ns_exponent': 1.6227163249640375, 'negative': 5, 'min_count': 2, 'vector_size': 64}


[I 2023-10-21 10:33:05,111] Trial 5 finished with value: 0.0048741979061128005 and parameters: {'sg': 0, 'window': 7, 'ns_exponent': 1.6227163249640375, 'negative': 5, 'min_count': 2, 'vector_size': 64}. Best is trial 2 with value: 0.011347180006754476.


MAP@10 = 0.0049 Hitrate@10 = 0.1200
{'sg': 0, 'window_len': 6, 'ns_exponent': -1.928800019867124, 'negative': 6, 'min_count': 7, 'vector_size': 32}


[I 2023-10-21 10:33:05,661] Trial 6 finished with value: 0.010258527524484971 and parameters: {'sg': 0, 'window': 6, 'ns_exponent': -1.928800019867124, 'negative': 6, 'min_count': 7, 'vector_size': 32}. Best is trial 2 with value: 0.011347180006754476.


MAP@10 = 0.0103 Hitrate@10 = 0.1996
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.42028306483831024, 'negative': 12, 'min_count': 13, 'vector_size': 32}


[I 2023-10-21 10:33:09,492] Trial 7 finished with value: 0.020928486997635935 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.42028306483831024, 'negative': 12, 'min_count': 13, 'vector_size': 32}. Best is trial 7 with value: 0.020928486997635935.


MAP@10 = 0.0209 Hitrate@10 = 0.4066
{'sg': 0, 'window_len': 3, 'ns_exponent': -1.7680193258615362, 'negative': 14, 'min_count': 0, 'vector_size': 64}


[I 2023-10-21 10:33:10,144] Trial 8 finished with value: 0.010633823032759204 and parameters: {'sg': 0, 'window': 3, 'ns_exponent': -1.7680193258615362, 'negative': 14, 'min_count': 0, 'vector_size': 64}. Best is trial 7 with value: 0.020928486997635935.


MAP@10 = 0.0106 Hitrate@10 = 0.2452
{'sg': 0, 'window_len': 10, 'ns_exponent': -2.216983845219264, 'negative': 5, 'min_count': 5, 'vector_size': 16}


[I 2023-10-21 10:33:10,685] Trial 9 finished with value: 0.010180555555555556 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -2.216983845219264, 'negative': 5, 'min_count': 5, 'vector_size': 16}. Best is trial 7 with value: 0.020928486997635935.


MAP@10 = 0.0102 Hitrate@10 = 0.2049
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.23067779689916335, 'negative': 10, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:33:15,124] Trial 10 finished with value: 0.026522500844309356 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.23067779689916335, 'negative': 10, 'min_count': 17, 'vector_size': 128}. Best is trial 10 with value: 0.026522500844309356.


MAP@10 = 0.0265 Hitrate@10 = 0.4650
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.2731512286661891, 'negative': 10, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:33:19,694] Trial 11 finished with value: 0.024716354272205338 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.2731512286661891, 'negative': 10, 'min_count': 19, 'vector_size': 128}. Best is trial 10 with value: 0.026522500844309356.


MAP@10 = 0.0247 Hitrate@10 = 0.4469
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.08306560873529606, 'negative': 9, 'min_count': 20, 'vector_size': 128}


[I 2023-10-21 10:33:24,010] Trial 12 finished with value: 0.030962470449172576 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.08306560873529606, 'negative': 9, 'min_count': 20, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0310 Hitrate@10 = 0.5287
{'sg': 1, 'window_len': 8, 'ns_exponent': 0.4054975605520044, 'negative': 9, 'min_count': 20, 'vector_size': 128}


[I 2023-10-21 10:33:27,522] Trial 13 finished with value: 0.01964969604863222 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': 0.4054975605520044, 'negative': 9, 'min_count': 20, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0196 Hitrate@10 = 0.3811
{'sg': 1, 'window_len': 5, 'ns_exponent': -2.823512498876477, 'negative': 8, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:33:29,709] Trial 14 finished with value: 0.009020601148260723 and parameters: {'sg': 1, 'window': 5, 'ns_exponent': -2.823512498876477, 'negative': 8, 'min_count': 17, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0090 Hitrate@10 = 0.2197
{'sg': 1, 'window_len': 1, 'ns_exponent': -0.625034301031024, 'negative': 3, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:33:30,271] Trial 15 finished with value: 0.011324468085106383 and parameters: {'sg': 1, 'window': 1, 'ns_exponent': -0.625034301031024, 'negative': 3, 'min_count': 17, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0113 Hitrate@10 = 0.2527
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.6430593866437229, 'negative': 11, 'min_count': 14, 'vector_size': 16}


[I 2023-10-21 10:33:34,335] Trial 16 finished with value: 0.014500379939209726 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.6430593866437229, 'negative': 11, 'min_count': 14, 'vector_size': 16}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0145 Hitrate@10 = 0.3259
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.9252089604529743, 'negative': 20, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:33:41,349] Trial 17 finished with value: 0.01738268321513002 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.9252089604529743, 'negative': 20, 'min_count': 17, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0174 Hitrate@10 = 0.3450
{'sg': 1, 'window_len': 6, 'ns_exponent': 0.13353913721585572, 'negative': 8, 'min_count': 15, 'vector_size': 128}


[I 2023-10-21 10:33:44,015] Trial 18 finished with value: 0.026428191489361704 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': 0.13353913721585572, 'negative': 8, 'min_count': 15, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0264 Hitrate@10 = 0.4830
{'sg': 1, 'window_len': 9, 'ns_exponent': -1.069463501581109, 'negative': 12, 'min_count': 9, 'vector_size': 128}


[I 2023-10-21 10:33:49,134] Trial 19 finished with value: 0.014239446133063154 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': -1.069463501581109, 'negative': 12, 'min_count': 9, 'vector_size': 128}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0142 Hitrate@10 = 0.3089
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.21056636326668382, 'negative': 7, 'min_count': 20, 'vector_size': 16}


[I 2023-10-21 10:33:51,347] Trial 20 finished with value: 0.027304922323539347 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.21056636326668382, 'negative': 7, 'min_count': 20, 'vector_size': 16}. Best is trial 12 with value: 0.030962470449172576.


MAP@10 = 0.0273 Hitrate@10 = 0.4820
{'sg': 1, 'window_len': 10, 'ns_exponent': 0.07885693302126778, 'negative': 7, 'min_count': 20, 'vector_size': 16}


[I 2023-10-21 10:33:53,940] Trial 21 finished with value: 0.03159612462006079 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': 0.07885693302126778, 'negative': 7, 'min_count': 20, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0316 Hitrate@10 = 0.4989
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.1191616832537234, 'negative': 7, 'min_count': 19, 'vector_size': 16}


[I 2023-10-21 10:33:55,996] Trial 22 finished with value: 0.028870694022289766 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.1191616832537234, 'negative': 7, 'min_count': 19, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0289 Hitrate@10 = 0.4926
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.5814176207119313, 'negative': 7, 'min_count': 19, 'vector_size': 16}


[I 2023-10-21 10:33:58,576] Trial 23 finished with value: 0.01721605876393111 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.5814176207119313, 'negative': 7, 'min_count': 19, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0172 Hitrate@10 = 0.3493
{'sg': 1, 'window_len': 5, 'ns_exponent': -0.6070057282001746, 'negative': 3, 'min_count': 18, 'vector_size': 16}


[I 2023-10-21 10:33:59,490] Trial 24 finished with value: 0.018922534616683553 and parameters: {'sg': 1, 'window': 5, 'ns_exponent': -0.6070057282001746, 'negative': 3, 'min_count': 18, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0189 Hitrate@10 = 0.3800
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.9232359617365349, 'negative': 9, 'min_count': 15, 'vector_size': 16}


[I 2023-10-21 10:34:02,301] Trial 25 finished with value: 0.015915569064505235 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.9232359617365349, 'negative': 9, 'min_count': 15, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0159 Hitrate@10 = 0.3376
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.8210292027560508, 'negative': 7, 'min_count': 20, 'vector_size': 16}


[I 2023-10-21 10:34:04,817] Trial 26 finished with value: 0.015127237419790612 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.8210292027560508, 'negative': 7, 'min_count': 20, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0151 Hitrate@10 = 0.3100
{'sg': 1, 'window_len': 4, 'ns_exponent': 0.06014684233782927, 'negative': 4, 'min_count': 11, 'vector_size': 16}


[I 2023-10-21 10:34:05,894] Trial 27 finished with value: 0.030690222897669702 and parameters: {'sg': 1, 'window': 4, 'ns_exponent': 0.06014684233782927, 'negative': 4, 'min_count': 11, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0307 Hitrate@10 = 0.5149
{'sg': 1, 'window_len': 4, 'ns_exponent': 0.17195676439299362, 'negative': 4, 'min_count': 8, 'vector_size': 16}


[I 2023-10-21 10:34:07,000] Trial 28 finished with value: 0.022204576156703813 and parameters: {'sg': 1, 'window': 4, 'ns_exponent': 0.17195676439299362, 'negative': 4, 'min_count': 8, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0222 Hitrate@10 = 0.4321
{'sg': 1, 'window_len': 4, 'ns_exponent': 2.954020762455409, 'negative': 4, 'min_count': 11, 'vector_size': 16}


[I 2023-10-21 10:34:08,035] Trial 29 finished with value: 0.004573919284025668 and parameters: {'sg': 1, 'window': 4, 'ns_exponent': 2.954020762455409, 'negative': 4, 'min_count': 11, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0046 Hitrate@10 = 0.1454
{'sg': 1, 'window_len': 3, 'ns_exponent': 2.19002327108228, 'negative': 6, 'min_count': 15, 'vector_size': 64}


[I 2023-10-21 10:34:09,170] Trial 30 finished with value: 0.00699096588990206 and parameters: {'sg': 1, 'window': 3, 'ns_exponent': 2.19002327108228, 'negative': 6, 'min_count': 15, 'vector_size': 64}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0070 Hitrate@10 = 0.1444
{'sg': 1, 'window_len': 3, 'ns_exponent': 0.2375133139226374, 'negative': 6, 'min_count': 18, 'vector_size': 16}


[I 2023-10-21 10:34:10,298] Trial 31 finished with value: 0.023278242147923 and parameters: {'sg': 1, 'window': 3, 'ns_exponent': 0.2375133139226374, 'negative': 6, 'min_count': 18, 'vector_size': 16}. Best is trial 21 with value: 0.03159612462006079.


MAP@10 = 0.0233 Hitrate@10 = 0.4183
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.02380591951456341, 'negative': 8, 'min_count': 19, 'vector_size': 16}


[I 2023-10-21 10:34:12,517] Trial 32 finished with value: 0.03217223910840932 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.02380591951456341, 'negative': 8, 'min_count': 19, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0322 Hitrate@10 = 0.5276
{'sg': 1, 'window_len': 2, 'ns_exponent': -0.12985333637161944, 'negative': 9, 'min_count': 16, 'vector_size': 16}


[I 2023-10-21 10:34:13,788] Trial 33 finished with value: 0.02799257007767646 and parameters: {'sg': 1, 'window': 2, 'ns_exponent': -0.12985333637161944, 'negative': 9, 'min_count': 16, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0280 Hitrate@10 = 0.4894
{'sg': 1, 'window_len': 5, 'ns_exponent': 0.9339163928064393, 'negative': 11, 'min_count': 12, 'vector_size': 16}


[I 2023-10-21 10:34:16,328] Trial 34 finished with value: 0.012817544748395812 and parameters: {'sg': 1, 'window': 5, 'ns_exponent': 0.9339163928064393, 'negative': 11, 'min_count': 12, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0128 Hitrate@10 = 0.2760
{'sg': 1, 'window_len': 6, 'ns_exponent': 0.4726845962289164, 'negative': 4, 'min_count': 20, 'vector_size': 16}


[I 2023-10-21 10:34:17,489] Trial 35 finished with value: 0.017952254305977713 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': 0.4726845962289164, 'negative': 4, 'min_count': 20, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0180 Hitrate@10 = 0.3577
{'sg': 1, 'window_len': 2, 'ns_exponent': 1.106991275250191, 'negative': 8, 'min_count': 6, 'vector_size': 64}


[I 2023-10-21 10:34:19,435] Trial 36 finished with value: 0.004822863897331983 and parameters: {'sg': 1, 'window': 2, 'ns_exponent': 1.106991275250191, 'negative': 8, 'min_count': 6, 'vector_size': 64}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0048 Hitrate@10 = 0.1614
{'sg': 0, 'window_len': 4, 'ns_exponent': -0.5637625790284488, 'negative': 13, 'min_count': 3, 'vector_size': 32}


[I 2023-10-21 10:34:20,329] Trial 37 finished with value: 0.012965594393785885 and parameters: {'sg': 0, 'window': 4, 'ns_exponent': -0.5637625790284488, 'negative': 13, 'min_count': 3, 'vector_size': 32}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0130 Hitrate@10 = 0.2877
{'sg': 1, 'window_len': 1, 'ns_exponent': 1.4442011776262054, 'negative': 16, 'min_count': 18, 'vector_size': 16}


[I 2023-10-21 10:34:21,837] Trial 38 finished with value: 0.007533308004052684 and parameters: {'sg': 1, 'window': 1, 'ns_exponent': 1.4442011776262054, 'negative': 16, 'min_count': 18, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0075 Hitrate@10 = 0.2038
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.06035377172129573, 'negative': 5, 'min_count': 10, 'vector_size': 32}


[I 2023-10-21 10:34:22,487] Trial 39 finished with value: 0.02367945795339412 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.06035377172129573, 'negative': 5, 'min_count': 10, 'vector_size': 32}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0237 Hitrate@10 = 0.4427
{'sg': 1, 'window_len': 3, 'ns_exponent': -1.3232209898796081, 'negative': 10, 'min_count': 13, 'vector_size': 64}


[I 2023-10-21 10:34:24,382] Trial 40 finished with value: 0.01237951705504897 and parameters: {'sg': 1, 'window': 3, 'ns_exponent': -1.3232209898796081, 'negative': 10, 'min_count': 13, 'vector_size': 64}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0124 Hitrate@10 = 0.2760
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.17923535364689197, 'negative': 7, 'min_count': 19, 'vector_size': 16}


[I 2023-10-21 10:34:26,521] Trial 41 finished with value: 0.026756543397500842 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.17923535364689197, 'negative': 7, 'min_count': 19, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0268 Hitrate@10 = 0.4682
{'sg': 1, 'window_len': 6, 'ns_exponent': 0.4359351884684365, 'negative': 6, 'min_count': 19, 'vector_size': 16}


[I 2023-10-21 10:34:28,195] Trial 42 finished with value: 0.01981695373184735 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': 0.4359351884684365, 'negative': 6, 'min_count': 19, 'vector_size': 16}. Best is trial 32 with value: 0.03217223910840932.


MAP@10 = 0.0198 Hitrate@10 = 0.3779
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.05912476547517344, 'negative': 8, 'min_count': 18, 'vector_size': 16}


[I 2023-10-21 10:34:30,522] Trial 43 finished with value: 0.034180091185410336 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.05912476547517344, 'negative': 8, 'min_count': 18, 'vector_size': 16}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0342 Hitrate@10 = 0.5329
{'sg': 0, 'window_len': 5, 'ns_exponent': -0.35902748668533857, 'negative': 9, 'min_count': 16, 'vector_size': 16}


[I 2023-10-21 10:34:31,266] Trial 44 finished with value: 0.018463441404930767 and parameters: {'sg': 0, 'window': 5, 'ns_exponent': -0.35902748668533857, 'negative': 9, 'min_count': 16, 'vector_size': 16}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0185 Hitrate@10 = 0.3811
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.05803359835812717, 'negative': 8, 'min_count': 18, 'vector_size': 16}


[I 2023-10-21 10:34:34,149] Trial 45 finished with value: 0.031954111786558596 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.05803359835812717, 'negative': 8, 'min_count': 18, 'vector_size': 16}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0320 Hitrate@10 = 0.5329
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.45272692607150233, 'negative': 8, 'min_count': 18, 'vector_size': 32}


[I 2023-10-21 10:34:37,387] Trial 46 finished with value: 0.022002701789935835 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.45272692607150233, 'negative': 8, 'min_count': 18, 'vector_size': 32}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0220 Hitrate@10 = 0.4183
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.06029500070101812, 'negative': 10, 'min_count': 16, 'vector_size': 128}


[I 2023-10-21 10:34:42,653] Trial 47 finished with value: 0.033634920634920636 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.06029500070101812, 'negative': 10, 'min_count': 16, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0336 Hitrate@10 = 0.5329
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.780277219229603, 'negative': 10, 'min_count': 16, 'vector_size': 16}


[I 2023-10-21 10:34:43,522] Trial 48 finished with value: 0.013317840256670046 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.780277219229603, 'negative': 10, 'min_count': 16, 'vector_size': 16}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0133 Hitrate@10 = 0.3015
{'sg': 1, 'window_len': 8, 'ns_exponent': 0.43762950425455727, 'negative': 11, 'min_count': 14, 'vector_size': 128}


[I 2023-10-21 10:34:48,573] Trial 49 finished with value: 0.01794398007429922 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': 0.43762950425455727, 'negative': 11, 'min_count': 14, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0179 Hitrate@10 = 0.3662
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.7739574603987153, 'negative': 12, 'min_count': 17, 'vector_size': 64}


[I 2023-10-21 10:34:53,627] Trial 50 finished with value: 0.01946550996285039 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.7739574603987153, 'negative': 12, 'min_count': 17, 'vector_size': 64}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0195 Hitrate@10 = 0.3737
{'sg': 1, 'window_len': 9, 'ns_exponent': -0.14512279924827992, 'negative': 10, 'min_count': 20, 'vector_size': 128}


[I 2023-10-21 10:34:58,489] Trial 51 finished with value: 0.02847082911178656 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': -0.14512279924827992, 'negative': 10, 'min_count': 20, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0285 Hitrate@10 = 0.4989
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.351358387305634, 'negative': 8, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:35:02,704] Trial 52 finished with value: 0.02367532083755488 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.351358387305634, 'negative': 8, 'min_count': 18, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0237 Hitrate@10 = 0.4416
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.04450721274642237, 'negative': 9, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:35:06,819] Trial 53 finished with value: 0.032356382978723404 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.04450721274642237, 'negative': 9, 'min_count': 19, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0324 Hitrate@10 = 0.5393
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.08264219609194082, 'negative': 9, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:35:10,995] Trial 54 finished with value: 0.033093465045592706 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.08264219609194082, 'negative': 9, 'min_count': 17, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0331 Hitrate@10 = 0.5372
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.38576856120564174, 'negative': 10, 'min_count': 16, 'vector_size': 128}


[I 2023-10-21 10:35:14,950] Trial 55 finished with value: 0.023444908814589667 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.38576856120564174, 'negative': 10, 'min_count': 16, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0234 Hitrate@10 = 0.4299
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.26012186048329783, 'negative': 9, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:35:18,847] Trial 56 finished with value: 0.02566130530226275 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.26012186048329783, 'negative': 9, 'min_count': 17, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0257 Hitrate@10 = 0.4278
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.6578150509337013, 'negative': 13, 'min_count': 14, 'vector_size': 128}


[I 2023-10-21 10:35:25,082] Trial 57 finished with value: 0.018411600810536985 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.6578150509337013, 'negative': 13, 'min_count': 14, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0184 Hitrate@10 = 0.3524
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.1584534065103575, 'negative': 11, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:35:25,985] Trial 58 finished with value: 0.023075903411009797 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.1584534065103575, 'negative': 11, 'min_count': 17, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0231 Hitrate@10 = 0.4544
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.6126323106912466, 'negative': 8, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:35:28,878] Trial 59 finished with value: 0.016995187436676797 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.6126323106912466, 'negative': 8, 'min_count': 19, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0170 Hitrate@10 = 0.3397
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.0198834909873613, 'negative': 9, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:35:32,326] Trial 60 finished with value: 0.03302144545761567 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.0198834909873613, 'negative': 9, 'min_count': 18, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0330 Hitrate@10 = 0.5372
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.08268852685230993, 'negative': 9, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:35:35,832] Trial 61 finished with value: 0.03189002870651807 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.08268852685230993, 'negative': 9, 'min_count': 18, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0319 Hitrate@10 = 0.5287
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.3478611711106473, 'negative': 9, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:35:38,945] Trial 62 finished with value: 0.021479103343465047 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.3478611711106473, 'negative': 9, 'min_count': 19, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0215 Hitrate@10 = 0.4066
{'sg': 1, 'window_len': 10, 'ns_exponent': 0.0240100952482048, 'negative': 8, 'min_count': 15, 'vector_size': 128}


[I 2023-10-21 10:35:42,930] Trial 63 finished with value: 0.031288247213779125 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': 0.0240100952482048, 'negative': 8, 'min_count': 15, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0313 Hitrate@10 = 0.5127
{'sg': 1, 'window_len': 9, 'ns_exponent': -0.27668545110378534, 'negative': 10, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:35:47,408] Trial 64 finished with value: 0.0245580884836204 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': -0.27668545110378534, 'negative': 10, 'min_count': 18, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0246 Hitrate@10 = 0.4522
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.5068515081786203, 'negative': 6, 'min_count': 16, 'vector_size': 128}


[I 2023-10-21 10:35:49,522] Trial 65 finished with value: 0.02038627152988855 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.5068515081786203, 'negative': 6, 'min_count': 16, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0204 Hitrate@10 = 0.4076
{'sg': 1, 'window_len': 10, 'ns_exponent': 0.3221961008968451, 'negative': 11, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:35:54,311] Trial 66 finished with value: 0.023092874029044242 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': 0.3221961008968451, 'negative': 11, 'min_count': 19, 'vector_size': 128}. Best is trial 43 with value: 0.034180091185410336.


MAP@10 = 0.0231 Hitrate@10 = 0.4140
{'sg': 1, 'window_len': 8, 'ns_exponent': 0.019544320775065807, 'negative': 7, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:35:57,239] Trial 67 finished with value: 0.034665695710908474 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': 0.019544320775065807, 'negative': 7, 'min_count': 17, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0347 Hitrate@10 = 0.5446
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.7770515811012342, 'negative': 7, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:36:00,066] Trial 68 finished with value: 0.01813728470111449 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.7770515811012342, 'negative': 7, 'min_count': 17, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0181 Hitrate@10 = 0.3694
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.039720816952864044, 'negative': 7, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:36:02,917] Trial 69 finished with value: 0.03456155015197568 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.039720816952864044, 'negative': 7, 'min_count': 17, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0346 Hitrate@10 = 0.5382
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.2464896053941942, 'negative': 7, 'min_count': 15, 'vector_size': 128}


[I 2023-10-21 10:36:05,800] Trial 70 finished with value: 0.024869849712934818 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.2464896053941942, 'negative': 7, 'min_count': 15, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0249 Hitrate@10 = 0.4512
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.054398507729226456, 'negative': 9, 'min_count': 16, 'vector_size': 128}


[I 2023-10-21 10:36:09,126] Trial 71 finished with value: 0.03226414218169537 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.054398507729226456, 'negative': 9, 'min_count': 16, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0323 Hitrate@10 = 0.5138
{'sg': 1, 'window_len': 8, 'ns_exponent': 0.21535167405762023, 'negative': 9, 'min_count': 14, 'vector_size': 128}


[I 2023-10-21 10:36:12,940] Trial 72 finished with value: 0.02386613475177305 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': 0.21535167405762023, 'negative': 9, 'min_count': 14, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0239 Hitrate@10 = 0.4437
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.07232820187871056, 'negative': 10, 'min_count': 16, 'vector_size': 128}


[I 2023-10-21 10:36:16,485] Trial 73 finished with value: 0.03073340932117528 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.07232820187871056, 'negative': 10, 'min_count': 16, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0307 Hitrate@10 = 0.5074
{'sg': 1, 'window_len': 8, 'ns_exponent': 0.5890009301940449, 'negative': 6, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:36:19,108] Trial 74 finished with value: 0.01619178486997636 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': 0.5890009301940449, 'negative': 6, 'min_count': 17, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0162 Hitrate@10 = 0.3397
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.46820813737468564, 'negative': 9, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:36:22,459] Trial 75 finished with value: 0.020915062478892268 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.46820813737468564, 'negative': 9, 'min_count': 17, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0209 Hitrate@10 = 0.3992
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.11424600480960653, 'negative': 7, 'min_count': 16, 'vector_size': 128}


[I 2023-10-21 10:36:25,157] Trial 76 finished with value: 0.02672500844309355 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.11424600480960653, 'negative': 7, 'min_count': 16, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0267 Hitrate@10 = 0.4820
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.26312019917859564, 'negative': 8, 'min_count': 12, 'vector_size': 128}


[I 2023-10-21 10:36:27,960] Trial 77 finished with value: 0.0244701536643026 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.26312019917859564, 'negative': 8, 'min_count': 12, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0245 Hitrate@10 = 0.4480
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.3056138885533601, 'negative': 5, 'min_count': 15, 'vector_size': 128}


[I 2023-10-21 10:36:30,511] Trial 78 finished with value: 0.020955673758865246 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.3056138885533601, 'negative': 5, 'min_count': 15, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0210 Hitrate@10 = 0.4161
{'sg': 0, 'window_len': 8, 'ns_exponent': 0.4774820864247317, 'negative': 10, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:36:31,348] Trial 79 finished with value: 0.01399712934819318 and parameters: {'sg': 0, 'window': 8, 'ns_exponent': 0.4774820864247317, 'negative': 10, 'min_count': 18, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0140 Hitrate@10 = 0.3333
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.0802089579034597, 'negative': 9, 'min_count': 20, 'vector_size': 32}


[I 2023-10-21 10:36:33,987] Trial 80 finished with value: 0.03064061972306653 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.0802089579034597, 'negative': 9, 'min_count': 20, 'vector_size': 32}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0306 Hitrate@10 = 0.5096
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.044646095930997484, 'negative': 7, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:36:36,302] Trial 81 finished with value: 0.03362512664640325 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.044646095930997484, 'negative': 7, 'min_count': 19, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0336 Hitrate@10 = 0.5329
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.16836201837345144, 'negative': 20, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:36:41,939] Trial 82 finished with value: 0.029297661263086795 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.16836201837345144, 'negative': 20, 'min_count': 18, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0293 Hitrate@10 = 0.5053
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.052456071032516885, 'negative': 7, 'min_count': 1, 'vector_size': 128}


[I 2023-10-21 10:36:44,639] Trial 83 finished with value: 0.034088230327592034 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.052456071032516885, 'negative': 7, 'min_count': 1, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0341 Hitrate@10 = 0.5117
{'sg': 1, 'window_len': 5, 'ns_exponent': -0.32796030451281244, 'negative': 7, 'min_count': 4, 'vector_size': 128}


[I 2023-10-21 10:36:46,901] Trial 84 finished with value: 0.0213741979061128 and parameters: {'sg': 1, 'window': 5, 'ns_exponent': -0.32796030451281244, 'negative': 7, 'min_count': 4, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0214 Hitrate@10 = 0.4140
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.5327884301857565, 'negative': 6, 'min_count': 1, 'vector_size': 128}


[I 2023-10-21 10:36:49,016] Trial 85 finished with value: 0.015156661600810539 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.5327884301857565, 'negative': 6, 'min_count': 1, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0152 Hitrate@10 = 0.3110
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.054821681467208616, 'negative': 7, 'min_count': 8, 'vector_size': 128}


[I 2023-10-21 10:36:51,562] Trial 86 finished with value: 0.0316347095575819 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.054821681467208616, 'negative': 7, 'min_count': 8, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0316 Hitrate@10 = 0.5318
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.2079989334953133, 'negative': 8, 'min_count': 6, 'vector_size': 64}


[I 2023-10-21 10:36:55,081] Trial 87 finished with value: 0.021772500844309355 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.2079989334953133, 'negative': 8, 'min_count': 6, 'vector_size': 64}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0218 Hitrate@10 = 0.4289
{'sg': 1, 'window_len': 5, 'ns_exponent': -0.2217014775305034, 'negative': 18, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:36:59,584] Trial 88 finished with value: 0.025085021952043228 and parameters: {'sg': 1, 'window': 5, 'ns_exponent': -0.2217014775305034, 'negative': 18, 'min_count': 19, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0251 Hitrate@10 = 0.4650
{'sg': 1, 'window_len': 6, 'ns_exponent': 0.39368235494987863, 'negative': 5, 'min_count': 20, 'vector_size': 128}


[I 2023-10-21 10:37:01,385] Trial 89 finished with value: 0.02057957615670382 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': 0.39368235494987863, 'negative': 5, 'min_count': 20, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0206 Hitrate@10 = 0.3790
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.7217629591294634, 'negative': 7, 'min_count': 19, 'vector_size': 128}


[I 2023-10-21 10:37:02,153] Trial 90 finished with value: 0.012706813576494428 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.7217629591294634, 'negative': 7, 'min_count': 19, 'vector_size': 128}. Best is trial 67 with value: 0.034665695710908474.


MAP@10 = 0.0127 Hitrate@10 = 0.3089
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.020629396237178063, 'negative': 9, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:37:05,338] Trial 91 finished with value: 0.034732480580884836 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.020629396237178063, 'negative': 9, 'min_count': 17, 'vector_size': 128}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0347 Hitrate@10 = 0.5467
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.019107373035056704, 'negative': 8, 'min_count': 18, 'vector_size': 128}


[I 2023-10-21 10:37:08,203] Trial 92 finished with value: 0.032462470449172574 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.019107373035056704, 'negative': 8, 'min_count': 18, 'vector_size': 128}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0325 Hitrate@10 = 0.5308
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.2109532926246713, 'negative': 8, 'min_count': 17, 'vector_size': 128}


[I 2023-10-21 10:37:11,131] Trial 93 finished with value: 0.02490759034110098 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.2109532926246713, 'negative': 8, 'min_count': 17, 'vector_size': 128}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0249 Hitrate@10 = 0.4501
{'sg': 1, 'window_len': 7, 'ns_exponent': -0.0012402059389011594, 'negative': 6, 'min_count': 18, 'vector_size': 32}


[I 2023-10-21 10:37:13,141] Trial 94 finished with value: 0.033022965214454574 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': -0.0012402059389011594, 'negative': 6, 'min_count': 18, 'vector_size': 32}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0330 Hitrate@10 = 0.5340
{'sg': 1, 'window_len': 8, 'ns_exponent': -0.16468397681103425, 'negative': 6, 'min_count': 0, 'vector_size': 32}


[I 2023-10-21 10:37:15,732] Trial 95 finished with value: 0.02683742823370483 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': -0.16468397681103425, 'negative': 6, 'min_count': 0, 'vector_size': 32}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0268 Hitrate@10 = 0.4671
{'sg': 1, 'window_len': 6, 'ns_exponent': -0.38278955965140593, 'negative': 5, 'min_count': 17, 'vector_size': 32}


[I 2023-10-21 10:37:17,330] Trial 96 finished with value: 0.02167869807497467 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': -0.38278955965140593, 'negative': 5, 'min_count': 17, 'vector_size': 32}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0217 Hitrate@10 = 0.4310
{'sg': 1, 'window_len': 7, 'ns_exponent': 0.5117105133615671, 'negative': 5, 'min_count': 10, 'vector_size': 32}


[I 2023-10-21 10:37:19,206] Trial 97 finished with value: 0.015501519756838906 and parameters: {'sg': 1, 'window': 7, 'ns_exponent': 0.5117105133615671, 'negative': 5, 'min_count': 10, 'vector_size': 32}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0155 Hitrate@10 = 0.3471
{'sg': 1, 'window_len': 8, 'ns_exponent': 0.15214988812821462, 'negative': 7, 'min_count': 18, 'vector_size': 32}


[I 2023-10-21 10:37:21,885] Trial 98 finished with value: 0.028558130699088146 and parameters: {'sg': 1, 'window': 8, 'ns_exponent': 0.15214988812821462, 'negative': 7, 'min_count': 18, 'vector_size': 32}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0286 Hitrate@10 = 0.4915
{'sg': 1, 'window_len': 6, 'ns_exponent': 0.3833942257883662, 'negative': 12, 'min_count': 2, 'vector_size': 32}


[I 2023-10-21 10:37:25,726] Trial 99 finished with value: 0.012993287740628167 and parameters: {'sg': 1, 'window': 6, 'ns_exponent': 0.3833942257883662, 'negative': 12, 'min_count': 2, 'vector_size': 32}. Best is trial 91 with value: 0.034732480580884836.


MAP@10 = 0.0130 Hitrate@10 = 0.3217


{'sg': 1,
 'window': 7,
 'ns_exponent': -0.020629396237178063,
 'negative': 9,
 'min_count': 17,
 'vector_size': 128}

In [7]:
set_seed(SEED)
model = Word2Vec(
    grouped_df['train_ids'].to_list(),
    **study.best_params,
    hs=0,
    seed=SEED,
    epochs=50
)

hitrate_list = []
for train_ids, y_rel in grouped_df.select('train_ids', 'test_ids').rows():
    model_preds = model.predict_output_word(train_ids, topn=(TOP_K + len(train_ids)))
    if model_preds is None:
        hitrate_list.append(0)
        continue
        
    y_rec = [pred[0] for pred in model_preds if pred[0] not in train_ids]
    hitrate_list.append(user_hitrate(y_rel, y_rec))

mean_ap, mean_hitrate = evaluate_model(model)
print(f'MAP@{TOP_K} = {mean_ap:.4f} Hitrate@{TOP_K} = {mean_hitrate:.4f}')

MAP@10 = 0.0353 Hitrate@10 = 0.5499


## Сохраним рекомендации в redis

В реальных системах в качестве идентификаторов скорее всего будет использоваться uuid, тогда как при обучении удобно использовать представление в виде целых чисел.

Одним из вариантов работы с идентификаторами является создание словарей `user_ids_mapping` и `user_ids_inverse_mapping`, где первый делает преобразование _uuid -> int_, а второй _int -> uuid_.

Давайте просимулируем реальную рекомендательную систему и загрузим рекомендации в **redis**.

In [8]:
user_ids_inverse_mapping = {k: uuid.uuid4() for k in ratings['user_id'].unique()}
item_ids_inverse_mapping = {k: uuid.uuid4() for k in ratings['item_id'].unique()}

In [9]:
# если redis запущен в том же окружении, то можно использовать localhost
# иначе, измените host на ip-адресс сервера с запущенным redis
r = redis.Redis(host='localhost', db=0)
used_memory_before = r.info('memory')['used_memory']

In [10]:
TOP_K = 100  # сохраним топ-100 рекомендаций

for user_id, train_ids in grouped_df.select('user_id', 'train_ids').rows():
    model_preds = model.predict_output_word(train_ids, topn=(TOP_K + len(train_ids)))
    if model_preds is None:
        continue
        
    y_rec = [pred[0] for pred in model_preds if pred[0] not in train_ids]
    r.rpush(
        str(user_ids_inverse_mapping[user_id]),
        *[str(item_ids_inverse_mapping[item_id]) for item_id in y_rec]
    )

In [11]:
# прочитаем сохраненные рекомендации по ключу user_id
r.lrange(str(user_ids_inverse_mapping[1]), 0, TOP_K)

[b'64a78b2d-98b5-4734-a5a3-4580df5b298e',
 b'ae49927d-850e-43b2-96ef-6dfcfecb97b0',
 b'd50a1c43-1a0b-4c25-a65a-db58f3ebdf89',
 b'17189576-3284-492d-9e72-a5bda874c43a',
 b'92157072-1cd6-495d-a079-3822cbfbd26c',
 b'af769df0-4b8b-4afe-b878-82cacd978b12',
 b'e5cc28ba-b163-435a-9f84-85a5e9c1fe48',
 b'2d9ed025-4e10-4972-a43b-b7c706c0b0eb',
 b'a50a97b9-b9f2-45a0-b6d6-d3594a433a05',
 b'bf74a277-17fc-4a03-bb4c-c8d55086ad5b',
 b'd8b04319-7cfc-4492-acc8-b41f71a7d29b',
 b'bda01853-6a53-4420-8f17-a6584f4a60a0',
 b'cb2d057a-08e8-4580-a105-b5780a9aef69',
 b'bb18e13f-4f46-4a1c-8fc3-be8b2beef2b2',
 b'ab431b53-5fce-40a8-b41a-6de6e7b426af',
 b'22fb29ae-6b4e-4daf-9bf9-166210eced0f',
 b'a2eabe72-2922-4cb4-a2c3-2e88d586ae44',
 b'9f49f742-7a96-4d59-8f46-b56bc94bad73',
 b'5d53687c-1704-4a3e-a4d6-ef5016dd1871',
 b'9f34c9cc-515e-4c19-935a-dfee72e5749e',
 b'cb59cf96-f56e-4091-b3ce-b8333cdcc523',
 b'4da5b153-b211-4902-b06f-7497cd94ca09',
 b'2f287950-266c-494b-a2ee-da14775cebeb',
 b'd7ca1748-8c16-40c5-945d-6639787

In [12]:
r.info('memory')['used_memory'] - used_memory_before

4764320

Потратили около 5Мб на сохранение рекомендаций

In [13]:
r = redis.Redis(db=1)
used_memory_before = r.info('memory')['used_memory']

In [14]:
TOP_K = 100  # сохраним топ-100 рекомендаций

for user_id, train_ids in grouped_df.select('user_id', 'train_ids').rows():
    model_preds = model.predict_output_word(train_ids, topn=(TOP_K + len(train_ids)))
    if model_preds is None:
        continue
        
    y_rec = [pred[0] for pred in model_preds if pred[0] not in train_ids]
    r.rpush(
        user_ids_inverse_mapping[user_id].bytes,
        *[item_ids_inverse_mapping[item_id].bytes for item_id in y_rec]
    )

In [15]:
# прочитаем сохраненные рекомендации по ключу user_id
# для этого сначала преобразуем user_uuid в байты, а затем байты в item_uuid
[
    uuid.UUID(bytes=item_id_bytes, version=4)
    for item_id_bytes in r.lrange(user_ids_inverse_mapping[1].bytes, 0, TOP_K)
]

[UUID('64a78b2d-98b5-4734-a5a3-4580df5b298e'),
 UUID('ae49927d-850e-43b2-96ef-6dfcfecb97b0'),
 UUID('d50a1c43-1a0b-4c25-a65a-db58f3ebdf89'),
 UUID('17189576-3284-492d-9e72-a5bda874c43a'),
 UUID('92157072-1cd6-495d-a079-3822cbfbd26c'),
 UUID('af769df0-4b8b-4afe-b878-82cacd978b12'),
 UUID('e5cc28ba-b163-435a-9f84-85a5e9c1fe48'),
 UUID('2d9ed025-4e10-4972-a43b-b7c706c0b0eb'),
 UUID('a50a97b9-b9f2-45a0-b6d6-d3594a433a05'),
 UUID('bf74a277-17fc-4a03-bb4c-c8d55086ad5b'),
 UUID('d8b04319-7cfc-4492-acc8-b41f71a7d29b'),
 UUID('bda01853-6a53-4420-8f17-a6584f4a60a0'),
 UUID('cb2d057a-08e8-4580-a105-b5780a9aef69'),
 UUID('bb18e13f-4f46-4a1c-8fc3-be8b2beef2b2'),
 UUID('ab431b53-5fce-40a8-b41a-6de6e7b426af'),
 UUID('22fb29ae-6b4e-4daf-9bf9-166210eced0f'),
 UUID('a2eabe72-2922-4cb4-a2c3-2e88d586ae44'),
 UUID('9f49f742-7a96-4d59-8f46-b56bc94bad73'),
 UUID('5d53687c-1704-4a3e-a4d6-ef5016dd1871'),
 UUID('9f34c9cc-515e-4c19-935a-dfee72e5749e'),
 UUID('cb59cf96-f56e-4091-b3ce-b8333cdcc523'),
 UUID('4da5b1

In [16]:
r.info('memory')['used_memory'] - used_memory_before

2277120

Потратили в 2 раза меньше памяти для сохранения рекомендаций 🎉🎉