In [1]:
import numpy as np
import pickle
import pandas as pd

from implicit.als import AlternatingLeastSquares
from implicit.cpu.bpr import BayesianPersonalizedRanking
from implicit.cpu.lmf import LogisticMatrixFactorization
from implicit.nearest_neighbours import CosineRecommender

from implicit.evaluation import ranking_metrics_at_k

from scipy.sparse import csr_matrix,coo_matrix
import multiprocessing
num_threads = multiprocessing.cpu_count()
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
optuna.logging.set_verbosity(optuna.logging.ERROR)

import logging
# logging.getLogger().setLevel(logging.ERROR)
logging.getLogger("implicit").setLevel(logging.ERROR)

random_state = 42

In [2]:
# import psutil

# # Получение информации о памяти
# memory_info = psutil.virtual_memory()
# print(memory_info)

In [3]:
import gc
gc.collect()

0

In [4]:
# import os
# import threadpoolctl

# os.environ['MKL_NUM_THREADS'] = '5'
# os.environ['OPENBLAS_NUM_THREADS'] = '5'
# os.environ['OMP_NUM_THREADS'] = '5'

# threadpoolctl.threadpool_limits(5, "blas")

In [5]:
# # # Производим десериализацию и извлекаем из файла формата pkl
with open('data\events_cl.pkl', 'rb') as pkl_file:
    events = pickle.load(pkl_file)

events = events[['timestamp','visitorid','event','itemid']]
events.head()

Unnamed: 0,timestamp,visitorid,event,itemid
19,2015-06-02 04:57:58.505,158090,addtocart,10572
35,2015-06-02 05:57:24.638,361387,view,43485
110,2015-06-02 05:09:00.333,712443,view,346655
112,2015-06-02 05:12:21.632,599528,addtocart,356475
130,2015-06-02 05:17:56.276,599528,transaction,356475


In [6]:
# events = events.sample(frac=0.5, random_state=random_state)

In [7]:
# Построен валидационный датасет (разбивка по времени)
# На мой взгляд так более правильно (в соответствии с заданием)

split_date = '2015-07-29'
train = events[events['timestamp'].dt.strftime('%Y-%m-%d') < split_date]
test = events[events['timestamp'].dt.strftime('%Y-%m-%d') >= split_date]

print(train.shape[0] / events.shape[0])
print(test.shape[0] / events.shape[0])

0.6978126560159761
0.30218734398402397


In [8]:
events = events[['visitorid','event','itemid']]
train = train[['visitorid','event','itemid']]
test = test[['visitorid','event','itemid']]

In [9]:
event_type = {
            'view': 1,
            'addtocart':2,
            'transaction': 10,
            }

test['event'] = test['event'].map(event_type)
train['event'] = train['event'].map(event_type)
events['event'] = events['event'].map(event_type)

In [10]:
assert (train[['visitorid', 'itemid', 'event']] >= 0).all().all(), "Есть отрицательные значения!"

In [11]:
train_pivot_1 = pd.pivot_table(
                        train,
                        index="visitorid",
                        columns="itemid",
                        values="event",
                        )

In [12]:
test_pivot_1 = pd.pivot_table(
                    test,
                    index="visitorid",
                    columns="itemid",
                    values="event"
                    )

In [13]:
shell = pd.pivot_table(
                events,
                index="visitorid",
                columns="itemid",
                values="event",
                aggfunc=lambda x: 0
                )


print(train_pivot_1.shape)
print(test_pivot_1.shape)

(5413, 2825)
(2912, 2037)


In [14]:
train_pivot = shell + train_pivot_1
test_pivot = shell + test_pivot_1


train_pivot = (train_pivot + 1).fillna(0)
test_pivot = (test_pivot + 1).fillna(0)


print(train_pivot.shape)
print(test_pivot.shape)

(7821, 3032)
(7821, 3032)


In [15]:
train_pivot_sparse = csr_matrix(train_pivot.values)
test_pivot_sparse = csr_matrix(test_pivot.values)

In [16]:
models = [
        AlternatingLeastSquares(random_state=random_state,num_threads=num_threads),
        BayesianPersonalizedRanking(random_state=random_state,num_threads=num_threads),
        LogisticMatrixFactorization(random_state=random_state,num_threads=num_threads),
        CosineRecommender(num_threads=num_threads),
        ]

result_map_at10 = []
result_ndcg_at_k = []
result_auc = []


for model in models:
    model.fit(train_pivot_sparse)

    temp = ranking_metrics_at_k(model, train_pivot_sparse, test_pivot_sparse,K=3)
    result_map_at10.append(temp['map'])
    result_ndcg_at_k.append(temp['ndcg'])
    result_auc.append(temp['auc'])


models_name = []
for i in range(len(models)):
    models_name.append(str(models[i]))

  check_blas_config()
  check_blas_config()


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]



  0%|          | 0/3032 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

In [17]:
df_rez = pd.DataFrame(data=[models_name, result_map_at10, result_ndcg_at_k, result_auc]).T
df_rez.columns = ['model', 'map_at10', 'result_ndcg_at_k', 'result_auc']
df_rez

Unnamed: 0,model,map_at10,result_ndcg_at_k,result_auc
0,<implicit.cpu.als.AlternatingLeastSquares obje...,0.006897,0.008696,0.503072
1,<implicit.cpu.bpr.BayesianPersonalizedRanking ...,0.000687,0.000903,0.499823
2,<implicit.cpu.lmf.LogisticMatrixFactorization ...,0.0176,0.023024,0.509478
3,<implicit.nearest_neighbours.CosineRecommender...,0.008566,0.009902,0.504346


Параметры LogisticMatrixFactorization:

* factors: Количество скрытых факторов. Количество латентных факторов, которые модель использует для представления пользователей и элементов.
* learning_rate: Скорость обучения. Определяет размер шага, который модель делает при обновлении весов во время обучения.
* regularization: Параметр регуляризации. Контролирует величину регуляризации для предотвращения переобучения.
* iterations: Количество итераций. Количество итераций для алгоритма обучения.

In [18]:
def opt_Ext(trial):
    # задаем пространство поиска гиперпараметров

    factors = trial.suggest_categorical('factors',[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,20,25,30,50,75,100])
    learning_rate = trial.suggest_categorical('learning_rate',[0.0001,0.0005,0.001,0.005,0.01,0.05,0.1,0.5,1,2,3,4,5,6,7,8,9,10])
    regularization = trial.suggest_categorical('regularization',[0.001,0.005,0.01,0.05,0.1,0.5,1,2,3,4,5,6,7,8,9,10])
    iterations = trial.suggest_categorical('iterations',[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,20,25,30,50,75,100,200,500,1000,10000])


    # создаем модель
    model = LogisticMatrixFactorization(factors=factors,
                                        learning_rate=learning_rate,
                                        regularization=regularization,
                                        iterations=iterations,
                                        num_threads = num_threads,
                                        random_state=random_state,)

    model.fit(train_pivot_sparse)
    score = ranking_metrics_at_k(model, train_pivot_sparse, test_pivot_sparse,K=3)['ndcg']
    return score

In [19]:
# cоздаем объект исследования
stud = optuna.create_study(direction="maximize")


# ищем лучшую комбинацию гиперпараметров
stud.optimize(opt_Ext, n_trials=150)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

In [20]:
print(stud.best_params)

{'factors': 100, 'learning_rate': 8, 'regularization': 3, 'iterations': 1000}


In [21]:
best_model_implicit = LogisticMatrixFactorization(
                                                  factors=stud.best_params['factors'],
                                                learning_rate=stud.best_params['learning_rate'],
                                                regularization=stud.best_params['regularization'],
                                                iterations=stud.best_params['iterations'],
                                                num_threads = num_threads,
                                                random_state=random_state,)


best_model_implicit.fit(train_pivot_sparse)


# # Производим сериализацию и записываем результат в файл формата pkl
with open('models/best_model_implicit.pkl', 'wb') as output:
    pickle.dump(best_model_implicit, output)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [22]:
temp = ranking_metrics_at_k(best_model_implicit, train_pivot_sparse, test_pivot_sparse,K=3)
temp

  0%|          | 0/2912 [00:00<?, ?it/s]

{'precision': 0.03228547153780799,
 'map': 0.019125839438339454,
 'ndcg': 0.024927530555412365,
 'auc': 0.5099064909073107}

* Значение AUC ~ 0.5 указывает на случайное предсказание.
* Значение AUC 1.0 указывает на идеальное предсказание.

Высокое значение AUC (ближе к 1) означает, что модель лучше отделяет положительные примеры от отрицательных.  
В нашем случае идет простое угадвание.

Получение рекомендаций

In [None]:
user_id = 274515

train_pivot_list = train_pivot.index.tolist()

flag=0
try:
    # Получение индекса элемента
    index_user = train_pivot_list.index(user_id)
    print(f"Индекс пользователя {user_id} в списке: {index_user}")
    # Получение рекомендаций
    flag=1

except ValueError:
    print(f"Пользователь {user_id} не найден в списке.")

if flag:
    recomendations_ids, scores = best_model_implicit.recommend(index_user, train_pivot_sparse[index_user], N=10)
    unique_items = np.array(train_pivot.columns)
    recomendations = unique_items[recomendations_ids].tolist()

    print(f'Recomendations for user {user_id}: {recomendations}')
    print(f'Recomendations for user {user_id}: {scores}')

Индекс пользователя 274515 в списке: 1528
Recomendations for user 274515: [461686, 213834, 312728, 21674, 19523, 17114, 15283, 13711, 11150, 8651]
Recomendations for user 274515: [0.43608877 0.1287212  0.0177133  0.         0.         0.
 0.         0.         0.         0.        ]


In [24]:
unique_items = np.array(train_pivot.columns)

In [None]:
# # Производим сериализацию и записываем результат в файл формата pkl
with open('data/train_pivot_list_implicit.pkl', 'wb') as output:
    pickle.dump(train_pivot_list, output)

with open('data/train_pivot_sparse_implicit.pkl', 'wb') as output:
    pickle.dump(train_pivot_sparse, output)

with open('data/unique_items.pkl', 'wb') as output:
    pickle.dump(unique_items, output)