In [27]:
import warnings
warnings.filterwarnings('ignore')

%load_ext autotime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

import random

from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.metrics import recall_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

from implicit.als import AlternatingLeastSquares

from surprise import Dataset, Reader
import surprise.model_selection
#from surprise.model_selection import cross_validate, GridSearchCV
from surprise.prediction_algorithms import SVD, SVDpp, KNNBasic, KNNBaseline

from catboost import CatBoostClassifier, CatBoostRanker, Pool

from lightfm import LightFM
from lightfm.evaluation import precision_at_k, recall_at_k

from tqdm import tqdm

import itertools

import optuna

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 1.73 ms (started: 2024-10-26 01:20:46 +07:00)


In [15]:
import implicit
from scipy.sparse import coo_matrix
import pandas as pd

events = pd.read_csv('events.csv')

user_ids = np.arange(6040)
item_ids = np.arange(3706)

events = events.sort_values(['user_id', 'item_id'])
rows = events['user_id']
cols = events['item_id']
data = events['rating'].values

user_item_matrix = csr_matrix((data, (rows, cols)), dtype = 'float64')

# Инициализация моделей
bpr_model = implicit.bpr.BayesianPersonalizedRanking(factors=50)
lmf_model = implicit.lmf.LogisticMatrixFactorization(factors=50)

# Обучение моделей
bpr_model.fit(user_item_matrix)
lmf_model.fit(user_item_matrix)

100%|██████████| 100/100 [00:07<00:00, 12.57it/s, train_auc=89.73%, skipped=24.76%]
100%|██████████| 30/30 [00:03<00:00,  7.90it/s]

time: 12.1 s (started: 2024-10-26 01:18:28 +07:00)





In [5]:
model = AlternatingLeastSquares(factors=150, regularization=0.25, iterations=200, random_state=29)
model.fit(user_item_matrix)

100%|██████████| 200/200 [01:03<00:00,  3.16it/s]

time: 1min 3s (started: 2024-10-26 01:05:40 +07:00)





In [6]:
top_n = 3706
recommended_items = model.recommend(np.arange(6040), user_item_matrix, filter_already_liked_items = False, N=top_n)

time: 673 ms (started: 2024-10-26 01:06:43 +07:00)


In [7]:
als_df = pd.DataFrame([np.repeat(user_ids, top_n).tolist(),recommended_items[0].reshape(-1).tolist(), recommended_items[1].reshape(-1).tolist()]).T
als_df.columns = ['user_id', 'item_id', 'als_score']

time: 7min 43s (started: 2024-10-26 01:06:44 +07:00)


In [23]:
als_df.head()

Unnamed: 0,user_id,item_id,als_score
0,0.0,1711.0,1.359568
1,0.0,2673.0,1.241903
2,0.0,3597.0,1.239341
3,0.0,1030.0,1.197416
4,0.0,3677.0,1.17379


time: 14.7 ms (started: 2024-10-26 01:19:38 +07:00)


In [43]:
user_indices = als_df['user_id'].astype(int).values
item_indices = als_df['item_id'].astype(int).values

bpr_user_vectors = bpr_user_factors[user_indices]
bpr_item_vectors = bpr_item_factors[item_indices]
lmf_user_vectors = lmf_user_factors[user_indices]
lmf_item_vectors = lmf_item_factors[item_indices]

bpr_scores = np.einsum('ij,ij->i', bpr_user_vectors, bpr_item_vectors) / (
    np.linalg.norm(bpr_user_vectors, axis=1) * np.linalg.norm(bpr_item_vectors, axis=1)
)
lmf_scores = np.einsum('ij,ij->i', lmf_user_vectors, lmf_item_vectors) / (
    np.linalg.norm(lmf_user_vectors, axis=1) * np.linalg.norm(lmf_item_vectors, axis=1)
)

time: 3min 32s (started: 2024-10-26 01:24:00 +07:00)


In [44]:
als_df['bpr_score'] = bpr_scores
als_df['lmf_score'] = lmf_scores

time: 323 ms (started: 2024-10-26 01:27:33 +07:00)


In [45]:
als_df.head()

Unnamed: 0,user_id,item_id,als_score,bpr_score,lmf_score
0,0.0,1711.0,1.359568,0.462845,0.383199
1,0.0,2673.0,1.241903,0.207902,0.297794
2,0.0,3597.0,1.239341,0.50395,0.282673
3,0.0,1030.0,1.197416,0.250497,0.305708
4,0.0,3677.0,1.17379,0.238425,0.241454


time: 33.2 ms (started: 2024-10-26 01:27:33 +07:00)


In [46]:
als_df.to_csv('implicit_scores.csv', index = False)

time: 1min 40s (started: 2024-10-26 01:27:58 +07:00)
