In [1]:
import os
# Make the project root the working directory so the `src` package imported
# below resolves. The move is guarded: an unconditional relative chdir climbs
# one directory higher every time this cell is re-run (or when the kernel was
# already started from the project root), which breaks the imports.
if not os.path.isdir("src"):
    os.chdir("../")
import pickle
import pandas as pd

from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
from typing import Callable
from src.config import Config
from src.dataset import Interactions
from src.metrics import PrecisionAtK, RecallAtK, FScoreAtK
from src.optimizer import OptunaMaximizer

In [2]:
# Read the preprocessed interactions from disk, then wrap the unpickled
# payload in the project's Interactions container.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at a file from a trusted source.
with Config.PREPROCESSED_INTERACTIONS_PATH.open("rb") as pickled_file:
    unpickled_payload = pickle.load(pickled_file)
interactions = Interactions(unpickled_payload)

In [3]:
# Restrict every split to the columns used further down in this notebook.
KEPT_COLUMNS = ["user_id", "item_id", "interest_score"]
for split_name in ("train", "test", "valid"):
    split_frame = getattr(interactions, split_name)
    setattr(interactions, split_name, split_frame[KEPT_COLUMNS])

In [4]:
# Preview the training split after the column selection.
interactions.train

Unnamed: 0,user_id,item_id,interest_score
0,126706,14433,0.400
1,127290,140952,0.290
2,66991,198453,0.445
3,46791,83486,0.615
4,79313,188770,0.940
...,...,...,...
1532993,153908,98585,0.220
1532994,154008,251969,0.020
1532995,154892,298192,0.840
1532996,156948,38118,0.890


In [5]:
# Preview the test split after the column selection.
interactions.test

Unnamed: 0,user_id,item_id,interest_score
1517915,101642,319500,0.835
1517917,130425,193445,0.490
1517918,93986,80733,0.235
1517919,159466,124115,0.420
1517920,158775,223806,0.270
...,...,...,...
1530838,141930,219928,0.450
1530839,53358,42887,0.290
1530840,151170,284652,0.135
1530841,141293,273421,0.240


In [6]:
# Preview the validation split after the column selection.
interactions.valid

Unnamed: 0,user_id,item_id,interest_score
1503047,22032,287219,0.275
1503048,84214,121609,1.000
1503049,28992,11482,0.110
1503050,23345,281921,1.000
1503051,49466,2880,0.010
...,...,...,...
1517908,81768,294370,0.850
1517910,158991,99669,0.815
1517911,77232,142149,0.020
1517912,17843,174535,0.060


In [7]:
# Build the sparse user x item training matrix: rows are user_id, columns are
# item_id, values are interest_score. No explicit shape is passed, so scipy
# derives it from the largest ids present in the training split.
train_frame = interactions.train
csr_train = csr_matrix(
    (
        train_frame["interest_score"],
        (train_frame["user_id"], train_frame["item_id"]),
    )
)
csr_train

<159613x321752 sparse matrix of type '<class 'numpy.float32'>'
	with 1306588 stored elements in Compressed Sparse Row format>

In [8]:
class Objective:
    """Optuna objective that fits implicit ALS and scores it on validation users.

    Parameters
    ----------
    train : csr_matrix
        User x item matrix the model is fitted on. It is also indexed by
        ``user_id`` to obtain each user's row for ``recommend``.
    valid : pd.DataFrame
        Validation interactions; the ``user_id`` and ``item_id`` columns are read.
    metric : Callable
        Object exposing ``calculate(predicted_items, actual_items)``. Its
        per-user results are averaged to produce the trial value.
    """

    def __init__(self, train: csr_matrix, valid: pd.DataFrame, metric: Callable):
        self.train = train
        self.valid = valid
        self.metric = metric

    def __call__(self, trial) -> float:
        """Run one trial and return the metric averaged over validation users.

        Samples ``factors``, ``regularization`` and ``iterations`` from
        ``trial``, fits ``AlternatingLeastSquares`` on ``self.train``, asks the
        model for ``Config.K`` items for every distinct validation user, and
        averages ``self.metric.calculate`` over those users.
        """
        search_space = {
            "factors": trial.suggest_int("factors", 4, 256),
            "regularization": trial.suggest_float("regularization", 1e-8, 0.1),
            "iterations": trial.suggest_int("iterations", 8, 64),
            "num_threads": Config.NUM_THREADS
        }
        model = AlternatingLeastSquares(**search_space)
        model.fit(self.train)

        # One row per distinct validation user, in order of first appearance.
        valid_users = self.valid["user_id"].drop_duplicates()
        predictions = pd.DataFrame(valid_users)
        # The first element of recommend's return value is taken as the
        # recommended item ids; explode then yields one row per (user, item).
        predictions["item_id"] = predictions["user_id"].apply(
            lambda user_id:
                model.recommend(
                    user_id,
                    self.train[user_id],
                    N=Config.K,
                    filter_already_liked_items=True
                )[0]
        )
        predictions = predictions.explode("item_id")

        # Group both frames by user once. Filtering the full frames with a
        # boolean mask for every user was quadratic in the number of rows,
        # while the Series handed to the metric are the same either way.
        predicted_by_user = {
            user_id: items
            for user_id, items in predictions.groupby("user_id", sort=False)["item_id"]
        }
        actual_by_user = {
            user_id: items
            for user_id, items in self.valid.groupby("user_id", sort=False)["item_id"]
        }
        return valid_users.apply(
            lambda user_id:
                self.metric.calculate(
                    predicted_by_user[user_id],
                    actual_by_user[user_id]
                )
        ).mean()

In [10]:
# Ranking metrics at the configured cutoff Config.K.
recall, precision = RecallAtK(Config.K), PrecisionAtK(Config.K)

In [11]:
# Search ALS hyperparameters that maximise precision on the validation split.
# NOTE(review): the two optimize() arguments look like the trial budget and the
# early-stopping patience — confirm against OptunaMaximizer.
precision_objective = Objective(
    train=csr_train,
    valid=interactions.valid,
    metric=precision,
)
precision_optimizer = OptunaMaximizer(precision_objective, "ALS precision maximize")
precision_best_trial = precision_optimizer.optimize(
    Config.ITERATIONS,
    Config.STOP_ITERATION,
)

[32m[I 2023-03-10 19:07:08,006][0m A new study created in memory with name: ALS precision maximize[0m
  self._init_valid()


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:07:48,621][0m Trial 0 finished with value: 0.001788782213239008 and parameters: {'factors': 83, 'regularization': 0.030671577732026258, 'iterations': 21}. Best is trial 0 with value: 0.001788782213239008.[0m


  0%|          | 0/45 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:09:17,855][0m Trial 1 finished with value: 0.002283981808994446 and parameters: {'factors': 198, 'regularization': 0.05702905104311832, 'iterations': 45}. Best is trial 1 with value: 0.002283981808994446.[0m


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:09:43,983][0m Trial 2 finished with value: 0.001616978271854469 and parameters: {'factors': 47, 'regularization': 0.09043733214900626, 'iterations': 25}. Best is trial 1 with value: 0.002283981808994446.[0m


  0%|          | 0/43 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:10:02,859][0m Trial 3 finished with value: 0.0011116725618999478 and parameters: {'factors': 24, 'regularization': 0.007987769495773345, 'iterations': 43}. Best is trial 1 with value: 0.002283981808994446.[0m


  0%|          | 0/61 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:10:45,416][0m Trial 4 finished with value: 0.0017988883274380994 and parameters: {'factors': 83, 'regularization': 0.030524770726276112, 'iterations': 61}. Best is trial 1 with value: 0.002283981808994446.[0m


  0%|          | 0/55 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:12:37,234][0m Trial 5 finished with value: 0.002324406265790809 and parameters: {'factors': 233, 'regularization': 0.0030947458023845432, 'iterations': 55}. Best is trial 5 with value: 0.002324406265790809.[0m


  0%|          | 0/62 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:13:01,053][0m Trial 6 finished with value: 0.0014148559878726609 and parameters: {'factors': 37, 'regularization': 0.048738059075112514, 'iterations': 62}. Best is trial 5 with value: 0.002324406265790809.[0m


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:14:27,431][0m Trial 7 finished with value: 0.0021627084386053592 and parameters: {'factors': 198, 'regularization': 0.06194426616244957, 'iterations': 33}. Best is trial 5 with value: 0.002324406265790809.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:15:56,289][0m Trial 8 finished with value: 0.002657908034360796 and parameters: {'factors': 228, 'regularization': 0.09379858785960193, 'iterations': 10}. Best is trial 8 with value: 0.002657908034360796.[0m


  0%|          | 0/36 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:16:24,306][0m Trial 9 finished with value: 0.0017281455280444649 and parameters: {'factors': 65, 'regularization': 0.05744992912922361, 'iterations': 36}. Best is trial 8 with value: 0.002657908034360796.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:16:38,570][0m Trial 10 finished with value: 0.0022738756947953544 and parameters: {'factors': 146, 'regularization': 0.09616806047956591, 'iterations': 10}. Best is trial 8 with value: 0.002657908034360796.[0m


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:16:55,156][0m Trial 11 finished with value: 0.0027488630621526114 and parameters: {'factors': 252, 'regularization': 0.0009609926864804453, 'iterations': 9}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:17:11,951][0m Trial 12 finished with value: 0.0027185447195553405 and parameters: {'factors': 255, 'regularization': 0.08079229839776317, 'iterations': 9}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/19 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:17:30,596][0m Trial 13 finished with value: 0.002577059120768073 and parameters: {'factors': 252, 'regularization': 0.0773736356604047, 'iterations': 19}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:17:42,656][0m Trial 14 finished with value: 0.002213239009600813 and parameters: {'factors': 142, 'regularization': 0.07499221654573365, 'iterations': 8}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/16 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:17:57,362][0m Trial 15 finished with value: 0.002395149065184442 and parameters: {'factors': 184, 'regularization': 0.017574945198963698, 'iterations': 16}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/27 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:18:17,958][0m Trial 16 finished with value: 0.0025366346639717104 and parameters: {'factors': 254, 'regularization': 0.00045710918590640554, 'iterations': 27}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:18:32,372][0m Trial 17 finished with value: 0.002223345123799902 and parameters: {'factors': 173, 'regularization': 0.04281030684913233, 'iterations': 15}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/28 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:18:44,539][0m Trial 18 finished with value: 0.001970692268822639 and parameters: {'factors': 112, 'regularization': 0.013707969884734557, 'iterations': 28}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/43 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:19:05,997][0m Trial 19 finished with value: 0.002334512379989899 and parameters: {'factors': 223, 'regularization': 0.023559493823836625, 'iterations': 43}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:19:12,917][0m Trial 20 finished with value: 0.0007882769075290545 and parameters: {'factors': 5, 'regularization': 0.037717239066698965, 'iterations': 13}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:19:28,350][0m Trial 21 finished with value: 0.0025467407781708 and parameters: {'factors': 219, 'regularization': 0.09972184696517383, 'iterations': 8}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:19:47,187][0m Trial 22 finished with value: 0.0025265285497726195 and parameters: {'factors': 238, 'regularization': 0.084853445421712, 'iterations': 21}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:20:03,880][0m Trial 23 finished with value: 0.002526528549772619 and parameters: {'factors': 212, 'regularization': 0.06989850392147001, 'iterations': 14}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:20:17,718][0m Trial 24 finished with value: 0.002253663466397174 and parameters: {'factors': 167, 'regularization': 0.08788223822609476, 'iterations': 11}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:20:36,878][0m Trial 25 finished with value: 0.0025669530065689813 and parameters: {'factors': 253, 'regularization': 0.08090717334507658, 'iterations': 18}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/22 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:20:54,287][0m Trial 26 finished with value: 0.0024052551793835326 and parameters: {'factors': 208, 'regularization': 0.07104415366119154, 'iterations': 22}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:21:12,418][0m Trial 27 finished with value: 0.0026478019201617055 and parameters: {'factors': 241, 'regularization': 0.09364418911985527, 'iterations': 12}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:21:33,114][0m Trial 28 finished with value: 0.002374936836786262 and parameters: {'factors': 229, 'regularization': 0.0844145521843311, 'iterations': 31}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/24 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:21:45,681][0m Trial 29 finished with value: 0.0020313289540171827 and parameters: {'factors': 111, 'regularization': 0.0932047531694253, 'iterations': 24}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:22:00,391][0m Trial 30 finished with value: 0.002435573521980805 and parameters: {'factors': 190, 'regularization': 0.06737352875277082, 'iterations': 9}. Best is trial 11 with value: 0.0027488630621526114.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:22:18,012][0m Trial 31 finished with value: 0.0027084386053562496 and parameters: {'factors': 240, 'regularization': 0.09389889279604427, 'iterations': 13}. Best is trial 11 with value: 0.0027488630621526114.[0m
EarlyStopping Exceeded: No new best scores on iters 20


In [12]:
# Search ALS hyperparameters that maximise recall on the validation split.
# NOTE(review): the two optimize() arguments look like the trial budget and the
# early-stopping patience — confirm against OptunaMaximizer.
recall_objective = Objective(
    train=csr_train,
    valid=interactions.valid,
    metric=recall,
)
recall_optimizer = OptunaMaximizer(recall_objective, "ALS recall maximize")
recall_best_trial = recall_optimizer.optimize(
    Config.ITERATIONS,
    Config.STOP_ITERATION,
)

[32m[I 2023-03-10 19:22:19,976][0m A new study created in memory with name: ALS recall maximize[0m
  self._init_valid()


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/64 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:22:35,042][0m Trial 0 finished with value: 0.008247920318410465 and parameters: {'factors': 113, 'regularization': 0.09391687455263865, 'iterations': 64}. Best is trial 0 with value: 0.008247920318410465.[0m


  0%|          | 0/52 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:22:44,188][0m Trial 1 finished with value: 0.006628666747907909 and parameters: {'factors': 36, 'regularization': 0.05120279467247359, 'iterations': 52}. Best is trial 0 with value: 0.008247920318410465.[0m


  0%|          | 0/32 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:23:00,949][0m Trial 2 finished with value: 0.010028413139833048 and parameters: {'factors': 176, 'regularization': 0.04100125238185799, 'iterations': 32}. Best is trial 2 with value: 0.010028413139833048.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:23:15,779][0m Trial 3 finished with value: 0.010175268978957713 and parameters: {'factors': 153, 'regularization': 0.06012885694308886, 'iterations': 10}. Best is trial 3 with value: 0.010175268978957713.[0m


  0%|          | 0/37 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:23:31,785][0m Trial 4 finished with value: 0.009552722928659394 and parameters: {'factors': 159, 'regularization': 0.05902700639922944, 'iterations': 37}. Best is trial 3 with value: 0.010175268978957713.[0m


  0%|          | 0/49 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:23:45,052][0m Trial 5 finished with value: 0.007902745870153653 and parameters: {'factors': 103, 'regularization': 0.03755675770188025, 'iterations': 49}. Best is trial 3 with value: 0.010175268978957713.[0m


  0%|          | 0/26 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:23:54,545][0m Trial 6 finished with value: 0.007073369615057335 and parameters: {'factors': 49, 'regularization': 0.0035435955382560756, 'iterations': 26}. Best is trial 3 with value: 0.010175268978957713.[0m


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:24:13,189][0m Trial 7 finished with value: 0.01062086403294084 and parameters: {'factors': 199, 'regularization': 0.09148623867640078, 'iterations': 33}. Best is trial 7 with value: 0.01062086403294084.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:24:20,429][0m Trial 8 finished with value: 0.0035113513251351855 and parameters: {'factors': 8, 'regularization': 0.04290722539499346, 'iterations': 21}. Best is trial 7 with value: 0.01062086403294084.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:24:36,197][0m Trial 9 finished with value: 0.010081105660943964 and parameters: {'factors': 190, 'regularization': 0.07559174908070387, 'iterations': 18}. Best is trial 7 with value: 0.01062086403294084.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:24:59,899][0m Trial 10 finished with value: 0.010934165211578046 and parameters: {'factors': 243, 'regularization': 0.09031432220054049, 'iterations': 41}. Best is trial 10 with value: 0.010934165211578046.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:25:22,778][0m Trial 11 finished with value: 0.011410045796099359 and parameters: {'factors': 240, 'regularization': 0.09940301542706396, 'iterations': 41}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/45 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:25:47,461][0m Trial 12 finished with value: 0.010797135883037852 and parameters: {'factors': 241, 'regularization': 0.09889029222274809, 'iterations': 45}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/42 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:26:11,764][0m Trial 13 finished with value: 0.011183231407323505 and parameters: {'factors': 256, 'regularization': 0.0810270667633612, 'iterations': 42}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/58 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:26:36,177][0m Trial 14 finished with value: 0.010210446030304545 and parameters: {'factors': 219, 'regularization': 0.07794697722593145, 'iterations': 58}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/53 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:27:02,444][0m Trial 15 finished with value: 0.010869884936826956 and parameters: {'factors': 256, 'regularization': 0.0758288282002099, 'iterations': 53}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:27:23,953][0m Trial 16 finished with value: 0.010548473339024124 and parameters: {'factors': 212, 'regularization': 0.09863842341496179, 'iterations': 41}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/28 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:27:43,990][0m Trial 17 finished with value: 0.01098036935833803 and parameters: {'factors': 230, 'regularization': 0.0831417683176579, 'iterations': 28}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/44 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:27:55,475][0m Trial 18 finished with value: 0.007895021855355356 and parameters: {'factors': 80, 'regularization': 0.08448792893032309, 'iterations': 44}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/38 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:28:12,619][0m Trial 19 finished with value: 0.008624174797747311 and parameters: {'factors': 140, 'regularization': 0.06905083519751586, 'iterations': 38}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/58 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:28:33,321][0m Trial 20 finished with value: 0.009866070008818874 and parameters: {'factors': 178, 'regularization': 0.09907061176702411, 'iterations': 58}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/26 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:28:52,844][0m Trial 21 finished with value: 0.011300161855998135 and parameters: {'factors': 227, 'regularization': 0.08442895286991793, 'iterations': 26}. Best is trial 11 with value: 0.011410045796099359.[0m


  0%|          | 0/27 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:29:14,247][0m Trial 22 finished with value: 0.012021542266615535 and parameters: {'factors': 251, 'regularization': 0.08474473367166906, 'iterations': 27}. Best is trial 22 with value: 0.012021542266615535.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:29:32,640][0m Trial 23 finished with value: 0.010834344758043598 and parameters: {'factors': 214, 'regularization': 0.0885267956712537, 'iterations': 21}. Best is trial 22 with value: 0.012021542266615535.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:29:50,133][0m Trial 24 finished with value: 0.01238645653375315 and parameters: {'factors': 229, 'regularization': 0.08807343274177562, 'iterations': 12}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:30:05,536][0m Trial 25 finished with value: 0.011444505048598027 and parameters: {'factors': 200, 'regularization': 0.09358071235000723, 'iterations': 8}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:30:20,757][0m Trial 26 finished with value: 0.011370814724012432 and parameters: {'factors': 196, 'regularization': 0.06779706341792173, 'iterations': 8}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:30:37,081][0m Trial 27 finished with value: 0.011163693277134411 and parameters: {'factors': 203, 'regularization': 0.08888887913302163, 'iterations': 13}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:30:52,541][0m Trial 28 finished with value: 0.010801126194001386 and parameters: {'factors': 182, 'regularization': 0.073911642193842, 'iterations': 15}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:31:04,937][0m Trial 29 finished with value: 0.009650505717964032 and parameters: {'factors': 126, 'regularization': 0.09300264310472277, 'iterations': 12}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:31:21,780][0m Trial 30 finished with value: 0.00966996326900319 and parameters: {'factors': 162, 'regularization': 0.08436202545810965, 'iterations': 17}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:31:41,399][0m Trial 31 finished with value: 0.01155227228167009 and parameters: {'factors': 238, 'regularization': 0.09395045573669941, 'iterations': 21}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:32:00,734][0m Trial 32 finished with value: 0.011472082462481655 and parameters: {'factors': 230, 'regularization': 0.09263504339475297, 'iterations': 21}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/22 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:32:20,029][0m Trial 33 finished with value: 0.011394181165530332 and parameters: {'factors': 231, 'regularization': 0.08914837603718993, 'iterations': 22}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/28 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:32:41,779][0m Trial 34 finished with value: 0.01152885540883432 and parameters: {'factors': 248, 'regularization': 0.0951710232998412, 'iterations': 28}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/30 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:33:03,613][0m Trial 35 finished with value: 0.011706347895573897 and parameters: {'factors': 251, 'regularization': 0.08057804107742401, 'iterations': 30}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:33:23,820][0m Trial 36 finished with value: 0.011438408175396554 and parameters: {'factors': 216, 'regularization': 0.08178017891013739, 'iterations': 33}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/30 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:33:45,864][0m Trial 37 finished with value: 0.011482428076667594 and parameters: {'factors': 255, 'regularization': 0.07089733505990624, 'iterations': 30}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:34:05,957][0m Trial 38 finished with value: 0.011471404346593836 and parameters: {'factors': 239, 'regularization': 0.06468100917751185, 'iterations': 25}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:34:20,726][0m Trial 39 finished with value: 0.009495753109952201 and parameters: {'factors': 167, 'regularization': 0.07660894439528808, 'iterations': 17}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/35 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:34:33,081][0m Trial 40 finished with value: 0.008124161644879179 and parameters: {'factors': 100, 'regularization': 0.07931960603042912, 'iterations': 35}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/29 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:34:54,586][0m Trial 41 finished with value: 0.011446031601907429 and parameters: {'factors': 244, 'regularization': 0.09572052962139486, 'iterations': 29}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/30 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:35:16,206][0m Trial 42 finished with value: 0.010957939042061458 and parameters: {'factors': 249, 'regularization': 0.0869259753372426, 'iterations': 30}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/24 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:35:34,820][0m Trial 43 finished with value: 0.011838623450548664 and parameters: {'factors': 224, 'regularization': 0.09385199441832266, 'iterations': 24}. Best is trial 24 with value: 0.01238645653375315.[0m


  0%|          | 0/24 [00:00<?, ?it/s]

[32m[I 2023-03-10 19:35:53,730][0m Trial 44 finished with value: 0.011117075333851485 and parameters: {'factors': 222, 'regularization': 0.08676798017721617, 'iterations': 24}. Best is trial 24 with value: 0.01238645653375315.[0m
EarlyStopping Exceeded: No new best scores on iters 20


In [13]:
# Refit ALS on the training matrix with the best precision trial's
# hyperparameters. The trial's params hold only the values suggested during the
# search (factors, regularization, iterations), so the thread setting used
# while tuning is passed explicitly to keep the refit configured like the
# tuned trials instead of falling back to the library default.
precision_model = AlternatingLeastSquares(
    **precision_best_trial.params,
    num_threads=Config.NUM_THREADS,
)
precision_model.fit(csr_train)

  0%|          | 0/9 [00:00<?, ?it/s]

In [14]:
# Refit ALS on the training matrix with the best recall trial's
# hyperparameters. The trial's params hold only the values suggested during the
# search (factors, regularization, iterations), so the thread setting used
# while tuning is passed explicitly to keep the refit configured like the
# tuned trials instead of falling back to the library default.
recall_model = AlternatingLeastSquares(
    **recall_best_trial.params,
    num_threads=Config.NUM_THREADS,
)
recall_model.fit(csr_train)

  0%|          | 0/12 [00:00<?, ?it/s]

In [15]:
# Refitted candidates to compare on the test split, keyed by display name.
models = dict(
    recall_model=recall_model,
    precision_model=precision_model,
)

In [16]:
# Metrics reported for every candidate model, keyed by display name.
metrics = dict(
    precision=precision,
    recall=recall,
)

In [17]:
# Evaluate every refitted model on the test split and print each metric
# averaged over the distinct test users.
#
# Ground-truth items are grouped by user once up front, and each model's
# recommendations are grouped once per model. Re-filtering the full frames with
# a boolean mask for every user (and again for every metric) was quadratic in
# the number of rows; the Series handed to the metrics are the same either way.
test_users = interactions.test["user_id"].drop_duplicates()
actual_by_user = {
    user_id: items
    for user_id, items in interactions.test.groupby("user_id", sort=False)["item_id"]
}
for model_name, model in models.items():
    pred = pd.DataFrame(test_users)
    print(f"Model '{model_name}':")
    # The first element of recommend's return value is taken as the recommended
    # item ids; explode then yields one row per (user, item).
    pred["item_id"] = pred["user_id"].apply(
        lambda user_id:
            model.recommend(
                user_id,
                csr_train[user_id],
                N=Config.K,
                filter_already_liked_items=True
            )[0]
    )
    pred = pred.explode("item_id")
    predicted_by_user = {
        user_id: items
        for user_id, items in pred.groupby("user_id", sort=False)["item_id"]
    }
    for metric_name, metric in metrics.items():
        metric_val = test_users.apply(
            lambda user_id:
                metric.calculate(
                    predicted_by_user[user_id],
                    actual_by_user[user_id]
                )
        ).mean()
        print(f"\tMetric {metric_name}@{Config.K}: {metric_val}")

Model 'recall_model':
	Metric precision@10: 0.0015958040397277059
	Metric recall@10: 0.008367300098133712
Model 'precision_model':
	Metric precision@10: 0.001752036603057691
	Metric recall@10: 0.009572124177078983


In [18]:
# Persist the precision-tuned model to the configured artifact path.
with Config.IMPLICIT_ALS_PATH.open("wb") as model_file:
    pickle.dump(precision_model, model_file)