In [1]:
import os
# Move to the project root so the `src.*` imports below resolve.
# The guard makes the cell idempotent: an unconditional chdir("..") climbs one
# directory higher on every re-run (or when the kernel already starts in the
# project root), after which `import src...` fails.
if not os.path.isdir("src"):
    os.chdir("..")

In [2]:
import pickle
import pandas as pd
from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
from typing import Callable
from src.config import Config
from src.dataset import Interactions
from src.metrics import PrecisionAtK, RecallAtK
from src.optimizer import OptunaMaximizer

In [3]:
# Read the pickled interactions from the path configured in Config.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so this
# must only ever be pointed at artefacts produced by a trusted pipeline.
with Config.PREPROCESSED_INTERACTIONS_PATH.open("rb") as pickled_interactions:
    interactions = pickle.load(pickled_interactions)

In [4]:
# Binary relevance label: 1 when interest_score >= 0.7, otherwise 0
# (missing scores compare as False and therefore become 0).
interactions["interest_score_binary"] = (
    interactions["interest_score"].ge(0.7).astype("int64")
)

In [5]:
# Preview the labelled frame (pandas truncates the rendering to head/tail rows).
interactions

Unnamed: 0,user_id,item_id,progress,rating,start_date,interest_score,interest_score_binary
0,126706,14433,80,,2018-01-01,0.400,0
1,127290,140952,58,,2018-01-01,0.290,0
2,66991,198453,89,,2018-01-01,0.445,0
3,46791,83486,23,5.0,2018-01-01,0.615,0
4,79313,188770,88,5.0,2018-01-01,0.940,1
...,...,...,...,...,...,...,...
1532993,153908,98585,44,,2019-02-11,0.220,0
1532994,154008,251969,4,,2018-04-08,0.020,0
1532995,154892,298192,68,5.0,2019-02-16,0.840,1
1532996,156948,38118,78,5.0,2018-08-19,0.890,1


In [6]:
# Wrap the labelled frame in the project's Interactions object, which exposes
# the .train / .valid / .test frames used by the following cells.
# This rebinds `interactions` from a DataFrame to an Interactions instance, so
# the DataFrame cells above should not be re-run without reloading the data.
# NOTE(review): the split previews below show test spanning 2019-12-02..2019-12-30
# and valid spanning 2019-11-02..2019-12-01, so valid_days looks like it is
# counted back from the end of the data (including the test window) — confirm
# against src.dataset.Interactions.
interactions = Interactions(interactions, test_days=30, valid_days=60)

In [7]:
# Preview the training split as produced by Interactions (before negatives from
# the hold-out splits are appended in the next cell).
interactions.train

Unnamed: 0,user_id,item_id,progress,rating,start_date,interest_score,interest_score_binary
0,126706,14433,80,,2018-01-01,0.400,0
1,127290,140952,58,,2018-01-01,0.290,0
2,66991,198453,89,,2018-01-01,0.445,0
3,46791,83486,23,5.0,2018-01-01,0.615,0
4,79313,188770,88,5.0,2018-01-01,0.940,1
...,...,...,...,...,...,...,...
1532993,153908,98585,44,,2019-02-11,0.220,0
1532994,154008,251969,4,,2018-04-08,0.020,0
1532995,154892,298192,68,5.0,2019-02-16,0.840,1
1532996,156948,38118,78,5.0,2018-08-19,0.890,1


In [8]:
# Append the label-0 rows of the hold-out splits (test first, then valid) to the
# training frame; the label-1 rows stay behind as evaluation targets.
# Re-running this cell appends the same rows again.
holdout_negatives = [
    split.loc[split["interest_score_binary"] == 0].copy()
    for split in (interactions.test, interactions.valid)
]
interactions.train = pd.concat([interactions.train, *holdout_negatives])
interactions.train

Unnamed: 0,user_id,item_id,progress,rating,start_date,interest_score,interest_score_binary
0,126706,14433,80,,2018-01-01,0.400,0
1,127290,140952,58,,2018-01-01,0.290,0
2,66991,198453,89,,2018-01-01,0.445,0
3,46791,83486,23,5.0,2018-01-01,0.615,0
4,79313,188770,88,5.0,2018-01-01,0.940,1
...,...,...,...,...,...,...,...
1469712,142744,230067,3,,2019-12-01,0.015,0
1469713,73145,223263,6,,2019-12-01,0.030,0
1469715,100060,313623,11,5.0,2019-12-01,0.555,0
1469717,82749,165018,2,,2019-12-01,0.010,0


In [9]:
# Keep only the label-1 rows as test targets.
is_positive_test = interactions.test["interest_score_binary"].eq(1)
interactions.test = interactions.test.loc[is_positive_test]
interactions.test

Unnamed: 0,user_id,item_id,progress,rating,start_date,interest_score,interest_score_binary
1469727,129896,168726,100,5.0,2019-12-02,1.000,1
1469756,89727,75814,75,5.0,2019-12-02,0.875,1
1469772,49785,252986,99,5.0,2019-12-02,0.995,1
1469775,135061,141047,49,5.0,2019-12-02,0.745,1
1469777,47747,55458,99,3.0,2019-12-02,0.795,1
...,...,...,...,...,...,...,...
1530792,113785,230592,100,5.0,2019-12-30,1.000,1
1530793,49989,234938,99,5.0,2019-12-30,0.995,1
1530794,96202,230592,100,5.0,2019-12-30,1.000,1
1530799,29877,162000,79,5.0,2019-12-30,0.895,1


In [10]:
# Keep only the label-1 rows as validation targets.
is_positive_valid = interactions.valid["interest_score_binary"].eq(1)
interactions.valid = interactions.valid.loc[is_positive_valid]
interactions.valid

Unnamed: 0,user_id,item_id,progress,rating,start_date,interest_score,interest_score_binary
1406113,135781,266343,85,5.0,2019-11-02,0.925,1
1406118,140910,136327,100,5.0,2019-11-02,1.000,1
1406122,33450,109201,98,5.0,2019-11-02,0.990,1
1406130,159354,78547,91,5.0,2019-11-02,0.955,1
1406148,83685,168854,99,3.0,2019-11-02,0.795,1
...,...,...,...,...,...,...,...
1469657,2681,70828,86,5.0,2019-12-01,0.930,1
1469667,3951,239790,99,5.0,2019-12-01,0.995,1
1469689,10808,72830,88,5.0,2019-12-01,0.940,1
1469701,45181,310618,73,5.0,2019-12-01,0.865,1


In [11]:
# Drop everything except the (user, item, label) triple in every split.
model_columns = ["user_id", "item_id", "interest_score_binary"]
for split_name in ("train", "test", "valid"):
    setattr(interactions, split_name, getattr(interactions, split_name)[model_columns])

In [12]:
# Build the user x item matrix from the training triples.
# No `shape=` is passed, so the dimensions are inferred from the largest
# user_id / item_id present in the training frame; label-0 rows are kept as
# explicitly stored zeros.
train_labels = interactions.train["interest_score_binary"]
train_user_ids = interactions.train["user_id"]
train_item_ids = interactions.train["item_id"]
csr_train = csr_matrix((train_labels, (train_user_ids, train_item_ids)))
csr_train

<159613x321752 sparse matrix of type '<class 'numpy.int64'>'
	with 1316432 stored elements in Compressed Sparse Row format>

In [13]:
class Objective:
    """Optuna objective that fits an implicit ALS model and scores it on validation data.

    Each call samples `factors`, `regularization` and `iterations` from the
    trial, fits `AlternatingLeastSquares` on `train`, recommends `Config.K`
    items for every distinct validation user and returns the mean of
    `metric.calculate(predicted_items, actual_items)` over those users.

    Args:
        train: user x item matrix the model is fitted on; rows are indexed by
            `user_id`, so every validation user id must be a valid row index.
        valid: frame with at least `user_id` and `item_id` columns holding the
            held-out interactions.
        metric: object exposing `calculate(predicted, actual)`. It is annotated
            as `Callable` for backward compatibility, but only its
            `.calculate` method is used.
        random_state: optional seed forwarded to the model. When left as
            `None` (the default) nothing is passed and the model keeps its own
            default initialisation, exactly as before.
    """

    def __init__(self, train: csr_matrix, valid: pd.DataFrame, metric: Callable, random_state=None):
        self.train = train
        self.valid = valid
        self.metric = metric
        self.random_state = random_state

    def __call__(self, trial) -> float:
        """Run one trial and return the mean per-user metric on the validation users."""
        model_params = {
            "factors": trial.suggest_int("factors", 4, 256),
            "regularization": trial.suggest_float("regularization", 1e-8, 0.1),
            "iterations": trial.suggest_int("iterations", 8, 64),
            # Not a tuned value: fixed threading configuration for every trial.
            "num_threads": Config.NUM_THREADS
        }
        if self.random_state is not None:
            model_params["random_state"] = self.random_state
        model = AlternatingLeastSquares(**model_params)
        model.fit(self.train)
        return self._score(self._recommend(model))

    def _recommend(self, model) -> pd.DataFrame:
        """Return one (user_id, item_id) row per recommended item for each validation user."""
        predictions = pd.DataFrame(self.valid["user_id"].drop_duplicates())
        predictions["item_id"] = predictions["user_id"].apply(
            lambda user_id:
                model.recommend(
                    user_id,
                    self.train[user_id],
                    N=Config.K,
                    filter_already_liked_items=True
                )[0]  # element 0 of the returned pair holds the item ids
        )
        return predictions.explode("item_id")

    def _score(self, predictions: pd.DataFrame) -> float:
        """Average `metric.calculate` over the validation users.

        Both frames are grouped by user once up front. Filtering each frame
        with a boolean mask per user rescans every row for every user, which
        is quadratic in practice; the grouped Series passed to the metric are
        the same rows, in the same order, as the masks would select.
        """
        predicted_by_user = dict(iter(predictions.groupby("user_id", sort=False)["item_id"]))
        actual_by_user = dict(iter(self.valid.groupby("user_id", sort=False)["item_id"]))
        users = self.valid["user_id"].drop_duplicates()
        return users.apply(
            lambda user_id:
                self.metric.calculate(
                    predicted_by_user[user_id],
                    actual_by_user[user_id]
                )
        ).mean()

In [14]:
# Both metrics are evaluated at the same cut-off, Config.K.
recall, precision = RecallAtK(Config.K), PrecisionAtK(Config.K)

In [15]:
# Tune the ALS hyper-parameters against precision on the validation split.
precision_objective = Objective(
    train=csr_train,
    valid=interactions.valid,
    metric=precision,
)
precision_optimizer = OptunaMaximizer(precision_objective, "ALS precision maximize")
precision_best_trial = precision_optimizer.optimize(
    Config.ITERATIONS,
    Config.STOP_ITERATION,
)

[32m[I 2023-03-11 12:58:50,108][0m A new study created in memory with name: ALS precision maximize[0m
  self._init_valid()


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:58:55,849][0m Trial 0 finished with value: 0.0037897586284683037 and parameters: {'factors': 126, 'regularization': 0.022618699036281446, 'iterations': 10}. Best is trial 0 with value: 0.0037897586284683037.[0m


  0%|          | 0/34 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:58:59,692][0m Trial 1 finished with value: 0.0030904579291676027 and parameters: {'factors': 41, 'regularization': 0.09106332119201567, 'iterations': 34}. Best is trial 0 with value: 0.0037897586284683037.[0m


  0%|          | 0/38 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:59:11,515][0m Trial 2 finished with value: 0.004331152718249495 and parameters: {'factors': 234, 'regularization': 0.06677505963776753, 'iterations': 38}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/63 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:59:24,240][0m Trial 3 finished with value: 0.003992781412136249 and parameters: {'factors': 195, 'regularization': 0.05596335908122881, 'iterations': 63}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:59:33,732][0m Trial 4 finished with value: 0.004150688021655764 and parameters: {'factors': 197, 'regularization': 0.026948533814286368, 'iterations': 31}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/23 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:59:39,073][0m Trial 5 finished with value: 0.003541619670651924 and parameters: {'factors': 100, 'regularization': 0.08330676340620381, 'iterations': 23}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:59:42,839][0m Trial 6 finished with value: 0.003067899842093387 and parameters: {'factors': 36, 'regularization': 0.0667089374645675, 'iterations': 31}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/61 [00:00<?, ?it/s]

[32m[I 2023-03-11 12:59:58,210][0m Trial 7 finished with value: 0.004218362282878414 and parameters: {'factors': 235, 'regularization': 0.024804369882455088, 'iterations': 61}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:00:05,289][0m Trial 8 finished with value: 0.003654410106023005 and parameters: {'factors': 116, 'regularization': 0.09310678766013543, 'iterations': 33}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/23 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:00:13,849][0m Trial 9 finished with value: 0.004240920369952626 and parameters: {'factors': 165, 'regularization': 0.07748073569956508, 'iterations': 23}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/50 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:00:32,200][0m Trial 10 finished with value: 0.004015339499210467 and parameters: {'factors': 248, 'regularization': 0.04464925058418682, 'iterations': 50}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:00:41,197][0m Trial 11 finished with value: 0.004218362282878414 and parameters: {'factors': 177, 'regularization': 0.07259292552516539, 'iterations': 17}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/46 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:00:59,230][0m Trial 12 finished with value: 0.004105571847507331 and parameters: {'factors': 169, 'regularization': 0.07410781155778096, 'iterations': 46}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/42 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:01:21,332][0m Trial 13 finished with value: 0.0042634784570268446 and parameters: {'factors': 220, 'regularization': 0.0023375497571714934, 'iterations': 42}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/46 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:01:36,905][0m Trial 14 finished with value: 0.004060455673358898 and parameters: {'factors': 222, 'regularization': 0.009663227765690799, 'iterations': 46}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:01:53,307][0m Trial 15 finished with value: 0.00424092036995263 and parameters: {'factors': 256, 'regularization': 0.002312636483268309, 'iterations': 41}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/55 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:02:09,386][0m Trial 16 finished with value: 0.003970223325062031 and parameters: {'factors': 212, 'regularization': 0.04220182640785501, 'iterations': 55}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/40 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:02:15,182][0m Trial 17 finished with value: 0.0037446424543198715 and parameters: {'factors': 75, 'regularization': 0.05957224353691369, 'iterations': 40}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/54 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:02:26,385][0m Trial 18 finished with value: 0.0037220843672456537 and parameters: {'factors': 153, 'regularization': 0.09989806539057167, 'iterations': 54}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/40 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:02:39,155][0m Trial 19 finished with value: 0.0039927814121362495 and parameters: {'factors': 217, 'regularization': 0.04091173913631528, 'iterations': 40}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/24 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:02:42,430][0m Trial 20 finished with value: 0.0022106925332731767 and parameters: {'factors': 9, 'regularization': 0.052451428594913314, 'iterations': 24}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/40 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:02:57,070][0m Trial 21 finished with value: 0.00399278141213625 and parameters: {'factors': 253, 'regularization': 0.005360198495633323, 'iterations': 40}. Best is trial 2 with value: 0.004331152718249495.[0m


  0%|          | 0/46 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:03:11,908][0m Trial 22 finished with value: 0.0041958041958041975 and parameters: {'factors': 250, 'regularization': 0.001987748484152738, 'iterations': 46}. Best is trial 2 with value: 0.004331152718249495.[0m
EarlyStopping Exceeded: No new best scores on iters 20


In [16]:
# Tune the ALS hyper-parameters against recall on the validation split.
recall_objective = Objective(
    train=csr_train,
    valid=interactions.valid,
    metric=recall,
)
recall_optimizer = OptunaMaximizer(recall_objective, "ALS recall maximize")
recall_best_trial = recall_optimizer.optimize(
    Config.ITERATIONS,
    Config.STOP_ITERATION,
)

[32m[I 2023-03-11 13:03:11,995][0m A new study created in memory with name: ALS recall maximize[0m
  self._init_valid()


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:03:22,314][0m Trial 0 finished with value: 0.010714116914709991 and parameters: {'factors': 159, 'regularization': 0.08825201092591779, 'iterations': 53}. Best is trial 0 with value: 0.010714116914709991.[0m


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:03:28,697][0m Trial 1 finished with value: 0.010603754971896242 and parameters: {'factors': 114, 'regularization': 0.031087197986788554, 'iterations': 25}. Best is trial 0 with value: 0.010714116914709991.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:03:35,057][0m Trial 2 finished with value: 0.012689109939846495 and parameters: {'factors': 144, 'regularization': 0.09408982873820526, 'iterations': 12}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/50 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:03:41,505][0m Trial 3 finished with value: 0.009395158507349144 and parameters: {'factors': 87, 'regularization': 0.017456538412264105, 'iterations': 50}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/51 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:03:55,673][0m Trial 4 finished with value: 0.011813483556771328 and parameters: {'factors': 213, 'regularization': 0.08810212483929034, 'iterations': 51}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/30 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:04:05,275][0m Trial 5 finished with value: 0.01263439481332771 and parameters: {'factors': 186, 'regularization': 0.09401353155873074, 'iterations': 30}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/45 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:04:11,538][0m Trial 6 finished with value: 0.010311371286929837 and parameters: {'factors': 69, 'regularization': 0.06402226968500528, 'iterations': 45}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/32 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:04:18,561][0m Trial 7 finished with value: 0.011469794152679614 and parameters: {'factors': 119, 'regularization': 0.038497013950776135, 'iterations': 32}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/35 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:04:22,076][0m Trial 8 finished with value: 0.005936624717019416 and parameters: {'factors': 16, 'regularization': 0.0516959505254502, 'iterations': 35}. Best is trial 2 with value: 0.012689109939846495.[0m


  0%|          | 0/59 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:04:35,897][0m Trial 9 finished with value: 0.012921108800580798 and parameters: {'factors': 206, 'regularization': 0.06508502391564211, 'iterations': 59}. Best is trial 9 with value: 0.012921108800580798.[0m


  0%|          | 0/64 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:04:54,397][0m Trial 10 finished with value: 0.012667300104849368 and parameters: {'factors': 250, 'regularization': 0.008571923825535115, 'iterations': 64}. Best is trial 9 with value: 0.012921108800580798.[0m


  0%|          | 0/16 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:01,965][0m Trial 11 finished with value: 0.013763672593826262 and parameters: {'factors': 174, 'regularization': 0.07055367183915115, 'iterations': 16}. Best is trial 11 with value: 0.013763672593826262.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:10,395][0m Trial 12 finished with value: 0.01539939925274829 and parameters: {'factors': 217, 'regularization': 0.06961014348889616, 'iterations': 12}. Best is trial 12 with value: 0.01539939925274829.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:19,007][0m Trial 13 finished with value: 0.01589968091021336 and parameters: {'factors': 248, 'regularization': 0.07299855012333815, 'iterations': 8}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:30,520][0m Trial 14 finished with value: 0.014568630198309868 and parameters: {'factors': 255, 'regularization': 0.07607701027304488, 'iterations': 20}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:38,713][0m Trial 15 finished with value: 0.01467372115479568 and parameters: {'factors': 225, 'regularization': 0.05432481359946307, 'iterations': 9}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:46,830][0m Trial 16 finished with value: 0.015717448820447226 and parameters: {'factors': 231, 'regularization': 0.07966850448962176, 'iterations': 8}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/22 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:05:57,841][0m Trial 17 finished with value: 0.014715918702095068 and parameters: {'factors': 240, 'regularization': 0.08076264198735539, 'iterations': 22}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:05,445][0m Trial 18 finished with value: 0.014291088101740903 and parameters: {'factors': 186, 'regularization': 0.09795008337452082, 'iterations': 15}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:16,852][0m Trial 19 finished with value: 0.014004521172089621 and parameters: {'factors': 196, 'regularization': 0.07887535785676394, 'iterations': 41}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/27 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:20,038][0m Trial 20 finished with value: 0.005917196144624264 and parameters: {'factors': 10, 'regularization': 0.06115968512273271, 'iterations': 27}. Best is trial 13 with value: 0.01589968091021336.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:27,809][0m Trial 21 finished with value: 0.015910344160401393 and parameters: {'factors': 227, 'regularization': 0.07242167369751908, 'iterations': 8}. Best is trial 21 with value: 0.015910344160401393.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:38,100][0m Trial 22 finished with value: 0.01528005905700742 and parameters: {'factors': 241, 'regularization': 0.08130916973722578, 'iterations': 18}. Best is trial 21 with value: 0.015910344160401393.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:46,442][0m Trial 23 finished with value: 0.015399927065869732 and parameters: {'factors': 229, 'regularization': 0.07399670307035859, 'iterations': 8}. Best is trial 21 with value: 0.015910344160401393.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:06:54,111][0m Trial 24 finished with value: 0.015966633899042813 and parameters: {'factors': 166, 'regularization': 0.05720020359641188, 'iterations': 12}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:01,246][0m Trial 25 finished with value: 0.01322324753624903 and parameters: {'factors': 160, 'regularization': 0.05769688298791661, 'iterations': 14}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/23 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:08,041][0m Trial 26 finished with value: 0.011450230765032286 and parameters: {'factors': 136, 'regularization': 0.04051340529031838, 'iterations': 23}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:16,106][0m Trial 27 finished with value: 0.013380387467648296 and parameters: {'factors': 166, 'regularization': 0.05876045090124159, 'iterations': 18}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:24,363][0m Trial 28 finished with value: 0.01550755656327846 and parameters: {'factors': 202, 'regularization': 0.06710869334808733, 'iterations': 12}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:29,551][0m Trial 29 finished with value: 0.010162160824008386 and parameters: {'factors': 98, 'regularization': 0.0454340494838624, 'iterations': 8}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:38,773][0m Trial 30 finished with value: 0.011096646581511013 and parameters: {'factors': 148, 'regularization': 0.04924546568272155, 'iterations': 41}. Best is trial 24 with value: 0.015966633899042813.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:46,769][0m Trial 31 finished with value: 0.016071360379367374 and parameters: {'factors': 233, 'regularization': 0.08783474796634028, 'iterations': 8}. Best is trial 31 with value: 0.016071360379367374.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:07:55,759][0m Trial 32 finished with value: 0.01392597747055301 and parameters: {'factors': 254, 'regularization': 0.08619495327083522, 'iterations': 11}. Best is trial 31 with value: 0.016071360379367374.[0m


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:03,082][0m Trial 33 finished with value: 0.014472319083251448 and parameters: {'factors': 178, 'regularization': 0.08545711951011917, 'iterations': 15}. Best is trial 31 with value: 0.016071360379367374.[0m


  0%|          | 0/26 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:13,454][0m Trial 34 finished with value: 0.013907710649840834 and parameters: {'factors': 218, 'regularization': 0.07209590242566129, 'iterations': 26}. Best is trial 31 with value: 0.016071360379367374.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:22,435][0m Trial 35 finished with value: 0.017430806262161168 and parameters: {'factors': 237, 'regularization': 0.09128734640602251, 'iterations': 11}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:31,297][0m Trial 36 finished with value: 0.013539037816969286 and parameters: {'factors': 193, 'regularization': 0.0912172506094052, 'iterations': 20}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:35,039][0m Trial 37 finished with value: 0.008626707504636247 and parameters: {'factors': 38, 'regularization': 0.09324618115841976, 'iterations': 13}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:44,102][0m Trial 38 finished with value: 0.013504620847705958 and parameters: {'factors': 210, 'regularization': 0.0973480006053106, 'iterations': 17}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:08:50,635][0m Trial 39 finished with value: 0.01332605182269834 and parameters: {'factors': 149, 'regularization': 0.08887654401510975, 'iterations': 11}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/28 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:01,803][0m Trial 40 finished with value: 0.014195353698539769 and parameters: {'factors': 233, 'regularization': 0.08337103352827392, 'iterations': 28}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:10,718][0m Trial 41 finished with value: 0.01603340705950662 and parameters: {'factors': 235, 'regularization': 0.07502345556349099, 'iterations': 10}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:19,086][0m Trial 42 finished with value: 0.015010804365862846 and parameters: {'factors': 221, 'regularization': 0.08899151678073094, 'iterations': 11}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:28,430][0m Trial 43 finished with value: 0.01402442338162433 and parameters: {'factors': 238, 'regularization': 0.09924951265634153, 'iterations': 14}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:37,520][0m Trial 44 finished with value: 0.014766296175858699 and parameters: {'factors': 205, 'regularization': 0.06328084826796766, 'iterations': 20}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:42,124][0m Trial 45 finished with value: 0.010601483810020677 and parameters: {'factors': 75, 'regularization': 0.07663655287133894, 'iterations': 10}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:09:48,803][0m Trial 46 finished with value: 0.01155181863621382 and parameters: {'factors': 113, 'regularization': 0.06784211465990819, 'iterations': 33}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/24 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:00,059][0m Trial 47 finished with value: 0.015056247289658542 and parameters: {'factors': 245, 'regularization': 0.08392815140499577, 'iterations': 24}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:09,056][0m Trial 48 finished with value: 0.014683801076508043 and parameters: {'factors': 214, 'regularization': 0.09333131459708965, 'iterations': 17}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:16,430][0m Trial 49 finished with value: 0.01540525710584347 and parameters: {'factors': 185, 'regularization': 0.07597191151664977, 'iterations': 13}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:24,409][0m Trial 50 finished with value: 0.016370318200206832 and parameters: {'factors': 224, 'regularization': 0.07005488226839858, 'iterations': 10}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:32,629][0m Trial 51 finished with value: 0.016441258871208328 and parameters: {'factors': 228, 'regularization': 0.06804851522297199, 'iterations': 10}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:41,419][0m Trial 52 finished with value: 0.015196196510747932 and parameters: {'factors': 256, 'regularization': 0.06373528990507984, 'iterations': 10}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:49,245][0m Trial 53 finished with value: 0.014454096400862737 and parameters: {'factors': 198, 'regularization': 0.06683824093610942, 'iterations': 15}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:10:57,925][0m Trial 54 finished with value: 0.014726191717370668 and parameters: {'factors': 237, 'regularization': 0.07069193279712754, 'iterations': 12}. Best is trial 35 with value: 0.017430806262161168.[0m


  0%|          | 0/47 [00:00<?, ?it/s]

[32m[I 2023-03-11 13:11:10,545][0m Trial 55 finished with value: 0.012952984349757708 and parameters: {'factors': 224, 'regularization': 0.0782656620644333, 'iterations': 47}. Best is trial 35 with value: 0.017430806262161168.[0m
EarlyStopping Exceeded: No new best scores on iters 20


In [17]:
# Refit on the training matrix with the best precision trial's parameters.
# The trial params hold only the sampled values (factors, regularization,
# iterations — see the Optuna log above), so the fixed num_threads used during
# tuning has to be passed again to keep the refit consistent with the trials.
precision_model = AlternatingLeastSquares(
    **precision_best_trial.params,
    num_threads=Config.NUM_THREADS,
)
precision_model.fit(csr_train)

  0%|          | 0/38 [00:00<?, ?it/s]

In [18]:
# Refit on the training matrix with the best recall trial's parameters.
# The trial params hold only the sampled values (factors, regularization,
# iterations — see the Optuna log above), so the fixed num_threads used during
# tuning has to be passed again to keep the refit consistent with the trials.
recall_model = AlternatingLeastSquares(
    **recall_best_trial.params,
    num_threads=Config.NUM_THREADS,
)
recall_model.fit(csr_train)

  0%|          | 0/11 [00:00<?, ?it/s]

In [19]:
# Fitted models to compare on the test split, keyed by the metric they were tuned for.
models = dict(
    recall_model=recall_model,
    precision_model=precision_model,
)

In [20]:
# Metrics reported for every model, keyed by the label used in the printout.
metrics = dict(
    precision=precision,
    recall=recall,
)

In [21]:
# Evaluate every fitted model on the test split with every metric.
# The ground truth per user is identical for all models and metrics, so it is
# grouped once here; predictions are grouped once per model. Filtering the
# frames with a boolean mask per user rescans every row for every user, which
# is quadratic in practice. The grouped Series handed to the metrics are the
# same rows, in the same order, as the masks would select.
test_users = interactions.test["user_id"].drop_duplicates()
actual_by_user = dict(iter(interactions.test.groupby("user_id", sort=False)["item_id"]))

for model_name, model in models.items():
    pred = pd.DataFrame(test_users)
    print(f"Model '{model_name}':")
    pred["item_id"] = pred["user_id"].apply(
        lambda user_id:
            model.recommend(
                user_id,
                csr_train[user_id],
                N=Config.K,
                filter_already_liked_items=True
            )[0]  # element 0 of the returned pair holds the item ids
    )
    pred = pred.explode("item_id")
    predicted_by_user = dict(iter(pred.groupby("user_id", sort=False)["item_id"]))
    for metric_name, metric in metrics.items():
        metric_val = test_users.apply(
            lambda user_id:
                metric.calculate(
                    predicted_by_user[user_id],
                    actual_by_user[user_id]
                )
        ).mean()
        print(f"\tMetric {metric_name}@{Config.K}: {metric_val}")

Model 'recall_model':
	Metric precision@10: 0.0023210831721469983
	Metric recall@10: 0.008298372726564213
Model 'precision_model':
	Metric precision@10: 0.0019825918762088955
	Metric recall@10: 0.006664199736024153


In [22]:
# Persist the recall-tuned model; in the evaluation printout above it scores
# higher than the precision-tuned model on both test metrics.
model_file = Config.MODEL_PATH / "als_binary.pickle"
with model_file.open("wb") as model_sink:
    pickle.dump(recall_model, model_sink)