In [1]:
import os
os.chdir("../")
import pickle
import pandas as pd

from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix
from typing import Callable
from src.config import Config
from src.dataset import Interactions
from src.metrics import PrecisionAtK, RecallAtK, FScoreAtK
from src.optimizer import OptunaMaximizer

In [2]:
# Read the pickled, preprocessed interactions and wrap them in the project's
# Interactions container, which is configured with a 7-day test window and a
# 14-day validation window.
with Config.PREPROCESSED_INTERACTIONS_PATH.open("rb") as pickled_file:
    preprocessed = pickle.load(pickled_file)
    interactions = Interactions(preprocessed, test_days=7, valid_days=14)

In [3]:
# Keep only the columns the model consumes, applying the same projection to
# every split so train / test / valid share one schema.
MODEL_COLUMNS = ["user_id", "item_id", "interest_score"]
for split_name in ("train", "test", "valid"):
    setattr(interactions, split_name, getattr(interactions, split_name)[MODEL_COLUMNS])

In [4]:
# Preview the training split (user_id, item_id, interest_score).
interactions.train

Unnamed: 0,user_id,item_id,interest_score
0,126706,14433,0.400
1,127290,140952,0.290
2,66991,198453,0.445
3,46791,83486,0.615
4,79313,188770,0.940
...,...,...,...
1532993,153908,98585,0.220
1532994,154008,251969,0.020
1532995,154892,298192,0.840
1532996,156948,38118,0.890


In [5]:
# Preview the test split (user_id, item_id, interest_score).
interactions.test

Unnamed: 0,user_id,item_id,interest_score
1517915,101642,319500,0.835
1517917,130425,193445,0.490
1517918,93986,80733,0.235
1517919,159466,124115,0.420
1517920,158775,223806,0.270
...,...,...,...
1530838,141930,219928,0.450
1530839,53358,42887,0.290
1530840,151170,284652,0.135
1530841,141293,273421,0.240


In [6]:
# Preview the validation split (user_id, item_id, interest_score).
interactions.valid

Unnamed: 0,user_id,item_id,interest_score
1503047,22032,287219,0.275
1503048,84214,121609,1.000
1503049,28992,11482,0.110
1503050,23345,281921,1.000
1503051,49466,2880,0.010
...,...,...,...
1517908,81768,294370,0.850
1517910,158991,99669,0.815
1517911,77232,142149,0.020
1517912,17843,174535,0.060


In [7]:
# Build the user x item matrix from the training interactions: rows are
# user_id, columns are item_id, values are interest_score. The shape is
# inferred from the largest ids present in the training split.
train_frame = interactions.train
row_user_ids = train_frame["user_id"]
col_item_ids = train_frame["item_id"]
interest_values = train_frame["interest_score"]
csr_train = csr_matrix((interest_values, (row_user_ids, col_item_ids)))
csr_train

<159613x321752 sparse matrix of type '<class 'numpy.float32'>'
	with 1306588 stored elements in Compressed Sparse Row format>

In [8]:
class Objective:
    """Optuna objective: fit ALS on the train matrix and score it on validation data.

    Every call samples ALS hyperparameters from the trial, fits a fresh model,
    asks it for ``Config.K`` items per validation user and returns the metric
    averaged over those users.
    """

    def __init__(self, train: csr_matrix, valid: pd.DataFrame, metric: Callable):
        """Store the data and the metric used to evaluate each trial.

        Args:
            train: Sparse matrix the model is fitted on. It is indexed with a
                ``user_id`` to obtain that user's row when recommending.
            valid: Held-out interactions; the ``user_id`` and ``item_id``
                columns are read.
            metric: Object exposing ``calculate(recommended, relevant)``; its
                per-user results are averaged.
        """
        self.train = train
        self.valid = valid
        self.metric = metric

    def __call__(self, trial) -> float:
        """Run a single trial and return the mean per-user metric value.

        Args:
            trial: Optuna trial used to sample ``factors``, ``regularization``
                and ``iterations``.

        Returns:
            Mean of ``metric.calculate`` over the distinct validation users
            (NaN if the validation frame is empty).
        """
        search_space = {
            "factors": trial.suggest_int("factors", 4, 256),
            "regularization": trial.suggest_float("regularization", 1e-8, 0.1),
            "iterations": trial.suggest_int("iterations", 8, 64),
            # Fixed setting, not tuned: it is not registered on the trial.
            "num_threads": Config.NUM_THREADS
        }
        model = AlternatingLeastSquares(**search_space)
        model.fit(self.train)

        # One row per distinct validation user, in order of first appearance.
        valid_users = self.valid["user_id"].drop_duplicates()
        predictions = valid_users.to_frame()
        predictions["item_id"] = predictions["user_id"].apply(
            lambda user_id:
                model.recommend(
                    user_id,
                    self.train[user_id],
                    N=Config.K,
                    filter_already_liked_items=True
                )[0]  # first element of the result is kept as the recommended ids
        )
        predictions = predictions.explode("item_id")

        # Group both frames once instead of re-filtering them with a boolean
        # mask for every user (which is quadratic in the number of users).
        # sort=False keeps the first-appearance order of users.
        recommended_by_user = dict(
            iter(predictions.groupby("user_id", sort=False)["item_id"])
        )
        relevant_by_user = self.valid.groupby("user_id", sort=False)["item_id"]
        scores = [
            self.metric.calculate(recommended_by_user[user_id], relevant_items)
            for user_id, relevant_items in relevant_by_user
        ]
        return float(pd.Series(scores, dtype=float).mean())

In [9]:
# Ranking metrics computed on the top-K recommendations (K comes from Config).
recall, precision = RecallAtK(Config.K), PrecisionAtK(Config.K)

In [10]:
# Hyperparameter search that maximises precision@K on the validation split.
precision_objective = Objective(
    train=csr_train,
    valid=interactions.valid,
    metric=precision,
)
precision_optimizer = OptunaMaximizer(precision_objective, "ALS precision maximize")
precision_best_trial = precision_optimizer.optimize(
    Config.ITERATIONS,
    Config.STOP_ITERATION,
)

[32m[I 2023-03-11 14:25:03,901][0m A new study created in memory with name: ALS precision maximize[0m
  self._init_valid()


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/33 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:25:14,031][0m Trial 0 finished with value: 0.0017079332996462837 and parameters: {'factors': 68, 'regularization': 0.08287552288829421, 'iterations': 33}. Best is trial 0 with value: 0.0017079332996462837.[0m


  0%|          | 0/19 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:25:26,990][0m Trial 1 finished with value: 0.002162708438605359 and parameters: {'factors': 159, 'regularization': 0.028882696999654466, 'iterations': 19}. Best is trial 1 with value: 0.002162708438605359.[0m


  0%|          | 0/38 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:25:46,270][0m Trial 2 finished with value: 0.0023446184941889898 and parameters: {'factors': 219, 'regularization': 0.008968596290308448, 'iterations': 38}. Best is trial 2 with value: 0.0023446184941889898.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:25:56,933][0m Trial 3 finished with value: 0.0021627084386053592 and parameters: {'factors': 128, 'regularization': 0.05097699916460718, 'iterations': 8}. Best is trial 2 with value: 0.0023446184941889898.[0m


  0%|          | 0/50 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:26:12,887][0m Trial 4 finished with value: 0.00199090449722082 and parameters: {'factors': 157, 'regularization': 0.07789434936917536, 'iterations': 50}. Best is trial 2 with value: 0.0023446184941889898.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:26:26,396][0m Trial 5 finished with value: 0.002162708438605359 and parameters: {'factors': 163, 'regularization': 0.0555650504586655, 'iterations': 18}. Best is trial 2 with value: 0.0023446184941889898.[0m


  0%|          | 0/23 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:26:41,597][0m Trial 6 finished with value: 0.002031328954017182 and parameters: {'factors': 155, 'regularization': 0.0011357038136145634, 'iterations': 23}. Best is trial 2 with value: 0.0023446184941889898.[0m


  0%|          | 0/38 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:27:03,960][0m Trial 7 finished with value: 0.0024254674077817157 and parameters: {'factors': 255, 'regularization': 0.06514828270236057, 'iterations': 38}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:27:19,751][0m Trial 8 finished with value: 0.0022132390096008124 and parameters: {'factors': 189, 'regularization': 0.07100575423680579, 'iterations': 25}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/52 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:27:40,774][0m Trial 9 finished with value: 0.002233451237998994 and parameters: {'factors': 203, 'regularization': 0.027521841569363586, 'iterations': 52}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/62 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:28:07,887][0m Trial 10 finished with value: 0.0023547246083880807 and parameters: {'factors': 243, 'regularization': 0.09731753840284488, 'iterations': 62}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/59 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:28:34,526][0m Trial 11 finished with value: 0.0023850429509853525 and parameters: {'factors': 256, 'regularization': 0.09856737816278613, 'iterations': 59}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/63 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:29:02,845][0m Trial 12 finished with value: 0.002354724608388081 and parameters: {'factors': 244, 'regularization': 0.09849837568794109, 'iterations': 63}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/45 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:29:11,246][0m Trial 13 finished with value: 0.0011824153612935808 and parameters: {'factors': 22, 'regularization': 0.0650830024682132, 'iterations': 45}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/55 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:29:25,166][0m Trial 14 finished with value: 0.001970692268822639 and parameters: {'factors': 102, 'regularization': 0.08615015769276313, 'iterations': 55}. Best is trial 7 with value: 0.0024254674077817157.[0m


  0%|          | 0/39 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:29:48,796][0m Trial 15 finished with value: 0.0024759979787771676 and parameters: {'factors': 256, 'regularization': 0.06362935610053393, 'iterations': 39}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/39 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:30:09,965][0m Trial 16 finished with value: 0.0023244062657908084 and parameters: {'factors': 218, 'regularization': 0.06203859664320829, 'iterations': 39}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/32 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:30:30,843][0m Trial 17 finished with value: 0.0023041940373926284 and parameters: {'factors': 227, 'regularization': 0.046879284143831246, 'iterations': 32}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/46 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:30:49,864][0m Trial 18 finished with value: 0.0022637695805962648 and parameters: {'factors': 191, 'regularization': 0.07082121305787151, 'iterations': 46}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/42 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:31:00,509][0m Trial 19 finished with value: 0.0017180394138453744 and parameters: {'factors': 70, 'regularization': 0.044990067731481544, 'iterations': 42}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/29 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:31:17,086][0m Trial 20 finished with value: 0.002233451237998993 and parameters: {'factors': 186, 'regularization': 0.06287168910172083, 'iterations': 29}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/56 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:31:43,546][0m Trial 21 finished with value: 0.0023648307225871716 and parameters: {'factors': 249, 'regularization': 0.0910298452169326, 'iterations': 56}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/48 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:32:09,075][0m Trial 22 finished with value: 0.0023446184941889906 and parameters: {'factors': 254, 'regularization': 0.07612431464284393, 'iterations': 48}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/59 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:32:34,695][0m Trial 23 finished with value: 0.002253663466397174 and parameters: {'factors': 228, 'regularization': 0.08798928653177325, 'iterations': 59}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:32:58,618][0m Trial 24 finished with value: 0.0023850429509853534 and parameters: {'factors': 256, 'regularization': 0.07931236438729866, 'iterations': 41}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/41 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:33:20,066][0m Trial 25 finished with value: 0.002334512379989899 and parameters: {'factors': 212, 'regularization': 0.07740419850765616, 'iterations': 41}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/35 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:33:41,509][0m Trial 26 finished with value: 0.002324406265790809 and parameters: {'factors': 230, 'regularization': 0.06804962617667659, 'iterations': 35}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/29 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:33:59,929][0m Trial 27 finished with value: 0.002283981808994447 and parameters: {'factors': 204, 'regularization': 0.0581453210702831, 'iterations': 29}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/36 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:34:22,564][0m Trial 28 finished with value: 0.0023345123799898997 and parameters: {'factors': 235, 'regularization': 0.07977571705062679, 'iterations': 36}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/32 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:34:30,323][0m Trial 29 finished with value: 0.0009701869631126819 and parameters: {'factors': 16, 'regularization': 0.08530903110609173, 'iterations': 32}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/42 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:34:43,902][0m Trial 30 finished with value: 0.0019302678120262763 and parameters: {'factors': 105, 'regularization': 0.07286901623392529, 'iterations': 42}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/52 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:35:10,493][0m Trial 31 finished with value: 0.002385042950985353 and parameters: {'factors': 256, 'regularization': 0.09364616609781733, 'iterations': 52}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/44 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:35:35,863][0m Trial 32 finished with value: 0.002395149065184443 and parameters: {'factors': 255, 'regularization': 0.08257136162606342, 'iterations': 44}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/45 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:36:00,750][0m Trial 33 finished with value: 0.002374936836786261 and parameters: {'factors': 239, 'regularization': 0.08169216574448732, 'iterations': 45}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/37 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:36:19,097][0m Trial 34 finished with value: 0.002102071753410816 and parameters: {'factors': 173, 'regularization': 0.08244262301635853, 'iterations': 37}. Best is trial 15 with value: 0.0024759979787771676.[0m


  0%|          | 0/40 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:36:35,583][0m Trial 35 finished with value: 0.0020515411824153637 and parameters: {'factors': 137, 'regularization': 0.0679144638072739, 'iterations': 40}. Best is trial 15 with value: 0.0024759979787771676.[0m
EarlyStopping Exceeded: No new best scores on iters 20


In [11]:
# Hyperparameter search that maximises recall@K on the validation split.
recall_objective = Objective(
    train=csr_train,
    valid=interactions.valid,
    metric=recall,
)
recall_optimizer = OptunaMaximizer(recall_objective, "ALS recall maximize")
recall_best_trial = recall_optimizer.optimize(
    Config.ITERATIONS,
    Config.STOP_ITERATION,
)

[32m[I 2023-03-11 14:36:35,664][0m A new study created in memory with name: ALS recall maximize[0m
  self._init_valid()


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/56 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:37:02,393][0m Trial 0 finished with value: 0.01037806531237557 and parameters: {'factors': 234, 'regularization': 0.032987659441796865, 'iterations': 56}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:37:11,768][0m Trial 1 finished with value: 0.006002649118353141 and parameters: {'factors': 34, 'regularization': 0.0028637352198565696, 'iterations': 31}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/27 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:37:25,003][0m Trial 2 finished with value: 0.008654645141507194 and parameters: {'factors': 122, 'regularization': 0.06081223370252223, 'iterations': 27}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:37:36,934][0m Trial 3 finished with value: 0.00909165914092645 and parameters: {'factors': 108, 'regularization': 0.005734634761465523, 'iterations': 13}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/26 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:37:45,346][0m Trial 4 finished with value: 0.003739129603202367 and parameters: {'factors': 14, 'regularization': 0.012990881035676582, 'iterations': 26}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/42 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:37:57,730][0m Trial 5 finished with value: 0.0076143146898578935 and parameters: {'factors': 65, 'regularization': 0.005037014081657871, 'iterations': 42}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/42 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:38:06,039][0m Trial 6 finished with value: 0.0034151026184771815 and parameters: {'factors': 9, 'regularization': 0.019683676588139656, 'iterations': 42}. Best is trial 0 with value: 0.01037806531237557.[0m


  0%|          | 0/55 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:38:33,017][0m Trial 7 finished with value: 0.010571204383735966 and parameters: {'factors': 243, 'regularization': 0.029945197328019858, 'iterations': 55}. Best is trial 7 with value: 0.010571204383735966.[0m


  0%|          | 0/13 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:38:49,035][0m Trial 8 finished with value: 0.010864209200490154 and parameters: {'factors': 199, 'regularization': 0.04786881952027468, 'iterations': 13}. Best is trial 8 with value: 0.010864209200490154.[0m


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:38:57,698][0m Trial 9 finished with value: 0.006626518588368007 and parameters: {'factors': 44, 'regularization': 0.007682823070652539, 'iterations': 25}. Best is trial 8 with value: 0.010864209200490154.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:39:12,340][0m Trial 10 finished with value: 0.010758369557005234 and parameters: {'factors': 187, 'regularization': 0.08969970461137287, 'iterations': 10}. Best is trial 8 with value: 0.010864209200490154.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:39:27,004][0m Trial 11 finished with value: 0.01123692979301873 and parameters: {'factors': 193, 'regularization': 0.08928528173225378, 'iterations': 8}. Best is trial 11 with value: 0.01123692979301873.[0m


  0%|          | 0/16 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:39:42,130][0m Trial 12 finished with value: 0.009612892160896207 and parameters: {'factors': 179, 'regularization': 0.0628642210861179, 'iterations': 16}. Best is trial 11 with value: 0.01123692979301873.[0m


  0%|          | 0/17 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:39:58,750][0m Trial 13 finished with value: 0.010758215312283024 and parameters: {'factors': 192, 'regularization': 0.09134927308257812, 'iterations': 17}. Best is trial 11 with value: 0.01123692979301873.[0m


  0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:40:14,509][0m Trial 14 finished with value: 0.010031530285446408 and parameters: {'factors': 154, 'regularization': 0.04832667548800227, 'iterations': 9}. Best is trial 11 with value: 0.01123692979301873.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:40:32,925][0m Trial 15 finished with value: 0.011172004543656898 and parameters: {'factors': 215, 'regularization': 0.09939327815219752, 'iterations': 18}. Best is trial 11 with value: 0.01123692979301873.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:40:52,085][0m Trial 16 finished with value: 0.011324442078610851 and parameters: {'factors': 220, 'regularization': 0.09900993001318334, 'iterations': 21}. Best is trial 16 with value: 0.011324442078610851.[0m


  0%|          | 0/64 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:41:13,946][0m Trial 17 finished with value: 0.008938836555056872 and parameters: {'factors': 154, 'regularization': 0.07993427471847603, 'iterations': 64}. Best is trial 16 with value: 0.011324442078610851.[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:41:34,051][0m Trial 18 finished with value: 0.011929687522652787 and parameters: {'factors': 256, 'regularization': 0.08021396361374135, 'iterations': 20}. Best is trial 18 with value: 0.011929687522652787.[0m


  0%|          | 0/37 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:41:58,220][0m Trial 19 finished with value: 0.01078101422467264 and parameters: {'factors': 250, 'regularization': 0.07629400917812927, 'iterations': 37}. Best is trial 18 with value: 0.011929687522652787.[0m


  0%|          | 0/22 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:42:16,746][0m Trial 20 finished with value: 0.011062905148301817 and parameters: {'factors': 224, 'regularization': 0.09760465513560704, 'iterations': 22}. Best is trial 18 with value: 0.011929687522652787.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:42:32,843][0m Trial 21 finished with value: 0.01275051726668705 and parameters: {'factors': 222, 'regularization': 0.08337907598996913, 'iterations': 8}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/21 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:42:54,023][0m Trial 22 finished with value: 0.011774560520633416 and parameters: {'factors': 250, 'regularization': 0.07631911873053487, 'iterations': 21}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/31 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:43:16,379][0m Trial 23 finished with value: 0.011426998693294802 and parameters: {'factors': 240, 'regularization': 0.07653929653548222, 'iterations': 31}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:43:36,327][0m Trial 24 finished with value: 0.012003873064246112 and parameters: {'factors': 255, 'regularization': 0.06532055497985378, 'iterations': 20}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/32 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:43:53,162][0m Trial 25 finished with value: 0.009461825440855255 and parameters: {'factors': 166, 'regularization': 0.066993532669693, 'iterations': 32}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:44:11,618][0m Trial 26 finished with value: 0.011389162042269675 and parameters: {'factors': 211, 'regularization': 0.08452428179686311, 'iterations': 14}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/37 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:44:35,949][0m Trial 27 finished with value: 0.01120995136077413 and parameters: {'factors': 255, 'regularization': 0.06946287034391599, 'iterations': 37}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:44:55,338][0m Trial 28 finished with value: 0.01086581222382132 and parameters: {'factors': 231, 'regularization': 0.0581591759396126, 'iterations': 20}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/25 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:45:06,708][0m Trial 29 finished with value: 0.007620660598174493 and parameters: {'factors': 85, 'regularization': 0.055113720038443705, 'iterations': 25}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:45:24,081][0m Trial 30 finished with value: 0.011864807473700854 and parameters: {'factors': 230, 'regularization': 0.06972661124145667, 'iterations': 12}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:45:40,978][0m Trial 31 finished with value: 0.011813933497864774 and parameters: {'factors': 209, 'regularization': 0.07045370193663082, 'iterations': 12}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:45:58,675][0m Trial 32 finished with value: 0.01270469522617072 and parameters: {'factors': 235, 'regularization': 0.0837229630184759, 'iterations': 8}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/16 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:46:17,845][0m Trial 33 finished with value: 0.011874987204699181 and parameters: {'factors': 237, 'regularization': 0.08296959647883295, 'iterations': 16}. Best is trial 21 with value: 0.01275051726668705.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:46:36,254][0m Trial 34 finished with value: 0.013394277105747546 and parameters: {'factors': 255, 'regularization': 0.08431402755067025, 'iterations': 8}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:46:51,011][0m Trial 35 finished with value: 0.009849349320041592 and parameters: {'factors': 131, 'regularization': 0.08735481451311423, 'iterations': 10}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:47:06,301][0m Trial 36 finished with value: 0.011954666943297568 and parameters: {'factors': 207, 'regularization': 0.09308498669073799, 'iterations': 8}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:47:24,911][0m Trial 37 finished with value: 0.012195190974877689 and parameters: {'factors': 230, 'regularization': 0.08474194930238024, 'iterations': 14}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/52 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:47:50,142][0m Trial 38 finished with value: 0.010750791373580662 and parameters: {'factors': 230, 'regularization': 0.09484154352657688, 'iterations': 52}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:48:05,992][0m Trial 39 finished with value: 0.010158425455292563 and parameters: {'factors': 177, 'regularization': 0.08589564305031941, 'iterations': 14}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/46 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:48:24,601][0m Trial 40 finished with value: 0.00943647518053784 and parameters: {'factors': 133, 'regularization': 0.09466321983787658, 'iterations': 46}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:48:42,632][0m Trial 41 finished with value: 0.012696409760583586 and parameters: {'factors': 241, 'regularization': 0.08358524858222653, 'iterations': 11}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:49:00,549][0m Trial 42 finished with value: 0.012468428348923548 and parameters: {'factors': 241, 'regularization': 0.08316540407971336, 'iterations': 11}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:49:17,880][0m Trial 43 finished with value: 0.012493954308001808 and parameters: {'factors': 239, 'regularization': 0.07298625839261312, 'iterations': 11}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:49:34,091][0m Trial 44 finished with value: 0.011472775582174272 and parameters: {'factors': 202, 'regularization': 0.08934174502101706, 'iterations': 8}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/12 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:49:52,273][0m Trial 45 finished with value: 0.011559442045798795 and parameters: {'factors': 223, 'regularization': 0.07274078912057444, 'iterations': 12}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/16 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:50:12,324][0m Trial 46 finished with value: 0.011771309511329722 and parameters: {'factors': 242, 'regularization': 0.07840826301174278, 'iterations': 16}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/8 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:50:28,612][0m Trial 47 finished with value: 0.012239921523650681 and parameters: {'factors': 215, 'regularization': 0.07342828181384153, 'iterations': 8}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:50:40,362][0m Trial 48 finished with value: 0.00977221658247429 and parameters: {'factors': 110, 'regularization': 0.09052319480609278, 'iterations': 10}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/18 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:50:56,193][0m Trial 49 finished with value: 0.010639785653681564 and parameters: {'factors': 183, 'regularization': 0.08309509939723111, 'iterations': 18}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/29 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:51:14,021][0m Trial 50 finished with value: 0.01086620344452583 and parameters: {'factors': 198, 'regularization': 0.08104707262852519, 'iterations': 29}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/11 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:51:31,432][0m Trial 51 finished with value: 0.01271471351232759 and parameters: {'factors': 242, 'regularization': 0.08768695257512416, 'iterations': 11}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/14 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:51:50,061][0m Trial 52 finished with value: 0.01239436311821355 and parameters: {'factors': 244, 'regularization': 0.08741060089339941, 'iterations': 14}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/10 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:52:06,039][0m Trial 53 finished with value: 0.011878734369891522 and parameters: {'factors': 220, 'regularization': 0.09340844808662657, 'iterations': 10}. Best is trial 34 with value: 0.013394277105747546.[0m


  0%|          | 0/15 [00:00<?, ?it/s]

[32m[I 2023-03-11 14:52:25,518][0m Trial 54 finished with value: 0.012604390956325399 and parameters: {'factors': 246, 'regularization': 0.0795730926831978, 'iterations': 15}. Best is trial 34 with value: 0.013394277105747546.[0m
EarlyStopping Exceeded: No new best scores on iters 20


In [12]:
# Refit ALS on csr_train with the best hyperparameters from the precision study.
# The trial's params only contain the tuned values (factors, regularization,
# iterations), so the thread count is passed explicitly to match the setting
# every trial was trained with.
precision_model = AlternatingLeastSquares(
    **precision_best_trial.params,
    num_threads=Config.NUM_THREADS,
)
precision_model.fit(csr_train)

  0%|          | 0/39 [00:00<?, ?it/s]

In [13]:
# Refit ALS on csr_train with the best hyperparameters from the recall study.
# The trial's params only contain the tuned values (factors, regularization,
# iterations), so the thread count is passed explicitly to match the setting
# every trial was trained with.
recall_model = AlternatingLeastSquares(
    **recall_best_trial.params,
    num_threads=Config.NUM_THREADS,
)
recall_model.fit(csr_train)

  0%|          | 0/8 [00:00<?, ?it/s]

In [14]:
# Final models keyed by the objective they were tuned for.
models = dict(recall_model=recall_model, precision_model=precision_model)

In [15]:
# Metrics reported for every final model, keyed by display name.
metrics = dict(precision=precision, recall=recall)

In [16]:
# Evaluate every final model on the test split with every metric.
#
# The distinct test users and their ground-truth items do not depend on the
# model or the metric, so they are computed once up front. sort=False keeps
# users in order of first appearance.
test_users = interactions.test["user_id"].drop_duplicates()
relevant_by_user = interactions.test.groupby("user_id", sort=False)["item_id"]

for model_name, model in models.items():
    print(f"Model '{model_name}':")
    pred = test_users.to_frame()
    pred["item_id"] = pred["user_id"].apply(
        lambda user_id:
            model.recommend(
                user_id,
                csr_train[user_id],
                N=Config.K,
                filter_already_liked_items=True
            )[0]  # first element of the result is kept as the recommended ids
    )
    pred = pred.explode("item_id")

    # Group the recommendations once per model instead of re-filtering the
    # whole frame with a boolean mask for every user and every metric.
    recommended_by_user = dict(iter(pred.groupby("user_id", sort=False)["item_id"]))

    for metric_name, metric in metrics.items():
        scores = [
            metric.calculate(recommended_by_user[user_id], relevant_items)
            for user_id, relevant_items in relevant_by_user
        ]
        metric_val = pd.Series(scores, dtype=float).mean()
        print(f"\tMetric {metric_name}@{Config.K}: {metric_val}")

Model 'recall_model':
	Metric precision@10: 0.0018301528847226843
	Metric recall@10: 0.00969147252207179
Model 'precision_model':
	Metric precision@10: 0.0015734851021091366
	Metric recall@10: 0.008407195601705145


In [19]:
# Persist the best trial of the recall study next to the other model artifacts.
trial_path = Config.MODEL_PATH / "als_trial.pickle"
with trial_path.open("wb") as trial_file:
    pickle.dump(recall_best_trial, trial_file)