In [1]:
from pathlib import Path

import numpy as np
from implicit.cpu.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import threadpoolctl

# Cap BLAS thread pools at a single thread (presumably to avoid thread
# oversubscription while the ALS models are fitted -- confirm).
# The limiter is not used as a context manager here, so nothing in this
# cell restores the previous limits; the returned object is the cell's
# displayed value.
threadpoolctl.threadpool_limits(limits=1, user_api="blas")

<threadpoolctl.threadpool_limits at 0x7f9cd2916ad0>

### Load data


In [3]:
# Read the training array from the interim data directory.
# NOTE(review): presumably a user x item interaction matrix -- confirm
# against whatever produced train.npy.
data = np.load("../data/interim/train.npy")

# Last expression of the cell: displays the array's dimensions.
data.shape

(943, 1682)

In [4]:
# Convert the loaded array to SciPy CSR format; `sparse` is what the
# training cell passes to model.fit().
sparse = csr_matrix(data)

### Train models

This section trains several ALS models over a grid of two hyperparameters: the number of latent factors and the regularization coefficient.

In [7]:
# Hyperparameter grid: every (factors, regularization) pair is trained once.
factors_variants = [50, 100, 200, 300]
regularization_variants = [0.01, 0.05, 0.1]
iterations = 200
seed = 126  # passed as random_state to every model

# Create the output directory up front so model.save() does not fail when
# ../models does not exist yet (e.g. on a fresh checkout).
models_dir = Path("../models")
models_dir.mkdir(parents=True, exist_ok=True)

for factors in factors_variants:
    for regularization in regularization_variants:
        # The name encodes both hyperparameters, e.g. "fac50_reg0.01".
        model_name = f"fac{factors}_reg{regularization}"
        print(model_name)

        model = AlternatingLeastSquares(
            factors=factors,
            regularization=regularization,
            iterations=iterations,
            random_state=seed,
        )
        model.fit(sparse)

        # Saved as ../models/<model_name>.npz
        model.save(str(models_dir / f"{model_name}.npz"))



fac50_reg0.01


100%|██████████| 200/200 [00:01<00:00, 128.92it/s]


fac50_reg0.05


100%|██████████| 200/200 [00:01<00:00, 131.52it/s]


fac50_reg0.1


100%|██████████| 200/200 [00:01<00:00, 122.00it/s]


fac100_reg0.01


100%|██████████| 200/200 [00:01<00:00, 110.53it/s]


fac100_reg0.05


100%|██████████| 200/200 [00:02<00:00, 97.97it/s] 


fac100_reg0.1


100%|██████████| 200/200 [00:02<00:00, 87.76it/s] 


fac200_reg0.01


100%|██████████| 200/200 [00:03<00:00, 64.46it/s]


fac200_reg0.05


100%|██████████| 200/200 [00:03<00:00, 63.41it/s]


fac200_reg0.1


100%|██████████| 200/200 [00:03<00:00, 51.70it/s]


fac300_reg0.01


100%|██████████| 200/200 [00:05<00:00, 35.21it/s]


fac300_reg0.05


100%|██████████| 200/200 [00:06<00:00, 33.11it/s]


fac300_reg0.1


100%|██████████| 200/200 [00:05<00:00, 35.59it/s]
