In [1]:
# Load IPython's autoreload extension (reloading it if it is already loaded).
%reload_ext autoreload
# Mode 2: reload all imported modules before running each cell, so edits to
# library code are picked up without restarting the kernel.
%autoreload 2

In [2]:
from collie.movielens import read_movielens_df
from collie.utils import convert_to_implicit


# Read in the explicit-feedback MovieLens 100K data.
# The cells below rely on `user_id`, `item_id`, and `timestamp` columns.
df = read_movielens_df()

# Convert the explicit data to implicit feedback using the library defaults.
# NOTE(review): the exact conversion rule lives in
# `collie.utils.convert_to_implicit` and is not visible here.
df_imp = convert_to_implicit(df)

  "`pytorch_lightning.metrics.*` module has been renamed to `torchmetrics.*` and split off to its own package"


In [3]:
from collie.interactions import SequentialInteractions


# Build the sequential interactions dataset from the implicit-feedback frame.
interactions = SequentialInteractions(
    # raw interaction columns
    users=df_imp['user_id'],
    items=df_imp['item_id'],
    timestamps=df_imp['timestamp'],
    # sequence windowing
    min_sequence_length=2,
    max_sequence_length=7,
    step_size=3,
    max_time=1210000,  # 2 weeks (presumably in seconds, like the timestamps - TODO confirm)
    # sampling / bookkeeping
    num_negative_samples=10,
    allow_missing_ids=True,
    seed=42,
)

# Leave the object as the last expression so its summary repr is displayed.
interactions

  0%|          | 0/942 [00:00<?, ?it/s]

SequentialInteractions object with 18460 sequences between 942 users and 1674 items, returning 10 negative samples per interaction with sequence length between 2 and 7, and step size 3.

In [4]:
from collie.cross_validation import user_based_sequential_train_test_split


# Split the sequences into train and validation sets; the function name
# indicates the split is made per user, and the seed keeps it repeatable.
train, val = user_based_sequential_train_test_split(
    interactions=interactions,
    test_p=0.2,
    seed=42,
)

  0%|          | 0/755 [00:00<?, ?it/s]

  0%|          | 0/187 [00:00<?, ?it/s]

In [5]:
# Display both splits to compare their sequence and user counts.
train, val

(SequentialInteractions object with 14742 sequences between 755 users and 1674 items, returning 10 negative samples per interaction with sequence length between 2 and 7, and step size 3.,
 SequentialInteractions object with 3731 sequences between 187 users and 1674 items, returning 10 negative samples per interaction with sequence length between 2 and 7, and step size 3.)

In [6]:
from collie.model.sequential_cnn import SequentialCNNModel


# Instantiate the sequential CNN recommender on the train / validation splits.
model = SequentialCNNModel(
    # data
    train=train,
    val=val,
    # architecture
    embedding_dim=10,
    residual_connections=True,
    # optimization: separate optimizer and learning rate for the bias terms
    optimizer='adam',
    lr=1e-2,
    bias_optimizer='sgd',
    bias_lr=1e-1,
)

# Last expression: show the module summary.
model

SequentialCNNModel(
  (item_embeddings): ScaledEmbedding(1675, 10, padding_idx=1674)
  (item_biases): ZeroEmbedding(1675, 1, padding_idx=1674)
  (cnn_0): Conv2d(10, 10, kernel_size=(3, 1), stride=(1, 1))
)

In [7]:
from collie.model import CollieTrainer


# Trainer for up to 20 epochs, with the logger and checkpoint callback
# switched off for this demo run.
trainer = CollieTrainer(
    model=model,
    max_epochs=20,
    logger=False,
    checkpoint_callback=False,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [8]:
# Train the model; this prints the parameter summary and progress output below.
trainer.fit(model)


  | Name            | Type            | Params
----------------------------------------------------
0 | item_embeddings | ScaledEmbedding | 16.8 K
1 | item_biases     | ZeroEmbedding   | 1.7 K 
2 | cnn_0           | Conv2d          | 310   
----------------------------------------------------
18.7 K    Trainable params
0         Non-trainable params
18.7 K    Total params
0.075     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch     9: reducing learning rate of group 0 to 1.0000e-03.
Epoch     9: reducing learning rate of group 0 to 1.0000e-02.


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch    15: reducing learning rate of group 0 to 1.0000e-04.
Epoch    15: reducing learning rate of group 0 to 1.0000e-03.


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Epoch    17: reducing learning rate of group 0 to 1.0000e-05.
Epoch    17: reducing learning rate of group 0 to 1.0000e-04.


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [9]:
from collie.metrics import sequential_evaluate_in_batches, sequential_mapk, sequential_mrr


# Score the fitted model on the validation split.
# The returned list follows the order of `metric_list`: [MAP@k, MRR].
sequential_evaluate_in_batches(
    model=model,
    test_interactions=val,
    metric_list=[sequential_mapk, sequential_mrr],
    k=10,
    verbose=True,
)

  0%|          | 0/187 [00:00<?, ?it/s]

  device=device,


[0.04180386976554224, 0.05749833786720822]

----- 

In [10]:
from collie.model.sequential_cnn import SequentialCNNModel


# Rebind `model` to a freshly constructed SequentialCNNModel with the same
# hyperparameters as before. No `trainer.fit` call is made on this instance in
# the cells shown, so evaluating it gives an untrained baseline.
# NOTE(review): this overwrites the trained model held in `model`; rerun the
# construction and training cells above to get it back.
model = SequentialCNNModel(
    # data
    train=train,
    val=val,
    # architecture
    embedding_dim=10,
    residual_connections=True,
    # optimization: separate optimizer and learning rate for the bias terms
    optimizer='adam',
    lr=1e-2,
    bias_optimizer='sgd',
    bias_lr=1e-1,
)

# Last expression: show the module summary.
model

SequentialCNNModel(
  (item_embeddings): ScaledEmbedding(1675, 10, padding_idx=1674)
  (item_biases): ZeroEmbedding(1675, 1, padding_idx=1674)
  (cnn_0): Conv2d(10, 10, kernel_size=(3, 1), stride=(1, 1))
)

In [11]:
from collie.metrics import sequential_evaluate_in_batches, sequential_mapk, sequential_mrr


# Run the same validation metrics on whatever `model` currently refers to.
# The returned list follows the order of `metric_list`: [MAP@k, MRR].
sequential_evaluate_in_batches(
    model=model,
    test_interactions=val,
    metric_list=[sequential_mapk, sequential_mrr],
    k=10,
    verbose=True,
)

  0%|          | 0/187 [00:00<?, ?it/s]

[0.0020190127332984473, 0.005169964811482091]

----- 