In [1]:
import numpy as np
import pytest
import torch
from torch import nn


In [2]:
from typing import Callable, List, Optional, Union

In [3]:
# Code used from https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py
# Another option is to use torchvision.ops.MLP 
# which is nearly identical in implementation, but is in torchvision and not in base torch
class MLP(nn.Module):
    """Feed-forward network: a stack of ``Linear`` layers with activations in between.

    Every hidden layer is built as ``Linear -> [normalization] -> activation -> Dropout``;
    the stack is closed by a single ``Linear`` projection to ``out_dim`` without
    activation, normalization or dropout.

    Code used from https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py

    Args:
        in_dim (int): Input dimension.
        out_dim (int): Output dimension.
        hidden_dims (Optional[Union[int, List[int]]]): Output dimension of each hidden
            layer. A single int is treated as one hidden layer; ``None`` means no
            hidden layers (the network is then a single ``Linear``).
        dropout (float): Probability used for the dropout layer after each hidden layer.
        activation (Callable[..., nn.Module]): Factory for the activation module,
            called without arguments. Supports module type or partial.
        normalization (Optional[Callable[..., nn.Module]]): Factory for the
            normalization module, called with the hidden dimension
            (None for no normalization). Supports module type or partial.
        **kwargs: Accepted but not used by this implementation.
    Inputs:
        x (Tensor): Tensor containing a batch of input sequences.
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        hidden_dims: Optional[Union[int, List[int]]] = None,
        dropout: float = 0.5,
        activation: Callable[..., nn.Module] = nn.ReLU,
        normalization: Optional[Callable[..., nn.Module]] = None,
        **kwargs,
    ) -> None:
        super().__init__()

        # Normalise the three accepted spellings of `hidden_dims` to a sequence of widths.
        if hidden_dims is None:
            widths = []
        elif isinstance(hidden_dims, int):
            widths = [hidden_dims]
        else:
            widths = hidden_dims

        stack = []
        fan_in = in_dim
        for width in widths:
            stack.append(nn.Linear(fan_in, width))
            if normalization:
                stack.append(normalization(width))
            stack.append(activation())
            stack.append(nn.Dropout(dropout))
            # The next layer consumes what this one produced.
            fan_in = width

        # Final projection to the requested output size.
        stack.append(nn.Linear(fan_in, out_dim))
        self.model = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the layer stack to ``x`` and return the result."""
        return self.model(x)


In [4]:
# Toy dimensions used to smoke-construct an MLP; the input is twice the embedding size.
EMBEDDING_SIZE, N_USERS, N_ITEMS = 3, 5, 10
a = MLP(
    in_dim=2 * EMBEDDING_SIZE,
    out_dim=N_ITEMS,
    hidden_dims=[6, 4, 2],
    dropout=0,
)

In [5]:
def test_MLP_shapes():
    """Check that the MLP maps ``(batch, in_dim)`` inputs to ``(batch, out_dim)`` outputs.

    Covers every accepted form of ``hidden_dims`` (``None``, a single int, a list)
    and verifies that the number of ``Linear`` layers is one more than the number
    of hidden layers. Also checks that a normalization factory, when given, ends up
    in the layer stack.
    """
    batch_size, in_dim, out_dim = 4, 6, 3
    x = torch.rand(batch_size, in_dim)

    # (hidden_dims argument, expected number of hidden layers)
    cases = [(None, 0), (5, 1), ([5], 1), ([6, 4, 2], 3)]
    for hidden_dims, n_hidden in cases:
        mlp = MLP(in_dim, out_dim, hidden_dims, dropout=0)

        assert mlp(x).shape == (batch_size, out_dim)

        linear_layers = [layer for layer in mlp.model if isinstance(layer, nn.Linear)]
        assert len(linear_layers) == n_hidden + 1
        assert linear_layers[0].in_features == in_dim
        assert linear_layers[-1].out_features == out_dim

    normalized = MLP(in_dim, out_dim, [5], dropout=0, normalization=nn.LayerNorm)
    assert any(isinstance(layer, nn.LayerNorm) for layer in normalized.model)
    assert normalized(x).shape == (batch_size, out_dim)


In [16]:
class NMFModule(nn.Module):
    """Model that encodes the Neural Matrix Factorization Network.

    Users and items are each represented by an embedding. The two embeddings of a
    (user, item) pair are concatenated, passed through an MLP that outputs a single
    value, and squashed to the (0, 1) range with a sigmoid.

    :param num_components: size of the embeddings
    :type num_components: int
    :param n_users: number of users in the network
    :type n_users: int
    :param n_items: number of items in the network
    :type n_items: int
    :param hidden_dims: dimensions of the MLP hidden layers.
    :type hidden_dims: Union[int, List[int]]
    :param dropout: Dropout chance between layers of the MLP, defaults to 0.0
    :type dropout: float, optional
    """

    def __init__(
        self,
        num_components: int,
        n_users: int,
        n_items: int,
        hidden_dims: Union[int, List[int]],
        dropout: float = 0.0,
    ):
        super().__init__()

        self.user_embedding = nn.Embedding(n_users, num_components)
        self.item_embedding = nn.Embedding(n_items, num_components)

        # 2 x embedding size as input, since the user and item embedding are concatenated.
        # Output is always 1, since we need a single score for u,i
        self.mlp = MLP(2 * num_components, 1, hidden_dims, dropout=dropout)

        # In order to interpret the output as a probability, the score should be between 0 and 1
        # The paper mentions probit / logistic activation,
        # but in pytorch sigmoid seems like the only one to give 0 to 1 values
        self.final = nn.Sigmoid()

        # weight initialization: zero-mean normal with std 1 / embedding size
        self.user_embedding.weight.data.normal_(0, 1.0 / self.user_embedding.embedding_dim)
        self.item_embedding.weight.data.normal_(0, 1.0 / self.item_embedding.embedding_dim)

    def forward(self, users: torch.LongTensor, items: torch.LongTensor) -> torch.FloatTensor:
        """Predict scores for the user item pairs obtained when zipping together the two 1D tensors

        :param users: 1D tensor with user ids
        :type users: torch.LongTensor
        :param items: 1D tensor with item ids
        :type items: torch.LongTensor
        :raises ValueError: if either input is not 1D, or if the inputs differ in length
        :return: tensor of shape ``(len(users), 1)`` with on row i
            the prediction similarity between `users[i]` and `items[i]`
        :rtype: torch.FloatTensor
        """
        # Validate up front: without these checks malformed inputs only fail deep inside
        # hstack / Linear with an opaque RuntimeError about mismatching sizes.
        if users.dim() != 1 or items.dim() != 1:
            raise ValueError(
                f"users and items should be 1D tensors, got shapes {tuple(users.shape)} and {tuple(items.shape)}."
            )
        if users.shape[0] != items.shape[0]:
            raise ValueError(
                f"users and items should have the same length, got {users.shape[0]} and {items.shape[0]}."
            )

        user_emb = self.user_embedding(users)
        item_emb = self.item_embedding(items)

        # Both embeddings are (N, num_components); hstack concatenates them per pair.
        return self.final(self.mlp(torch.hstack([user_emb, item_emb])))


In [7]:
def test_output_shapes_NMF(embedding_size, num_users, num_items, hidden_sizes, dropout=0.0):
    """Check that no matter the inner settings of the network, the output is always correct.

    "Correct" means one score per (user, item) pair, shaped ``(n_pairs, 1)``,
    with every score inside the [0, 1] range.

    :param embedding_size: size of the user and item embeddings
    :param num_users: number of users known to the module, needs to be at least 3
        because user ids 1 and 2 are used as input
    :param num_items: number of items known to the module, needs to be at least 3
        because item ids 1 and 2 are used as input
    :param hidden_sizes: dimensions of the MLP hidden layers
    :param dropout: dropout passed to the module, defaults to 0.0
    """
    # `dropout` is passed explicitly: NMFModule declares it as a required argument.
    mod = NMFModule(embedding_size, num_users, num_items, hidden_sizes, dropout)

    user_tensor = torch.LongTensor([1, 2])
    item_tensor = torch.LongTensor([1, 2])

    res = mod(user_tensor, item_tensor)  # predict scores for items given the users

    assert res.shape == (2, 1)

    scores = res.detach().numpy()
    assert (scores <= 1).all()
    assert (scores >= 0).all()


test_output_shapes_NMF(5, 10, 10, [10])
test_output_shapes_NMF(5, 10, 10, [10, 5, 3])
test_output_shapes_NMF(5, 3, 10, [10])
test_output_shapes_NMF(1, 3, 3, [10])


def test_shape_input_check_NMF():
    """Check that malformed inputs are rejected with a ValueError.

    Three malformed cases are exercised: both inputs 2D, 1D inputs of different
    lengths, and a 2D user tensor combined with a 1D item tensor.
    """
    # `dropout` is passed explicitly: NMFModule declares it as a required argument.
    mod = NMFModule(3, 3, 3, [10], dropout=0.0)

    # Both inputs are 2D instead of 1D
    user_tensor = torch.LongTensor([[1, 2]])
    item_tensor = torch.LongTensor([[1, 2]])

    with pytest.raises(ValueError):
        mod(user_tensor, item_tensor)

    # 1D inputs, but of different lengths
    user_tensor = torch.LongTensor([1, 2])
    item_tensor = torch.LongTensor([1, 2, 0])

    with pytest.raises(ValueError):
        mod(user_tensor, item_tensor)

    # Mixed dimensionality: 2D users, 1D items
    user_tensor = torch.LongTensor([[1, 2]])
    item_tensor = torch.LongTensor([1, 2])

    with pytest.raises(ValueError):
        mod(user_tensor, item_tensor)


test_shape_input_check_NMF()

In [9]:
from typing import List, Union, Optional

import pandas as pd
from recpack.algorithms.base import TorchMLAlgorithm
from recpack.algorithms.samplers import PositiveNegativeSampler
from recpack.algorithms.util import get_users
from recpack.data.matrix import InteractionMatrix
from scipy.sparse import csr_matrix, lil_matrix


In [11]:
class NeuMF(TorchMLAlgorithm):
    """Implementation of Neural Matrix Factorization.

    Neural Matrix Factorization based on MLP architecture
    as presented in Figure 2 in He, Xiangnan, et al. "Neural collaborative filtering."
    Proceedings of the 26th international conference on world wide web. 2017.

    Represents the users and items using an embedding, and models similarity using a neural network.
    An MLP is used with as input the concatenated embeddings of users and items.

    As in the paper, the sum of square error is used as the loss function.
    Positive items should get a prediction close to 1, while sampled negatives should get a value close to 0.
    The MLP has 3 layers, whose dimensions are based on the `num_components` parameter.
    Bottom layer has `num_components * 2`, middle layer `num_components`
    and the top layer has `num_components / 2` dimensions.

    :param num_components: Size of the embeddings, needs to be an even number.
    :type num_components: int
    :param batch_size: How many samples to use in each update step.
        Higher batch sizes make each epoch more efficient,
        but increases the amount of epochs needed to converge to the optimum,
        by reducing the amount of updates per epoch.
        Defaults to 512.
    :type batch_size: Optional[int]
    :param max_epochs: The max number of epochs to train.
        If the stopping criterion uses early stopping, less epochs could be used.
        Defaults to 10.
    :type max_epochs: Optional[int]
    :param learning_rate: How much to update the weights at each update. Defaults to 0.01
    :type learning_rate: Optional[float]
    :param stopping_criterion: Name of the stopping criterion to use for training.
        For available values,
        check :meth:`recpack.algorithms.stopping_criterion.StoppingCriterion.FUNCTIONS`
        Defaults to 'ndcg'
    :type stopping_criterion: Optional[str]
    :param stop_early: If True, early stopping is enabled,
        and after ``max_iter_no_change`` iterations where improvement of loss function
        is below ``min_improvement`` the optimisation is stopped,
        even if max_epochs is not reached.
        Defaults to False
    :type stop_early: bool, optional
    :param max_iter_no_change: If early stopping is enabled,
        stop after this amount of iterations without change.
        Defaults to 5
    :type max_iter_no_change: int, optional
    :param min_improvement: If early stopping is enabled, no change is detected,
        if the improvement is below this value.
        Defaults to 0.0
    :type min_improvement: float, optional
    :param seed: Seed to the randomizers, useful for reproducible results,
        defaults to None
    :type seed: int, optional
    :param save_best_to_file: If true, the best model will be saved after training,
        defaults to False
    :type save_best_to_file: bool, optional
    :param keep_last: Retain last model, rather than best
        (according to stopping criterion value on validation data), defaults to False
    :type keep_last: bool, optional
    :param predict_topK: The topK recommendations to keep per row in the matrix.
        Use when the user x item output matrix would become too large for RAM.
        Defaults to None, which results in no filtering.
    :type predict_topK: int, optional
    :param U: Amount of negatives to sample for each positive example, defaults to 1
    :type U: int, optional
    :param dropout: Dropout parameter used in MLP, defaults to 0.0
    :type dropout: float, optional
    :param exact_sampling: Enable or disable exact checks while sampling.
        With exact sampling the sampled negatives are guaranteed to not have been visited by the user.
        Non exact sampling assumes that the space for item selection is large enough,
        such that most items are likely not seen before.
        Defaults to False.
    :type exact_sampling: bool, optional
    :raises ValueError: if ``num_components`` is not an even number.
    """

    def __init__(
        self,
        num_components: int,
        batch_size: Optional[int] = 512,
        max_epochs: Optional[int] = 10,
        learning_rate: Optional[float] = 0.01,
        stopping_criterion: Optional[str] = "ndcg",
        stop_early: Optional[bool] = False,
        max_iter_no_change: Optional[int] = 5,
        min_improvement: Optional[float] = 0.0,
        seed: Optional[int] = None,
        save_best_to_file: Optional[bool] = False,
        keep_last: Optional[bool] = False,
        predict_topK: Optional[int] = None,
        U: Optional[int] = 1,
        dropout: Optional[float] = 0.0,
        exact_sampling: Optional[bool] = False,
    ):
        super().__init__(
            batch_size,
            max_epochs,
            learning_rate,
            stopping_criterion,
            stop_early,
            max_iter_no_change,
            min_improvement,
            seed,
            save_best_to_file,
            keep_last,
            predict_topK,
        )

        self.num_components = num_components
        # The top MLP layer has num_components // 2 units, hence the even-number requirement.
        if self.num_components % 2 != 0:
            raise ValueError("Please use an even number of components for training the NeuMF model.")

        self.hidden_dims = [self.num_components * 2, self.num_components, self.num_components // 2]
        self.U = U
        self.dropout = dropout
        self.exact_sampling = exact_sampling

        self.sampler = PositiveNegativeSampler(
            U=self.U, replace=False, exact=exact_sampling, batch_size=self.batch_size
        )

    def _init_model(self, X: csr_matrix) -> None:
        """Create the network and its optimizer, sized after the shape of ``X`` (users x items)."""
        num_users, num_items = X.shape
        self.model_ = NMFModule(self.num_components, num_users, num_items, self.hidden_dims, self.dropout).to(
            self.device
        )
        self.optimizer = torch.optim.Adam(self.model_.parameters(), lr=self.learning_rate)

    def _compute_loss(
        self, positive_scores: torch.FloatTensor, negative_scores: torch.FloatTensor
    ) -> torch.FloatTensor:
        """Compute the Square Error loss given recommendations for positive items, and sampled negatives.

        Positive scores are compared against a target of 1, negative scores against a target of 0;
        the squared errors are summed (not averaged) over all scores.
        """
        mse = nn.MSELoss(reduction="sum")
        return mse(positive_scores, torch.ones_like(positive_scores, dtype=torch.float)) + mse(
            negative_scores, torch.zeros_like(negative_scores, dtype=torch.float)
        )

    def _train_epoch(self, X: csr_matrix) -> List[float]:
        """Run one pass over the batches sampled from ``X``, updating the model after each batch.

        :param X: interaction matrix to sample training batches from
        :type X: csr_matrix
        :return: the loss value of every processed batch, in processing order
        :rtype: List[float]
        """
        losses = []
        for users, positives, negatives in self.sampler.sample(X):
            # The model lives on self.device (see _init_model), so its inputs have to as well;
            # otherwise training fails as soon as the device is not the CPU.
            users = users.to(self.device)
            positives = positives.to(self.device)
            negatives = negatives.to(self.device)

            self.optimizer.zero_grad()

            # Predict for the positives
            positive_scores = self.model_(users, positives)
            # Predict for the negatives
            negative_scores = self.model_(*self._construct_negative_prediction_input(users, negatives))

            loss = self._compute_loss(positive_scores, negative_scores)

            # Backwards propagation of the loss
            loss.backward()
            self.optimizer.step()

            losses.append(loss.item())

        return losses

    def _construct_negative_prediction_input(self, users, negatives):
        """Since negatives has shape batch x U, and users is a 1d vector,
        these need to be turned into two 1d vectors of size batch*U

        First the users as a row are stacked U times and transposed,
        so that this is also a batch x U tensor

        then both are reshaped to remove the 2nd dimension, resulting in a single long 1d vector,
        in which each user is repeated U times before the next user appears.
        """
        return users.repeat(self.U, 1).T.reshape(-1), negatives.reshape(-1)

    def _batch_predict(self, X: csr_matrix, users: Optional[List[int]] = None) -> csr_matrix:
        """Score every item for each of the given users.

        :param X: interaction matrix, only used for its shape
            and to determine the users when ``users`` is None
        :type X: csr_matrix
        :param users: users to predict for, when None ``get_users(X)`` is used
        :type users: Optional[List[int]]
        :return: matrix with the shape of ``X``, rows of the requested users hold the scores
            for all items, other rows are left empty
        :rtype: csr_matrix
        """
        X_pred = lil_matrix(X.shape)
        if users is None:
            users = get_users(X)

        _, n_items = X.shape
        n_users = len(users)

        # Turn the np arrays and lists to torch tensors.
        # Each user is repeated n_items times, and paired with the full range of items.
        user_tensor = torch.LongTensor(users).repeat(n_items, 1).T.reshape(-1).to(self.device)
        item_tensor = torch.arange(n_items).repeat(n_users).to(self.device)

        # No gradients are needed for prediction; skipping them avoids building
        # an autograd graph over n_users * n_items scores.
        # NOTE(review): dropout is only disabled if the model is in eval mode;
        # presumably the caller takes care of that - confirm.
        with torch.no_grad():
            scores = self.model_(user_tensor, item_tensor)

        X_pred[users] = scores.detach().cpu().numpy().reshape(n_users, n_items)
        return X_pred.tocsr()


In [12]:
# Column names of the toy interaction data
TIMESTAMP_IX, ITEM_IX, USER_IX = "ts", "iid", "uid"

# 11 interactions of 6 users (0-5) with 5 items (0-4), one list entry per interaction
data = dict(
    [
        (TIMESTAMP_IX, [3, 2, 1, 4, 0, 1, 2, 4, 0, 1, 2]),
        (ITEM_IX, [0, 1, 2, 3, 0, 1, 2, 4, 0, 1, 2]),
        (USER_IX, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5]),
    ]
)
df = pd.DataFrame(data)

mat = InteractionMatrix(df, ITEM_IX, USER_IX, timestamp_ix=TIMESTAMP_IX)


In [18]:
from recpack.tests.test_algorithms.util import assert_changed, assert_same

def test_training_epoch(X):
    """Train a freshly initialised model for a few epochs and check the trainable parameters changed."""
    algo = NeuMF(num_components=4, U=2, exact_sampling=True)
    device = algo.device
    algo._init_model(X)

    def trainable_parameters():
        """Collect the (name, parameter) pairs of the model that require a gradient."""
        return [
            (name, parameter) for name, parameter in algo.model_.named_parameters() if parameter.requires_grad
        ]

    # Snapshot the values before training; clone, since training updates the parameters in place.
    snapshot = [(name, parameter.clone()) for name, parameter in trainable_parameters()]

    # Each training epoch should update the parameters
    epochs_left = 5
    while epochs_left > 0:
        algo._train_epoch(X)
        epochs_left -= 1

    assert_changed(snapshot, trainable_parameters(), device)


test_training_epoch(mat.binary_values)

In [21]:
def test_batch_predict(mat, users):
    """Check that predicting only fills rows of the requested users and leaves the model untouched."""
    algo = NeuMF(num_components=4, U=2, exact_sampling=True)
    device = algo.device
    algo.fit(mat, (mat, mat))

    def trainable_parameters():
        """Collect the (name, parameter) pairs of the model that require a gradient."""
        return [
            (name, parameter) for name, parameter in algo.model_.named_parameters() if parameter.requires_grad
        ]

    # Snapshot of the fitted parameters, to compare against after predicting.
    snapshot = [(name, parameter.clone()) for name, parameter in trainable_parameters()]

    pred = algo._batch_predict(mat.users_in(users), users=users)

    assert pred.shape == mat.shape
    # Exactly the requested users should have a row with a non-zero sum
    rows_with_scores = pred.sum(axis=1).nonzero()[0]
    np.testing.assert_array_equal(rows_with_scores, users)

    # Predicting should not update the parameters
    assert_same(snapshot, trainable_parameters(), device)


for requested_users in ([0, 1], [0], [0, 1, 3]):
    test_batch_predict(mat, requested_users)

2022-06-14 10:26:01,023 - base - recpack - INFO - Processed epoch 0 in 0.02 s.Batch Training Loss = 8.0996
2022-06-14 10:26:01,032 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.6654823631312136, which is better than previous iterations.
2022-06-14 10:26:01,033 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 10:26:01,038 - base - recpack - INFO - Evaluation at end of 0 took 0.01 s.
2022-06-14 10:26:01,058 - base - recpack - INFO - Processed epoch 1 in 0.02 s.Batch Training Loss = 8.0385
2022-06-14 10:26:01,066 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.6725666087662366, which is better than previous iterations.
2022-06-14 10:26:01,067 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 10:26:01,070 - base - recpack - INFO - Evaluation at end of 1 took 0.01 s.
2022-06-14 10:26:01,086 - base - recpack - INFO - Processed epoch 2 in 0.02 s.Batch Training Loss = 7.9893
2022-06-14 10:26:01,095

2022-06-14 10:26:01,772 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 10:26:01,781 - base - recpack - INFO - Evaluation at end of 0 took 0.02 s.
2022-06-14 10:26:01,828 - base - recpack - INFO - Processed epoch 1 in 0.05 s.Batch Training Loss = 7.3421
2022-06-14 10:26:01,839 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.6028241164651866, which is worse than previous iterations.
2022-06-14 10:26:01,842 - base - recpack - INFO - Evaluation at end of 1 took 0.01 s.
2022-06-14 10:26:01,859 - base - recpack - INFO - Processed epoch 2 in 0.02 s.Batch Training Loss = 7.3398
2022-06-14 10:26:01,867 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.6028241164651866, which is worse than previous iterations.
2022-06-14 10:26:01,868 - base - recpack - INFO - Evaluation at end of 2 took 0.01 s.
2022-06-14 10:26:01,879 - base - recpack - INFO - Processed epoch 3 in 0.01 s.Batch Training Loss = 7.3379
2022-06-14 10:26:01,886 - s

In [22]:
def test_negative_input_construction(users, negatives, U):
    """Check that users and negatives are flattened into two aligned 1D tensors.

    :param users: 1D tensor of user ids, one per row of ``negatives``
    :param negatives: 2D tensor (batch x U) of sampled negative item ids
    :param U: number of negatives per user
    """
    algo = NeuMF(num_components=8, U=U)

    flat_users, flat_negatives = algo._construct_negative_prediction_input(users, negatives)

    assert flat_users.shape == flat_negatives.shape
    assert len(flat_users.shape) == 1  # 1d vectors

    # Check that both are in the right order (each user is repeated U times before the next user is present)
    for position in range(flat_users.shape[0]):
        row, column = divmod(position, U)
        assert flat_users[position] == users[row]
        assert flat_negatives[position] == negatives[row, column]


test_negative_input_construction(
    torch.LongTensor([4, 5, 6]),
    torch.LongTensor([[1, 2], [1, 2], [1, 2]]),
    U=2,
)
test_negative_input_construction(
    torch.LongTensor([4, 5, 6]),
    torch.LongTensor([[1], [1], [1]]),
    U=1,
)


In [24]:
def test_overfit(mat):
    """Fit on a tiny dataset and check every visited item outranks all unvisited items of that user."""
    model = NeuMF(
        num_components=10,
        batch_size=1,
        max_epochs=20,
        learning_rate=0.02,
        stopping_criterion="ndcg",
        U=1,
    )

    # set sampler to exact sampling
    model.sampler.exact = True
    model.fit(mat, (mat, mat))

    interactions = mat.binary_values
    scores = model.predict(mat.binary_values).toarray()

    for user in mat.active_users:
        # The model should have overfitted, so that the visited items have the highest similarities
        visited = interactions[user].nonzero()[1]
        unvisited = list(set(range(mat.shape[1])) - set(visited))

        for item in visited:
            assert (scores[user][unvisited] < scores[user, item]).all()


test_overfit(mat)
    

2022-06-14 10:26:24,089 - base - recpack - INFO - Processed epoch 0 in 0.04 s.Batch Training Loss = 0.4999
2022-06-14 10:26:24,104 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.8901179022730437, which is better than previous iterations.
2022-06-14 10:26:24,105 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 10:26:24,111 - base - recpack - INFO - Evaluation at end of 0 took 0.02 s.
2022-06-14 10:26:24,161 - base - recpack - INFO - Processed epoch 1 in 0.05 s.Batch Training Loss = 0.4982
2022-06-14 10:26:24,177 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.7635908410322833, which is worse than previous iterations.
2022-06-14 10:26:24,177 - base - recpack - INFO - Evaluation at end of 1 took 0.02 s.
2022-06-14 10:26:24,217 - base - recpack - INFO - Processed epoch 2 in 0.04 s.Batch Training Loss = 0.4791
2022-06-14 10:26:24,235 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.9545933701454672, 

## Running an experiment using the pipeline

In [25]:
from recpack.pipelines import PipelineBuilder
from recpack.data.datasets import AdressaOneWeek
from recpack.splitters.scenarios import WeakGeneralization

In [26]:
import os

# Directory holding the dataset file. The original hard-coded location is kept as the default,
# set the RECPACK_DATASET_DIR environment variable to run the notebook on another machine.
DATASET_DIR = os.environ.get(
    "RECPACK_DATASET_DIR",
    '/Users/robinverachtert/workspace/doctorate/datasets/',
)

dataset = AdressaOneWeek(
    path=DATASET_DIR,
    filename='adressa_one_week.csv'
)
data = dataset.load_interaction_matrix()

  0%|          | 0/2532729 [00:00<?, ?it/s]

  0%|          | 0/2532729 [00:00<?, ?it/s]

In [27]:
# Subsample to (at most) 1000 distinct users to make it faster
import numpy as np

active_users = list(data.active_users)
# replace=False: np.random.choice samples with replacement by default,
# which would return duplicate users and so fewer than 1000 distinct ones.
# The sample size is capped, since sampling without replacement can't exceed the population.
users = np.random.choice(active_users, size=min(1000, len(active_users)), replace=False)
data = data.users_in(users)

In [28]:
# Configure a WeakGeneralization scenario (frac_data_in=0.8, validation data enabled)
# and split `data` with it; the pipeline builder below reads its data from this scenario.
scenario = WeakGeneralization(frac_data_in=0.8, validation=True)
scenario.split(data)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [29]:
from recpack.pipelines import ALGORITHM_REGISTRY
# Register the NeuMF class defined in this notebook under the name 'NeuMF',
# the name by which it is referred to when adding the algorithm to the pipeline builder.
ALGORITHM_REGISTRY.register('NeuMF', NeuMF)

In [37]:
builder = PipelineBuilder()
builder.set_data_from_scenario(scenario)

# Optimise on NDCG@10; report NDCG@10 and Coverage@10
builder.set_optimisation_metric('NormalizedDiscountedCumulativeGainK', K=10)
builder.add_metric('NormalizedDiscountedCumulativeGainK', K=10)
builder.add_metric('CoverageK', K=10)

# NeuMF: fixed training settings, with a grid over the embedding size
builder.add_algorithm(
    algorithm='NeuMF',
    params=dict(
        batch_size=512,
        max_epochs=10,
        learning_rate=0.01,
        stopping_criterion='ndcg',
        predict_topK=20,
        U=3,
        dropout=0.01,
    ),
    grid=dict(num_components=[32, 64, 128]),
)

# Baselines to compare against
builder.add_algorithm('Popularity', params=dict(K=20))
builder.add_algorithm('ItemKNN', params=dict(similarity='conditional_probability'))

In [38]:
# Build the pipeline from the data, metrics and algorithms configured on `builder`.
pipeline = builder.build()

In [39]:
# Run the pipeline; the per-epoch training logs of the algorithms are emitted as cell output.
pipeline.run()

  0%|          | 0/3 [00:00<?, ?it/s]

2022-06-14 11:35:50,574 - base - recpack - INFO - Processed epoch 0 in 0.95 s.Batch Training Loss = 386.5929
2022-06-14 11:35:55,193 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.09531884052449853, which is better than previous iterations.
2022-06-14 11:35:55,194 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 11:35:55,242 - base - recpack - INFO - Evaluation at end of 0 took 4.67 s.
2022-06-14 11:35:56,391 - base - recpack - INFO - Processed epoch 1 in 1.15 s.Batch Training Loss = 215.8187
2022-06-14 11:36:00,255 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.09616565142814695, which is better than previous iterations.
2022-06-14 11:36:00,256 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 11:36:00,318 - base - recpack - INFO - Evaluation at end of 1 took 3.93 s.
2022-06-14 11:36:01,397 - base - recpack - INFO - Processed epoch 2 in 1.08 s.Batch Training Loss = 156.5306
2022-06-14 11:3

2022-06-14 11:38:13,676 - base - recpack - INFO - Processed epoch 0 in 5.01 s.Batch Training Loss = 281.7143
2022-06-14 11:38:29,564 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.06854066043060869, which is better than previous iterations.
2022-06-14 11:38:29,565 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 11:38:29,788 - base - recpack - INFO - Evaluation at end of 0 took 16.11 s.
2022-06-14 11:38:34,544 - base - recpack - INFO - Processed epoch 1 in 4.75 s.Batch Training Loss = 150.6045
2022-06-14 11:38:54,420 - stopping_criterion - recpack - INFO - StoppingCriterion has value 0.10127981458122304, which is better than previous iterations.
2022-06-14 11:38:54,421 - base - recpack - INFO - Model improved. Storing better model.
2022-06-14 11:38:54,594 - base - recpack - INFO - Evaluation at end of 1 took 20.05 s.
2022-06-14 11:39:00,131 - base - recpack - INFO - Processed epoch 2 in 5.54 s.Batch Training Loss = 144.9262
2022-06-14 11

  self._set_arrayXarray(i, j, x)


2022-06-14 13:53:45,297 - base - recpack - INFO - Fitting ItemKNN complete - Took 0.276s




In [33]:
import pandas as pd

In [40]:
# Show the collected metrics as a table; transposed so that, as in the output below,
# each row is an algorithm and each column a metric.
pd.DataFrame.from_dict(pipeline.get_metrics()).T

Unnamed: 0,normalizeddiscountedcumulativegaink_10,coveragek_10
"NeuMF(U=3,batch_size=512,dropout=0.01,exact_sampling=False,keep_last=False,learning_rate=0.01,max_epochs=10,max_iter_no_change=5,min_improvement=0.0,num_components=32,predict_topK=20,save_best_to_file=False,seed=1360846362,stop_early=False,stopping_criterion=<recpack.algorithms.stopping_criterion.StoppingCriterion object at 0x129de83a0>)",0.102188,0.003584
Popularity(K=20),0.107707,0.003584
"ItemKNN(K=200,normalize=False,normalize_X=False,normalize_sim=False,pop_discount=None,similarity=conditional_probability)",0.136076,0.079928
