In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, lil_matrix

from recpack.algorithms.base import Algorithm, TorchMLAlgorithm, FactorizationAlgorithm
from recpack.algorithms.util import get_users, get_batches, naive_sparse2tensor, naive_tensor2sparse
from recpack.matrix import to_binary, InteractionMatrix

# Softmax over popularity scores

In [None]:
import numpy as np
from scipy.sparse import csr_matrix

from recpack.algorithms.base import Algorithm

class RandomizedSoftmaxPopularity(Algorithm):
    """Recommend items by sampling from a softmax over log item popularity.

    During fitting, the natural logarithm of every item's interaction count
    is computed, the ``K`` most popular items are kept, and a softmax with
    temperature ``tau`` turns their log-counts into sampling probabilities.

    At prediction time, up to ``K`` distinct items are sampled per user from
    that distribution. The item sampled at position ``ix`` (0-based) is scored
    ``(K - ix) / K``, so the distance between the item in first place and the
    item in second place is the same as between all other consecutive items.

    Items without any interaction are never recommended.

    :param K: Only the K most frequent items are considered for recommendation
    :param tau: Temperature in the softmax computation
    """

    def __init__(self, K, tau):
        super().__init__()
        self.K = K
        self.tau = tau

    def _fit(self, X: csr_matrix):
        """Compute the sampling distribution over the K most popular items.

        Sets ``top_k_pop_items_`` (item indices) and ``softmax_scores_``
        (their sampling probabilities, summing to 1 when non-empty).

        :param X: user x item interaction matrix
        :raises ValueError: if ``K`` is smaller than 1
        """
        if self.K < 1:
            # A slice [-0:] would silently select *all* items.
            raise ValueError(f"K must be at least 1, got {self.K}")

        # Raw interaction count per item as a flat array. np.asarray(...).ravel()
        # works for both np.matrix and ndarray results of a sparse column sum.
        item_counts = np.asarray(X.sum(axis=0)).ravel()

        # Items nobody interacted with would get log(0) = -inf and probability 0;
        # leave them out so they can never block sampling without replacement.
        candidate_items = np.flatnonzero(item_counts > 0)
        log_pop = np.log(item_counts[candidate_items])

        # Cut to top K (argsort is ascending, so the K largest are at the end)
        top_k_positions = np.argsort(log_pop)[-self.K:]
        self.top_k_pop_items_ = candidate_items[top_k_positions]
        top_k_pop = log_pop[top_k_positions]

        if top_k_pop.size == 0:
            # No interactions at all: nothing can be recommended.
            self.softmax_scores_ = np.zeros(0)
            return

        # To make softmax numerically stable, we compute
        # exp((pop - max(pop)) / tau) instead of exp(pop / tau);
        # the common factor cancels out in the normalisation.
        top_k_exp = np.exp((top_k_pop - top_k_pop.max()) / self.tau)
        self.softmax_scores_ = top_k_exp / top_k_exp.sum()

    def _predict(self, X: csr_matrix):
        """Sample a ranked list of popular items for every active user.

        :param X: user x item matrix; users with at least one nonzero entry
            receive recommendations
        :return: csr_matrix with the shape of ``X`` holding the rank scores
        """
        users = np.unique(X.nonzero()[0])

        # Sampling without replacement can only return items with a non-zero
        # probability (very small tau may underflow some of them to 0).
        n_samples = min(self.K, int(np.count_nonzero(self.softmax_scores_)))

        if users.size == 0 or n_samples == 0:
            return csr_matrix(X.shape)

        # The resulting score = (K - ix) / K:
        # the first sampled item gets score 1, the K-th sampled item 1 / K.
        rank_scores = (self.K - np.arange(n_samples)) / self.K

        # Randomly sample items per user, weighted by the softmax scores
        sampled_items = [
            np.random.choice(
                self.top_k_pop_items_,
                size=n_samples,
                replace=False,
                p=self.softmax_scores_,
            )
            for _ in users
        ]

        user_idxs = np.repeat(users, n_samples)
        item_idxs = np.concatenate(sampled_items)
        scores = np.tile(rank_scores, users.size)

        return csr_matrix((scores, (user_idxs, item_idxs)), shape=X.shape)

## Try it out

In [None]:
# Seed numpy's global RNG so this cell, and the np.random-based sampling in the
# cells below, give the same result after Restart Kernel -> Run All.
np.random.seed(42)

# 1000 random (user, item) pageviews over 50 users and 50 items.
users = np.random.randint(0, 50, size=1000)
items = np.random.randint(0, 50, size=1000)
values = np.ones(1000, dtype=int)

# Repeated (user, item) pairs are summed by the csr_matrix constructor;
# to_binary is applied afterwards (presumably reducing counts to 0/1 - see recpack.matrix).
pageviews = to_binary(csr_matrix((values, (users, items)), shape=(50, 50)))


In [None]:
# Consider only the 20 most popular items and use a softmax temperature of 0.1.
algo = RandomizedSoftmaxPopularity(K=20, tau=0.10)

In [None]:
# Fit the popularity sampler on the synthetic pageviews matrix.
algo.fit(pageviews)

In [None]:
# Show the predicted scores of the first user (row 0) as a dense vector.
algo.predict(pageviews).toarray()[0]

## Recency

In [None]:
import numpy as np
from scipy.sparse import csr_matrix, lil_matrix

from recpack.algorithms.base import Algorithm
from recpack.matrix import InteractionMatrix

class Recency(Algorithm):
    """Recommend the items that were interacted with most recently.

    Every item is scored by the timestamp of its latest interaction,
    min-max normalised over the items seen during fitting: the most recently
    visited item scores 1, the least recently visited item scores 0.
    Items without interactions score 0 as well.

    Every user with at least one interaction in the prediction input
    receives the same item scores.
    """

    def _transform_fit_input(self, X):
        # X needs to be an InteractionMatrix for us to have access to
        # the time of interaction at fitting time
        self._assert_is_interaction_matrix(X)
        self._assert_has_timestamps(X)
        # No transformation needed
        return X

    def _fit(self, X: InteractionMatrix):
        """Compute the normalised recency score of every item.

        Sets ``recency_``, an array with one score in [0, 1] per item.

        :param X: interactions with timestamps
        """
        # X.timestamps is indexed by user and item; we drop that index,
        # group by the item column and keep the latest timestamp per item.
        max_ts_per_item = (
            X.timestamps.reset_index()
            .groupby(InteractionMatrix.ITEM_IX)[InteractionMatrix.TIMESTAMP_IX]
            .max()
        )

        recency = np.zeros(X.shape[1])

        if len(max_ts_per_item) > 0:
            seen_items = max_ts_per_item.index.to_numpy()
            last_seen = max_ts_per_item.to_numpy().astype(float)

            # Min-max normalisation over the timestamps that were actually
            # observed; the zeros of unseen items must not enter the range.
            least_recent = last_seen.min()
            span = last_seen.max() - least_recent

            if span > 0:
                recency[seen_items] = (last_seen - least_recent) / span
            else:
                # All seen items are equally recent: avoid 0 / 0 (NaN scores).
                recency[seen_items] = 1.0

        self.recency_ = recency

    def _predict(self, X: csr_matrix):
        """Give every active user the fitted recency scores.

        :param X: user x item matrix; users are taken from it with get_users
        :return: csr_matrix with the shape of ``X``
        """
        results = lil_matrix(X.shape)

        users = get_users(X)

        # Skip the row assignment when nobody needs recommendations.
        if len(users) > 0:
            results[users] = self.recency_

        return results.tocsr()

        

In [None]:
# Size of the synthetic dataset
num_users = 50
num_items = 100
num_interactions = 5000

# Timestamps are drawn from the half-open range [min_t, max_t)
min_t = 0
max_t = 500

# Column names, all taken from InteractionMatrix so they cannot drift apart
USER_IX = InteractionMatrix.USER_IX
ITEM_IX = InteractionMatrix.ITEM_IX
TIMESTAMP_IX = InteractionMatrix.TIMESTAMP_IX


def data_m():
    """Build a reproducible synthetic InteractionMatrix.

    Seeds numpy's global RNG with 42, draws ``num_interactions`` random
    (user, item, timestamp) triples, keeps the first occurrence of every
    (user, item) pair and wraps the result in an InteractionMatrix.
    """
    np.random.seed(42)

    user_col = InteractionMatrix.USER_IX
    item_col = InteractionMatrix.ITEM_IX
    ts_col = InteractionMatrix.TIMESTAMP_IX

    def draw(low, high):
        # One scalar draw per interaction
        return [np.random.randint(low, high) for _ in range(0, num_interactions)]

    # Draw order (users, then items, then timestamps) determines the data
    sampled_users = draw(0, num_users)
    sampled_items = draw(0, num_items)
    sampled_timestamps = draw(min_t, max_t)

    interactions = pd.DataFrame.from_dict(
        {
            user_col: sampled_users,
            item_col: sampled_items,
            ts_col: sampled_timestamps,
        }
    )
    interactions = interactions.drop_duplicates([user_col, item_col])

    return InteractionMatrix(interactions, item_col, user_col, timestamp_ix=ts_col)

# Build the synthetic dataset once; the cells below fit and split on it.
data = data_m()

In [None]:
# Fit the recency baseline on the timestamped interactions.
algo = Recency()
algo.fit(data)

In [None]:
# Dense view of the predicted recency scores (one row per user).
algo.predict(data).toarray()

## Torch algo

In [None]:
from typing import List

import numpy as np
from scipy.sparse import csr_matrix, lil_matrix
import torch
import torch.nn as nn
import torch.optim as optim


from recpack.algorithms.base import TorchMLAlgorithm
from recpack.algorithms.stopping_criterion import StoppingCriterion

class MFModule(nn.Module):
    """Matrix factorisation module: one embedding table for users, one for items.

    The forward pass scores users against items with the dot product of
    their embeddings.

    :param num_users: the amount of users
    :type num_users: int
    :param num_items: the amount of items
    :type num_items: int
    :param num_components: The size of the embedding per user and item, defaults to 100
    :type num_components: int, optional
    """

    def __init__(self, num_users, num_items, num_components=100):
        super().__init__()

        self.num_components = num_components
        self.num_users = num_users
        self.num_items = num_items

        # One row per user / item, num_components columns each
        self.user_embedding = nn.Embedding(num_users, num_components)
        self.item_embedding = nn.Embedding(num_items, num_components)

        # Random start drawn from a normal distribution with
        # standard deviation 1 / sqrt(num_components); users first, then items.
        self.std = 1 / num_components ** 0.5
        for table in (self.user_embedding, self.item_embedding):
            nn.init.normal_(table.weight, std=self.std)

    def forward(
        self, user_tensor: torch.Tensor, item_tensor: torch.Tensor
    ) -> torch.Tensor:
        """Score every user in ``user_tensor`` against every item in ``item_tensor``.

        :param user_tensor: indices of the users to score
        :type user_tensor: torch.Tensor
        :param item_tensor: indices of the items to score
        :type item_tensor: torch.Tensor
        :return: matrix product of the user embeddings and the transposed
            item embeddings (one row per user, one column per item)
        :rtype: torch.Tensor
        """
        user_vectors = self.user_embedding(user_tensor)
        item_vectors = self.item_embedding(item_tensor)

        return torch.matmul(user_vectors, item_vectors.T)

def my_loss(true_sim, predicted_sim):
    """Mean over users (rows) of the summed absolute error per user.

    The absolute difference between ``true_sim`` and ``predicted_sim`` is
    summed along dimension 1, and the resulting per-row totals are averaged.
    """
    per_user_error = (true_sim - predicted_sim).abs().sum(dim=1)
    return per_user_error.mean()


class SillyMF(TorchMLAlgorithm):
    """Toy matrix factorisation trained with SGD on ``my_loss``.

    Wraps :class:`MFModule` in a TorchMLAlgorithm. The parent class is
    configured with ``'recall'`` as stopping criterion and a fixed seed of 42.

    :param batch_size: number of users per training batch
    :param max_epochs: passed on to TorchMLAlgorithm
    :param learning_rate: learning rate of the SGD optimiser
    :param num_components: size of the user and item embeddings, defaults to 100
    """

    def __init__(self, batch_size, max_epochs, learning_rate, num_components=100):
        super().__init__(
            batch_size=batch_size,
            max_epochs=max_epochs,
            learning_rate=learning_rate,
            stopping_criterion='recall',
            seed=42
        )
        self.num_components = num_components

    def _init_model(self, X: csr_matrix):
        """Create the torch module and its optimiser for the shape of ``X``."""
        num_users, num_items = X.shape
        self.model_ = MFModule(
            num_users, num_items, num_components=self.num_components
        ).to(self.device)

        # We'll use a basic SGD optimiser
        self.optimizer = optim.SGD(self.model_.parameters(), lr=self.learning_rate)
        self.steps = 0

    def _train_epoch(self, X):
        """Run one pass over all users in ``X``, in batches of ``batch_size``.

        :param X: user x item training matrix
        :return: the loss value of every batch
        """
        losses = []
        item_tensor = torch.arange(X.shape[1]).to(self.device)
        for users in get_batches(get_users(X), batch_size=self.batch_size):
            self.optimizer.zero_grad()
            user_tensor = torch.LongTensor(users).to(self.device)

            # Call the module itself rather than .forward() so torch hooks run.
            scores = self.model_(user_tensor, item_tensor)

            # The targets must live on the same device as the predictions,
            # otherwise the loss computation fails when training on a GPU.
            expected_scores = naive_sparse2tensor(X[users]).to(self.device)
            loss = my_loss(expected_scores, scores)

            # Backwards propagation of the loss
            loss.backward()
            losses.append(loss.item())
            # Update the weight according to the gradients.
            # All automated thanks to torch.
            self.optimizer.step()
            self.steps += 1
        return losses

    def _batch_predict(self, X: csr_matrix, users: List[int] = None) -> csr_matrix:
        """Predict scores for matrix X, given the selected users.

        If there are no selected users, you can assume X is a full matrix,
        and users can be retrieved as the nonzero indices in the X matrix.

        :param X: Matrix of user item interactions
        :type X: csr_matrix
        :param users: users selected for recommendation
        :type users: List[int]
        :return: sparse matrix with the shape of X, holding the scores of
            every item in the rows of the selected users.
        :rtype: csr_matrix
        """
        X_pred = lil_matrix(X.shape)

        if users is None:
            users = get_users(X)

        if len(users) == 0:
            # Nothing to predict; return the empty score matrix.
            return X_pred.tocsr()

        # Turn the np arrays and lists to torch tensors
        user_tensor = torch.LongTensor(users).to(self.device)
        item_tensor = torch.arange(X.shape[1]).to(self.device)

        # No gradients are needed at prediction time.
        with torch.no_grad():
            batch_scores = self.model_(user_tensor, item_tensor)

        X_pred[users] = batch_scores.cpu().numpy()
        return X_pred.tocsr()
    

In [None]:
from recpack.scenarios import Timed

# Positional arguments: batch_size=10, max_epochs=3, learning_rate=0.5.
algo = SillyMF(10, 3, 0.5, num_components=20)

# Time-based scenario over the synthetic data (timestamps lie in [0, 500)).
# NOTE(review): 400 / 300 are presumably the test and validation split times - confirm in the recpack docs.
sc = Timed(400, t_validation=300, validation=True)
sc.split(data)
# The validation data is passed alongside the training data
# (presumably for the 'recall' stopping criterion SillyMF configures - confirm).
algo.fit(sc.training_data, sc.validation_data)


In [None]:
# Predict for the first element of the scenario's test data
# (presumably the users' input history - confirm against recpack's Scenario docs).
algo.predict(sc.test_data[0]).toarray()

## SVD

In [None]:
import numpy as np
from scipy.sparse import csr_matrix, lil_matrix, diags
from sklearn.decomposition import TruncatedSVD

from recpack.algorithms.base import FactorizationAlgorithm

class SVD(FactorizationAlgorithm):
    """Singular Value Decomposition as dimension reduction recommendation algorithm.

    SVD computed using the TruncatedSVD implementation from sklearn.
    U x Sigma x V ~= X
    U are the user features, and the item features are computed as Sigma x V,
    so that the product of user and item features approximates X.

    :param num_components: The size of the latent dimension
    :type num_components: int

    :param random_state: The seed for the random state to allow for comparison
    :type random_state: int
    """

    def __init__(self, num_components=100, random_state=42):
        super().__init__(num_components=num_components)

        self.random_state = random_state

    def _fit(self, X: csr_matrix):
        """Factorise ``X`` and store the user and item embeddings.

        Sets ``user_embedding_`` (U) and ``item_embedding_`` (Sigma x V).

        :param X: user x item interaction matrix
        :return: self
        """
        model = TruncatedSVD(
            n_components=self.num_components, n_iter=7, random_state=self.random_state
        )

        # TruncatedSVD.fit_transform returns the reduced version of X, i.e.
        # U x Sigma, not U. Using it directly next to Sigma x V would apply
        # the singular values twice (U x Sigma^2 x V).
        u_sigma = model.fit_transform(X)
        singular_values = model.singular_values_

        # Undo the scaling to recover U. Components with a zero singular value
        # have an all-zero column in U x Sigma, so dividing by 1 keeps them at 0.
        safe_singular_values = np.where(singular_values > 0, singular_values, 1.0)
        self.user_embedding_ = u_sigma / safe_singular_values

        # Sigma x V are the item features.
        V = model.components_
        sigma = diags(singular_values)
        self.item_embedding_ = sigma @ V

        return self

In [None]:
# 20 latent dimensions; random_state keeps its default of 42.
algo = SVD(num_components = 20)

In [None]:
# Factorise the full synthetic interaction data.
algo.fit(data)

In [None]:
# Predict scores for the users present in the data.
algo.predict(data)

# Use with pipelines

In [None]:
from recpack.pipelines import ALGORITHM_REGISTRY

# Register the custom class under its own class name ('SillyMF'),
# the name used in the add_algorithm call further down.
ALGORITHM_REGISTRY.register(SillyMF.__name__, SillyMF)


In [None]:
from recpack.pipelines import PipelineBuilder

# The builder collects data, algorithms and metrics before building the pipeline.
pipeline_builder = PipelineBuilder()

# set_data_from_scenario expects the (already split) scenario itself and reads
# the training, validation and test data from it - not just the training data.
pipeline_builder.set_data_from_scenario(sc)


In [None]:
# Add the baseline algorithms, referred to by their registered names.
# Grid parameters will be optimised using grid search before final evaluation:
# every combination of the listed values is a candidate.
pipeline_builder.add_algorithm('ItemKNN', grid={'K': [10, 20]})
pipeline_builder.add_algorithm('EASE', grid={'l2': [10, 100, 1000], 'alpha': [0, 0.1, 0.5]})


In [None]:
# Add our new algorithm under the name it was registered with.
# Optimising learning rate and num_components (grid),
# setting fixed values for max_epochs and batch_size (params).
pipeline_builder.add_algorithm(
    'SillyMF',
    grid={
        'learning_rate': [0.1, 0.01, 0.3], 
        'num_components': [100, 200, 400]
    },
    params={
        'max_epochs': 5,
        'batch_size': 1024
    }
)


In [None]:
# Add NDCG and Recall to be evaluated at 10, 20
# (each metric is computed once per listed K).
pipeline_builder.add_metric('NormalizedDiscountedCumulativeGainK', [10, 20])
pipeline_builder.add_metric('RecallK', [10, 20])


In [None]:
# Use Recall at 10 as the optimisation metric for the grid parameters.
pipeline_builder.set_optimisation_metric('RecallK', 10)

In [None]:
# Turn the collected configuration into a pipeline object.
pipeline = pipeline_builder.build()


In [None]:
# Run the pipeline for all algorithms and metrics added above.
pipeline.run()

In [None]:
# Display the metric results collected by the run.
pipeline.get_metrics()