In [1]:
import sys
import logging

import numpy as np
import scipy as sp
import sklearn
import statsmodels.api as sm
from statsmodels.formula.api import ols

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Plot styling. The three calls below are order-sensitive.
sns.set_context("poster")
# NOTE(review): sns.set() applies seaborn's theme defaults in addition to the
# rc override given here, so it may undo the "poster" context chosen on the
# previous line -- confirm, and move set_context() after this call if the
# poster scaling is actually wanted.
sns.set(rc={'figure.figsize': (16, 9.)})
sns.set_style("whitegrid")

import pandas as pd
# Allow up to 120 rows / 120 columns before pandas truncates a displayed frame.
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

# Route INFO-and-above log records to stdout so they show up in cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [3]:
from recsys_data.spotlight_datasets import get_movielens_dataset, random_train_test_split
from recsys_data.torch_utils import (cpu, gpu, minibatch, set_seed, shuffle, sample_items, 
                                     _predict_process_ids)
from recsys_data.spotlight_evaluation import mrr_score, precision_recall_score

In [4]:
# True when PyTorch reports a usable CUDA device; used further down as the
# `gpu=` flag when the network is placed on a device.
CUDA = torch.cuda.is_available()

In [5]:
# Interaction data for the MovieLens '100K' variant, loaded through the
# project helper. Later cells read `data.num_users` / `data.num_items` and
# split it into train/test.
data = get_movielens_dataset(variant='100K')

In [6]:
class ScaledEmbedding(nn.Embedding):
    """
    Embedding layer whose weights are drawn from a zero-mean normal
    distribution with standard deviation ``1 / embedding_dim``.
    """

    def reset_parameters(self):
        """
        (Re)draw the weight matrix; the padding row, if one is
        configured, is zeroed afterwards.
        """

        std = 1.0 / self.embedding_dim
        weights = self.weight.data
        weights.normal_(0, std)

        pad = self.padding_idx
        if pad is None:
            return
        weights[pad].fill_(0)


class ZeroEmbedding(nn.Embedding):
    """
    Embedding layer whose weights all start at zero.

    Used for biases.
    """

    def reset_parameters(self):
        """
        Initialize parameters: every entry of the weight matrix is set to 0.
        """

        # Zeroing the whole matrix also zeroes the padding row (if any),
        # so no separate padding_idx handling is required.
        self.weight.data.zero_()

In [7]:
class BilinearNet(nn.Module):
    """
    Bilinear scorer: the prediction for a (user, item) pair is the dot
    product of their embeddings plus a per-user and a per-item bias.

    Parameters
    ----------

    num_users: int
        Number of rows in the user embedding / bias tables.
    num_items: int
        Number of rows in the item embedding / bias tables.
    embedding_dim: int, optional
        Embedding width used when the embedding layers are created here.
    user_embedding_layer: nn.Module, optional
        Pre-built user embedding layer; when given it is used as-is.
    item_embedding_layer: nn.Module, optional
        Pre-built item embedding layer; when given it is used as-is.
    sparse: bool, optional
        Forwarded to the embedding layers constructed here.
    """

    def __init__(self, num_users, num_items, embedding_dim=32,
                 user_embedding_layer=None, item_embedding_layer=None, sparse=False):

        super().__init__()

        self.embedding_dim = embedding_dim

        if user_embedding_layer is not None:
            self.user_embeddings = user_embedding_layer
        else:
            self.user_embeddings = ScaledEmbedding(num_users, embedding_dim,
                                                   sparse=sparse)

        if item_embedding_layer is not None:
            self.item_embeddings = item_embedding_layer
        else:
            self.item_embeddings = ScaledEmbedding(num_items, embedding_dim,
                                                   sparse=sparse)

        self.user_biases = ZeroEmbedding(num_users, 1, sparse=sparse)
        self.item_biases = ZeroEmbedding(num_items, 1, sparse=sparse)

    def forward(self, user_ids, item_ids):
        """
        Compute the forward pass of the representation.

        Parameters
        ----------

        user_ids: tensor
            Tensor of user indices.
        item_ids: tensor
            Tensor of item indices.

        Returns
        -------

        predictions: tensor
            One-dimensional tensor with one prediction per (user, item) pair.
        """

        user_embedding = self.user_embeddings(user_ids)
        item_embedding = self.item_embeddings(item_ids)

        # Flatten explicitly to (n_pairs, dim). An argument-less squeeze()
        # would also drop the batch axis for a single pair (or the feature
        # axis when dim == 1), making the sum over axis 1 below fail.
        user_embedding = user_embedding.reshape(-1, user_embedding.size(-1))
        item_embedding = item_embedding.reshape(-1, item_embedding.size(-1))

        # Bias tables have width 1, so flattening yields one value per pair.
        user_bias = self.user_biases(user_ids).reshape(-1)
        item_bias = self.item_biases(item_ids).reshape(-1)

        dot = (user_embedding * item_embedding).sum(1)

        return dot + user_bias + item_bias

In [8]:
class DeepNet(nn.Module):
    """
    Feed-forward scorer: user and item embeddings are concatenated and
    passed through two sigmoid hidden layers and a linear output unit.

    Parameters
    ----------

    num_users: int
        Number of rows in the user embedding table.
    num_items: int
        Number of rows in the item embedding table.
    embedding_dim: int, optional
        Embedding width; also determines the hidden layer sizes.
    user_embedding_layer: nn.Module, optional
        Pre-built user embedding layer. It must produce vectors of width
        ``embedding_dim`` because the first linear layer is sized from it.
    item_embedding_layer: nn.Module, optional
        Pre-built item embedding layer, same width requirement.
    sparse: bool, optional
        Forwarded to the embedding layers constructed here.
    """

    def __init__(self, num_users, num_items, embedding_dim=8,
                 user_embedding_layer=None, item_embedding_layer=None, sparse=False):

        super().__init__()
        self.embedding_dim = embedding_dim

        # Honour caller-supplied embedding layers, as the sibling networks
        # in this notebook do; previously these arguments were ignored.
        if user_embedding_layer is not None:
            self.user_embeddings = user_embedding_layer
        else:
            self.user_embeddings = ScaledEmbedding(num_users, embedding_dim,
                                                   sparse=sparse)

        if item_embedding_layer is not None:
            self.item_embeddings = item_embedding_layer
        else:
            self.item_embeddings = ScaledEmbedding(num_items, embedding_dim,
                                                   sparse=sparse)

        self._h1 = nn.Linear(2 * embedding_dim, embedding_dim * 4)
        self._h2 = nn.Linear(embedding_dim * 4, embedding_dim * 2)
        self._h3 = nn.Linear(embedding_dim * 2, 1)

    def forward(self, user_ids, item_ids):
        """
        Compute the forward pass of the representation.

        Parameters
        ----------

        user_ids: tensor
            One-dimensional tensor of user indices.
        item_ids: tensor
            One-dimensional tensor of item indices.

        Returns
        -------

        predictions: tensor
            Tensor of predictions with a trailing axis of size 1
            (the output of the final linear layer).
        """

        user_embedding = self.user_embeddings(user_ids)
        item_embedding = self.item_embeddings(item_ids)

        # Concatenate along the feature axis: (n, dim) + (n, dim) -> (n, 2 * dim).
        embedding = torch.cat([user_embedding, item_embedding], dim=1)
        hidden = torch.sigmoid(self._h1(embedding))
        hidden = torch.sigmoid(self._h2(hidden))
        out = self._h3(hidden)

        return out

In [9]:
class DeepBilinearNet(nn.Module):
    """
    Scorer that feeds the element-wise product of the user and item
    embeddings through two sigmoid hidden layers and a linear output unit.
    """

    def __init__(self, num_users, num_items, embedding_dim=32,
                 user_embedding_layer=None, item_embedding_layer=None, sparse=False):

        super().__init__()

        self.embedding_dim = embedding_dim

        # Use the supplied embedding layers when present; otherwise build
        # scaled-normal embeddings (user table first, then item table).
        if user_embedding_layer is None:
            user_embedding_layer = ScaledEmbedding(num_users, embedding_dim,
                                                   sparse=sparse)
        self.user_embeddings = user_embedding_layer

        if item_embedding_layer is None:
            item_embedding_layer = ScaledEmbedding(num_items, embedding_dim,
                                                   sparse=sparse)
        self.item_embeddings = item_embedding_layer

        # Funnel: dim -> dim // 2 -> dim // 4 -> 1
        half = embedding_dim // 2
        quarter = embedding_dim // 4
        self._h1 = nn.Linear(embedding_dim, half)
        self._h2 = nn.Linear(half, quarter)
        self._h3 = nn.Linear(quarter, 1)

    def forward(self, user_ids, item_ids):
        """
        Compute the forward pass of the representation.

        Parameters
        ----------

        user_ids: tensor
            Tensor of user indices.
        item_ids: tensor
            Tensor of item indices.

        Returns
        -------

        predictions: tensor
            Tensor of predictions.
        """

        user_vectors = torch.squeeze(self.user_embeddings(user_ids))
        item_vectors = torch.squeeze(self.item_embeddings(item_ids))

        # No explicit user/item bias terms: the linear layers carry biases.
        hidden = user_vectors * item_vectors
        for layer in (self._h1, self._h2):
            hidden = torch.sigmoid(layer(hidden))

        return self._h3(hidden)

In [10]:
class FMModel(object):
    """
    Trainer / predictor around a scoring network, fitted with a pairwise
    ranking loss against randomly sampled negative items.

    Parameters
    ----------

    num_users: int
        Number of users; used when sampling negative users.
    num_items: int
        Number of items; used when sampling negative items and in predict.
    net: nn.Module
        Network called as ``net(user_ids, item_ids)``. It is expected to
        already live on the device implied by ``use_cuda``.
    embedding_dim: int, optional
        Stored on the instance; not otherwise used by this class.
    n_iter: int, optional
        Number of epochs run by each ``fit`` call.
    batch_size: int, optional
        Minibatch size.
    l2: float, optional
        Weight decay handed to the default Adam optimizer.
    learning_rate: float, optional
        Learning rate handed to the default Adam optimizer.
    optimizer_func: callable, optional
        Called with the network parameters and must return an optimizer.
        When None, Adam with ``learning_rate`` and ``l2`` is used.
    use_cuda: bool, optional
        Flag passed to the ``gpu`` helper for every tensor built here.
    sparse: bool, optional
        Stored on the instance; not otherwise used by this class.
    random_state: np.random.RandomState, optional
        Source of randomness for shuffling and negative sampling.
    num_negative_samples: int, optional
        Stored on the instance; not otherwise used by this class.
    """

    def __init__(self,
                 *,
                 num_users,
                 num_items,
                 net,
                 embedding_dim=32,
                 n_iter=10,
                 batch_size=256,
                 l2=0.0,
                 learning_rate=1e-2,
                 optimizer_func=None,
                 use_cuda=False,
                 sparse=False,
                 random_state=None,
                 num_negative_samples=5):

        self._embedding_dim = embedding_dim
        self._n_iter = n_iter
        self._learning_rate = learning_rate
        self._batch_size = batch_size
        self._l2 = l2
        self._use_cuda = use_cuda
        self._sparse = sparse
        self._random_state = random_state or np.random.RandomState()
        self._num_negative_samples = num_negative_samples
        self._net = net
        self._num_items = num_items
        self._num_users = num_users
        self._optimizer_func = optimizer_func
        self._optimizer = self._make_optimizer(optimizer_func)

        set_seed(self._random_state.randint(-10**8, 10**8),
                 cuda=self._use_cuda)

    def _make_optimizer(self, optimizer_func=None):
        """Build the optimizer: the caller's factory if given, else Adam."""
        if optimizer_func is not None:
            return optimizer_func(self._net.parameters())

        return optim.Adam(
            self._net.parameters(),
            weight_decay=self._l2,
            lr=self._learning_rate
        )

    def _loss(self, positive_predictions, negative_predictions, mask=None):
        """
        Pairwise loss ``1 - sigmoid(positive - negative)``.

        Without a mask the mean over all entries is returned; with a mask
        the masked sum is divided by the sum of the mask.
        """
        loss = (1.0 - torch.sigmoid(positive_predictions -
                                    negative_predictions))

        if mask is not None:
            mask = mask.float()
            loss = loss * mask
            return loss.sum() / mask.sum()

        return loss.mean()

    def fit(self, interactions, verbose=False):
        """
        Fit the model.

        When called repeatedly, model fitting will resume from
        the point at which training stopped in the previous fit
        call.

        Parameters
        ----------

        interactions: :class:`spotlight.interactions.Interactions`
            The input dataset.

        verbose: bool
            Output additional information about current epoch and loss.

        Raises
        ------

        ValueError
            If there are no interactions, or if an epoch ends with a
            NaN or exactly-zero mean loss.
        """

        user_ids = interactions.user_ids.astype(np.int64)
        item_ids = interactions.item_ids.astype(np.int64)

        if len(user_ids) == 0:
            raise ValueError('Cannot fit on an empty set of interactions.')

        # predict() leaves the network in eval mode; switch back before training.
        self._net.train(True)

        for epoch_num in range(self._n_iter):

            users, items = shuffle(user_ids,
                                   item_ids,
                                   random_state=self._random_state)

            user_ids_tensor = gpu(torch.from_numpy(users),
                                  self._use_cuda)
            item_ids_tensor = gpu(torch.from_numpy(items),
                                  self._use_cuda)

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_user,
                  batch_item)) in enumerate(minibatch(user_ids_tensor,
                                                      item_ids_tensor,
                                                      batch_size=self._batch_size)):

                self._optimizer.zero_grad()

                positive_prediction = self._net(batch_user, batch_item)
                # One randomly sampled item per positive pair acts as the negative.
                negative_prediction = self._get_negative_prediction(batch_user)

                loss = self._loss(positive_prediction, negative_prediction)
                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            # Mean of the per-batch losses.
            epoch_loss /= minibatch_num + 1

            if verbose:
                print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))

            if np.isnan(epoch_loss) or epoch_loss == 0.0:
                raise ValueError('Degenerate epoch loss: {}'
                                 .format(epoch_loss))

    def _get_negative_prediction(self, user_ids):
        """Score each user in ``user_ids`` against one sampled item."""

        negative_items = sample_items(
            self._num_items,
            len(user_ids),
            random_state=self._random_state)
        negative_var = gpu(torch.from_numpy(negative_items), self._use_cuda)

        return self._net(user_ids, negative_var)

    def _get_negative_prediction_items(self, item_ids):
        """Score each item in ``item_ids`` against one sampled user."""

        negative_users = sample_items(
            self._num_users,
            len(item_ids),
            random_state=self._random_state)
        negative_var = gpu(torch.from_numpy(negative_users), self._use_cuda)

        return self._net(negative_var, item_ids)

    def _get_multiple_negative_predictions(self, user_ids, n=5):
        """
        Score every user against ``n`` sampled items.

        Returns a tensor of shape ``(n, batch_size)`` whose column ``j``
        holds the ``n`` negative scores of ``user_ids[j]``.
        """

        batch_size = user_ids.size(0)

        # Tile the whole batch n times (u0, u1, ..., u0, u1, ...) so that
        # reshaping to (n, batch_size) keeps each column tied to one user.
        tiled_users = user_ids.reshape(-1).repeat(n)
        negative_prediction = self._get_negative_prediction(tiled_users)

        return negative_prediction.reshape(n, batch_size)

    def predict(self, user_ids, item_ids=None):
        """
        Make predictions: given a user id, compute the recommendation
        scores for items.

        Parameters
        ----------

        user_ids: int or array
           If int, will predict the recommendation scores for this
           user for all items in item_ids. If an array, will predict
           scores for all (user, item) pairs defined by user_ids and
           item_ids.
        item_ids: array, optional
            Array containing the item ids for which prediction scores
            are desired. If not supplied, predictions for all items
            will be computed.

        Returns
        -------

        predictions: np.array
            Predicted scores for all items in item_ids.
        """
        # Attribute kept for backward compatibility; nothing in this class reads it.
        self.training = False
        self._net.train(False)

        user_ids, item_ids = _predict_process_ids(user_ids, item_ids,
                                                  self._num_items,
                                                  self._use_cuda)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            out = self._net(user_ids, item_ids)

        return cpu(out.detach()).numpy().flatten()


In [11]:
# Hold out part of the interactions for evaluation.
# NOTE(review): no seed / random_state is passed, so the split presumably
# differs from run to run -- confirm whether the helper accepts one if
# reproducible metrics are needed.
train, test = random_train_test_split(data)

# Sizes of the user / item id spaces, taken from the full dataset (not the split).
num_users, num_items = data.num_users, data.num_items

In [19]:
# Bilinear (dot product + biases) scorer with 32-dimensional embeddings,
# handed to the gpu() helper with the CUDA availability flag.
net = gpu(BilinearNet(num_users, num_items, embedding_dim=32, sparse=False),
          gpu=CUDA)

In [20]:
# `net` was placed with gpu(..., gpu=CUDA); pass the same flag so the id
# tensors FMModel builds during fit/predict end up on the same device.
model = FMModel(net=net, num_users=num_users, num_items=num_items, n_iter=15,
                use_cuda=CUDA)

In [21]:
# Train for the configured number of epochs, printing the mean loss per epoch.
# NOTE: FMModel.fit has no return statement, so `loss` is None here.
loss = model.fit(train, verbose=True)

Epoch 0: loss 0.30181315155646293
Epoch 1: loss 0.1711395392640711
Epoch 2: loss 0.1547262336785039
Epoch 3: loss 0.14716710788182938
Epoch 4: loss 0.139536773863311
Epoch 5: loss 0.13576723887516667
Epoch 6: loss 0.12900713340828593
Epoch 7: loss 0.12612181025952957
Epoch 8: loss 0.12483221125869325
Epoch 9: loss 0.12131513352877797
Epoch 10: loss 0.1201091303992957
Epoch 11: loss 0.118971790535191
Epoch 12: loss 0.11656660529466482
Epoch 13: loss 0.11565854953834043
Epoch 14: loss 0.1142451648180858


In [22]:
# Mean of the precision values returned for the training interactions
# (recall is computed as well but not displayed).
prec, recall = precision_recall_score(model, train)
prec.mean()

0.5814422057264051

In [16]:
# NOTE(review): same code as the previous cell but with an older execution
# count (In [16] vs In [22]); its output likely stems from an earlier
# model/split -- consider deleting this cell.
prec, recall = precision_recall_score(model, train)
prec.mean()

0.5580063626723224

In [23]:
# Mean of the precision values returned for the held-out test interactions.
prec, recall = precision_recall_score(model, test)
prec.mean()

0.12080679405520171

In [17]:
# NOTE(review): same code as the previous cell but with an older execution
# count (In [17] vs In [23]); its output likely stems from an earlier
# model/split -- consider deleting this cell.
prec, recall = precision_recall_score(model, test)
prec.mean()

0.09968152866242039

In [24]:
# Mean of the MRR helper's scores on (train, test), shown as a tuple.
mrr_score(model, train).mean(), mrr_score(model, test).mean()

(0.0558032748722695, 0.04648800464464815)

In [18]:
# NOTE(review): same code as the previous cell but with an older execution
# count (In [18] vs In [24]); its output likely stems from an earlier
# model/split -- consider deleting this cell.
mrr_score(model, train).mean(), mrr_score(model, test).mean()

(0.052990519204356205, 0.03728451053949891)