In [12]:
import numpy as np
import matplotlib.pyplot as plt

In [10]:
from spotlight.datasets.movielens import get_movielens_dataset

# Load the '100K' variant of MovieLens; the result is bound to `dataset` and
# reused by the later cells in this notebook.
dataset = get_movielens_dataset(variant='100K')
# Print the dataset summary (number of users, items and interactions).
print(dataset)

<Interactions dataset (944 users x 1683 items x 100000 interactions)>


We can feed our dataset to the ExplicitFactorizationModel class — an sklearn-like object that allows us to train and evaluate explicit factorization models.

Internally, the model uses the BilinearNet class to represent users and items. It's composed of four embedding layers:

* a (num_users x latent_dim) embedding layer to represent users,
* a (num_items x latent_dim) embedding layer to represent items,
* a (num_users x 1) embedding layer to represent user biases, and
* a (num_items x 1) embedding layer to represent item biases.

Together, these give us the predictions. Their accuracy is evaluated using one of the Spotlight losses. In this case, we'll use the regression loss, which is simply the squared difference between the true and the predicted rating.

In [4]:
# Both names below are used by this cell; importing them here keeps the cell
# runnable on a fresh kernel (Restart & Run All) instead of relying on
# whatever happens to be left in the kernel namespace.
import torch

from spotlight.factorization.explicit import ExplicitFactorizationModel

# Explicit-feedback factorization model trained with the regression loss.
# The GPU is used only when PyTorch reports that CUDA is available.
model = ExplicitFactorizationModel(loss='regression',
                                   embedding_dim=128,  # latent dimensionality
                                   n_iter=10,  # number of epochs of training
                                   batch_size=1024,  # minibatch size
                                   l2=1e-9,  # strength of L2 regularization
                                   learning_rate=1e-3,
                                   use_cuda=torch.cuda.is_available())

In [7]:
from spotlight.cross_validation import random_train_test_split

# Seed the generator explicitly so the random split is the same on every run.
split_rng = np.random.RandomState(42)
train, test = random_train_test_split(dataset, random_state=split_rng)

# Show the summaries of the two resulting datasets.
split_summary = 'Split into \n {} and \n {}.'.format(train, test)
print(split_summary)

Split into 
 <Interactions dataset (944 users x 1683 items x 80000 interactions)> and 
 <Interactions dataset (944 users x 1683 items x 20000 interactions)>.


In [11]:
# Train the model on the training split; verbose=True prints the loss after
# every epoch (see the log below).
# NOTE(review): confirm that fit() actually returns a value -- if it returns
# None, `history` holds nothing useful and the assignment is misleading.
# NOTE(review): the execution counts are out of order (model built in In [4],
# evaluated in In [9], fitted in In [11]) and the logged first-epoch loss is
# already low, which suggests the model was fitted more than once in the same
# kernel session. Re-run from a fresh kernel to confirm the reported numbers.
history=model.fit(train, verbose=True)

Epoch 0: loss 0.819451430930367
Epoch 1: loss 0.8083234082294416
Epoch 2: loss 0.7969059989422183
Epoch 3: loss 0.7858259760880772
Epoch 4: loss 0.7772685093215749
Epoch 5: loss 0.7654752338988872
Epoch 6: loss 0.7553258417527887
Epoch 7: loss 0.7452180136608172
Epoch 8: loss 0.732913401307939
Epoch 9: loss 0.7219583158251606


In [9]:
from spotlight.evaluation import rmse_score

# Score the fitted model on each split, training split first, then test.
train_rmse, test_rmse = (rmse_score(model, split) for split in (train, test))

# A noticeably lower train RMSE than test RMSE would point to overfitting.
rmse_report = 'Train RMSE {:.3f}, test RMSE {:.3f}'.format(train_rmse, test_rmse)
print(rmse_report)

Train RMSE 0.895, test RMSE 0.940
