In [1]:
import numpy as np

import torch

from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.interactions import Interactions
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import rmse_score

# Load the MovieLens data through Spotlight's helper, selecting the '100K' variant.
dataset = get_movielens_dataset(variant='100K')

  from ._conv import register_converters as _register_converters


In [2]:
# Hyperparameters for the baseline explicit-feedback factorization model,
# gathered in one place so they can be read (and tweaked) at a glance.
hyperparameters = dict(
    loss='regression',
    embedding_dim=20,     # latent dimensionality
    n_iter=10,            # number of epochs of training
    batch_size=1024,      # minibatch size
    l2=1e-9,              # strength of L2 regularization
    learning_rate=1e-3,
    use_cuda=torch.cuda.is_available(),  # use the GPU only when one is present
)

model = ExplicitFactorizationModel(**hyperparameters)

In [3]:
# Fixed seed so the random split is the same on every run.
split_rng = np.random.RandomState(42)
train, test = random_train_test_split(dataset, random_state=split_rng)

# Show both halves of the split.
split_message = 'Split into \n {} and \n {}.'
print(split_message.format(train, test))

Split into 
 <Interactions dataset (944 users x 1683 items x 80000 interactions)> and 
 <Interactions dataset (944 users x 1683 items x 20000 interactions)>.


In [4]:
# Inspect the scalar type of the user ids in the training split.
type(train.user_ids[0])

numpy.int32

In [5]:
# Inspect the scalar type of the item ids in the training split.
type(train.item_ids[0])

numpy.int32

In [6]:
# Inspect the scalar type of the ratings in the training split.
type(train.ratings[0])

numpy.float32

In [7]:
# Train the baseline model on the training split; verbose=True prints the loss after each epoch.
model.fit(train, verbose=True)

Epoch 0: loss 13.326524903502646
Epoch 1: loss 12.484332821037196
Epoch 2: loss 11.218098604226414
Epoch 3: loss 8.72187715844263
Epoch 4: loss 5.38694132430644
Epoch 5: loss 2.8690303307545335
Epoch 6: loss 1.72804187370252
Epoch 7: loss 1.3053828387320796
Epoch 8: loss 1.1225555546676056
Epoch 9: loss 1.0243728915347328


In [8]:
# Score the fitted model on the training split first, then on the held-out split.
train_rmse, test_rmse = (rmse_score(model, split) for split in (train, test))

report = 'Train RMSE {:.3f}, test RMSE {:.3f}'
print(report.format(train_rmse, test_rmse))

Train RMSE 0.990, test RMSE 1.026


In [9]:
def get_datasets(data_dir='ml-10M100K', n_folds=5):
    """
    Load pre-split cross-validation folds from disk.

    Fold ``k`` (1-based) is read from ``<data_dir>/r<k>.train`` and
    ``<data_dir>/r<k>.test``. Each file is parsed as ``::``-delimited
    records of (int user id, int item id, float rating).

    Parameters
    ----------

    data_dir: str, optional
        Directory containing the ``r<k>.train`` / ``r<k>.test`` files.
    n_folds: int, optional
        Number of folds to load; folds ``1..n_folds`` are read.

    Returns
    -------

    (trains, tests): tuple of lists of :class:`spotlight.interactions.Interactions`
        ``trains[i]`` and ``tests[i]`` are the training and test
        interactions of fold ``i + 1``.
    """

    def get_part(num, part):
        """Read one fold file and return its (users, items, ratings) arrays."""
        print("Reading {} part {}".format(part[1:], num))
        path = '{}/r{}{}'.format(data_dir, num, part)

        # atleast_1d: a file holding a single record comes back as a 0-d
        # array, which would otherwise not behave like a sequence of rows.
        data = np.atleast_1d(
            np.genfromtxt(path, delimiter='::', dtype=(int, int, float))
        )

        # The tuple dtype yields a structured array, so each column can be
        # pulled out in one vectorised step instead of a per-row Python loop.
        user_field, item_field, rating_field = data.dtype.names[:3]

        # int32 ids / float32 ratings mirror the dtypes of the Spotlight
        # MovieLens dataset inspected earlier in this notebook.
        users = data[user_field].astype(np.int32)
        items = data[item_field].astype(np.int32)
        ratings = data[rating_field].astype(np.float32)
        return users, items, ratings

    train_extension = '.train'
    test_extension = '.test'
    folds = range(1, n_folds + 1)

    # All training parts are read first, then all test parts.
    trains = [Interactions(*get_part(r, train_extension)) for r in folds]
    tests = [Interactions(*get_part(r, test_extension)) for r in folds]

    return trains, tests

In [10]:
# Read the five pre-split train/test folds (parts 1-5) from disk.
train_datasets, test_datasets = get_datasets()

Reading train part 1
Reading train part 2
Reading train part 3
Reading train part 4
Reading train part 5
Reading test part 1
Reading test part 2
Reading test part 3
Reading test part 4
Reading test part 5


In [11]:
# Candidate L2 regularization strengths to compare by cross-validation.
lams = [0.1, 0.01, 0.001]
for lam in lams:
    print("L2 Regularization value - {}\n".format(lam))

    errors = []
    # Distinct loop names so the earlier `train` / `test` split is not clobbered.
    for fold_train, fold_test in zip(train_datasets, test_datasets):
        # Build a fresh model for every fold: calling fit() again on an
        # already-fitted Spotlight model resumes training, so reusing one
        # model would score later folds with weights that have already
        # been trained on other folds' training data (leakage).
        model = ExplicitFactorizationModel(loss='regression',
                                           embedding_dim=20,  # latent dimensionality
                                           n_iter=10,  # number of epochs of training
                                           batch_size=1024,  # minibatch size
                                           l2=lam,  # strength of L2 regularization
                                           learning_rate=1e-3,
                                           use_cuda=torch.cuda.is_available())
        model.fit(fold_train, verbose=True)
        errors.append(rmse_score(model, fold_test))

    # Mean of the per-fold test RMSEs for this regularization strength.
    print("Cross-validation RMSE is {}".format(np.mean(errors)))

L2 Regularization value - 0.1



KeyboardInterrupt: 