In [1]:
import os

# Move the working directory up one level. For the original author the
# notebook only works when it is run from the spotlight directory.
# Caution: the move is relative to the current directory, so re-running this
# cell climbs one more level each time.
os.chdir(os.pardir)

This pipeline is designed to train the model in an automated way.
First, we will define a config with all the specifications. This dictionary is vital to keep track of the models we have used.
Then we run the model and extract the metrics we need to evaluate the training.

In [27]:
# Single dictionary holding every setting of one training run, so that the
# exact configuration behind a model can be recorded alongside it.
config = dict(
    # --- Model ---
    model="sequence",          # "sequence" or "factorizer"
    submodel="implicit",       # "implicit" or "explicit"
    loss="pointwise",          # one of "pointwise", "bpr", "hinge", "adaptive_hinge"
    representation="pooling",  # one of "pooling", "cnn", "lstm", "mixture"
    embedding_dim=32,
    n_iter=10,
    batch_size=256,
    l2=0.0,
    lr=0.01,
    optim=None,
    use_cuda=False,
    sparse=False,
    random_state=None,
    num_negative_samples=5,

    # --- Dataset ---
    dataset="Movielens",       # "Movielens", "Synthetic" or "Goodbooks"
    size="100K",
    # The synthetic dataset has many more parameters; they are not exposed
    # here because it is not yet clear whether that dataset will be used.
)

In [28]:
def define_model(config):
    """Build the (unfitted) spotlight model selected by ``config``.

    Parameters
    ----------
    config : dict
        Must contain ``"model"`` (``"sequence"`` or ``"factorizer"``) and
        ``"submodel"`` (``"implicit"`` or ``"explicit"``), plus the
        hyper-parameter keys ``"loss"``, ``"embedding_dim"``, ``"n_iter"``,
        ``"batch_size"``, ``"l2"``, ``"lr"``, ``"optim"``, ``"use_cuda"``,
        ``"sparse"`` and ``"random_state"``. Implicit models additionally
        read ``"num_negative_samples"``; the sequence model additionally
        reads ``"representation"``.

    Returns
    -------
    An instance of ``ImplicitSequenceModel``, ``ImplicitFactorizationModel``
    or ``ExplicitFactorizationModel``, depending on the configuration.

    Raises
    ------
    ValueError
        If the ``"model"`` / ``"submodel"`` combination is not supported.
        (``ValueError`` is an ``Exception``, so existing handlers still work.)
    """
    model_kind, submodel = config["model"], config["submodel"]

    # Resolve the class first so an unsupported combination is reported
    # before any other configuration key is touched.
    if model_kind == "sequence" and submodel == "implicit":
        from spotlight.sequence.implicit import ImplicitSequenceModel as ImportModel
    elif model_kind == "factorizer" and submodel == "implicit":
        from spotlight.factorization.implicit import ImplicitFactorizationModel as ImportModel
    elif model_kind == "factorizer" and submodel == "explicit":
        # The explicit variant lives in its own module; importing the implicit
        # class here would silently train the wrong kind of model.
        from spotlight.factorization.explicit import ExplicitFactorizationModel as ImportModel
    else:
        # Single-quoted keys are not needed any more, and the message avoids
        # nested double quotes, which are a SyntaxError before Python 3.12.
        raise ValueError(
            f"Incorrect model configuration for model {model_kind} and submodel {submodel}."
        )

    # Hyper-parameters forwarded to every supported model.
    kwargs = {
        "loss": config["loss"],
        "embedding_dim": config["embedding_dim"],
        "n_iter": config["n_iter"],
        "batch_size": config["batch_size"],
        "l2": config["l2"],
        "learning_rate": config["lr"],
        "optimizer_func": config["optim"],
        "use_cuda": config["use_cuda"],
        "sparse": config["sparse"],
        "random_state": config["random_state"],
    }

    if model_kind == "sequence":
        # Sequence models take the representation by name ("pooling", ...).
        kwargs["representation"] = config["representation"]
    else:
        # Per the spotlight API docs, factorization models expect a network
        # object (or None) as representation, not one of the sequence names,
        # so a string value is not forwarded.
        # NOTE(review): confirm against the installed spotlight version.
        representation = config.get("representation")
        if representation is not None and not isinstance(representation, str):
            kwargs["representation"] = representation

    if submodel == "implicit":
        # Negative sampling only applies to implicit-feedback models; per the
        # spotlight API docs the explicit model has no such parameter.
        kwargs["num_negative_samples"] = config["num_negative_samples"]

    return ImportModel(**kwargs)

In [30]:
def define_dataset(config):
    """Load the dataset selected by ``config["dataset"]``.

    Parameters
    ----------
    config : dict
        ``config["dataset"]`` must be ``"Movielens"``, ``"Synthetic"`` or
        ``"Goodbooks"`` (case-sensitive). For ``"Movielens"`` the value of
        ``config["size"]`` is passed on as the ``variant`` argument; the
        other datasets are loaded with their default arguments.

    Returns
    -------
    Whatever the selected spotlight loader returns.

    Raises
    ------
    ValueError
        If the dataset name is not one of the supported values.
        (``ValueError`` is an ``Exception``, so existing handlers still work.)
    """
    dataset_name = config["dataset"]

    # Each loader is imported only in its own branch, so choosing one dataset
    # does not depend on the other dataset modules importing cleanly.
    if dataset_name == "Movielens":
        from spotlight.datasets.movielens import get_movielens_dataset
        return get_movielens_dataset(variant=config["size"])
    if dataset_name == "Synthetic":
        from spotlight.datasets.synthetic import generate_sequential
        return generate_sequential()
    if dataset_name == "Goodbooks":
        from spotlight.datasets.goodbooks import get_goodbooks_dataset
        return get_goodbooks_dataset()

    # No nested double quotes inside the f-string: that form is a SyntaxError
    # on Python versions before 3.12.
    raise ValueError(f"Incorrect dataset configuration for dataset {dataset_name}.")

In [46]:
def evaluate_model(model, test):
    """Compute ranking metrics for ``model`` on ``test``.

    Parameters
    ----------
    model
        A fitted model, passed unchanged to the spotlight metric functions.
    test
        The held-out data, passed unchanged to the spotlight metric functions.

    Returns
    -------
    dict
        ``{"mrr": mrr_score(model, test),
        "precision_recall": precision_recall_score(model, test)}``.

    Notes
    -----
    ``spotlight.evaluation`` also offers ``rmse_score``,
    ``sequence_mrr_score`` and ``sequence_precision_recall_score``; they were
    disabled in the original cell and are deliberately not computed here.
    NOTE(review): for sequence models the ``sequence_*`` metrics on a
    sequence-converted test set are probably the intended ones - confirm.
    """
    # Import only what is used; the result is no longer bound to a local
    # called ``eval``, which shadowed the builtin of the same name.
    from spotlight.evaluation import mrr_score, precision_recall_score

    metrics = {
        "mrr": mrr_score(model, test),
        "precision_recall": precision_recall_score(model, test),
    }
    return metrics

In [48]:
from spotlight.cross_validation import random_train_test_split

# Load the data and hold out a test split. The split uses the configured
# random state; with the default of None it stays unseeded, as before.
dataset = define_dataset(config)
train, test = random_train_test_split(dataset, random_state=config["random_state"])

# Only the sequence model is trained on sequences; converting unconditionally
# would hand sequence data to a factorization model as well.
if config["model"] == "sequence":
    train = train.to_sequence()
# NOTE(review): `test` is left as plain interactions, exactly as before.

model = define_model(config)
model.fit(train)

metrics = evaluate_model(model, test)
# Backward-compatible alias for cells that still read `eval`. Be aware that
# this name hides the builtin `eval` for the rest of the notebook.
eval = metrics