In [10]:
import numpy as np
from spotlight.cross_validation import user_based_train_test_split

from settings_definition import define_model, define_dataset

def evaluate_model(model, test, config, k=None):
    """Score a fitted model with the Spotlight evaluation metrics and summarise them.

    Parameters
    ----------
    model
        Fitted model; passed unchanged to the ``spotlight.evaluation`` scorers.
    test
        Held-out data; passed unchanged to the scorers.
    config : dict
        Only ``config['model']`` is read. ``'factorizer'`` selects the
        interaction scorers (MRR, precision/recall, RMSE); any other value
        selects the sequence scorers (sequence MRR, sequence precision/recall).
    k : int, optional
        Cut-off forwarded to the precision/recall scorer. ``None`` (the default)
        keeps the previous behaviour: the scorer's own default for factorizers
        and ``k=5`` for sequence models.

    Returns
    -------
    dict
        The raw scorer outputs under ``"mrr"``, ``"precision_recall"`` (indexed
        as ``[0]`` = precision, ``[1]`` = recall) and, for factorizers only,
        ``"rmse"``; plus the summaries ``"mean_precision"``, ``"precision_std"``,
        ``"mean_recall"``, ``"recall_std"``, ``"mrr_mean"`` and ``"mrr_std"``.
    """
    from spotlight.evaluation import (
        mrr_score,
        precision_recall_score,
        rmse_score,
        sequence_mrr_score,
        sequence_precision_recall_score,
    )

    # The result dict is called `metrics` so the builtin `eval` is not shadowed.
    if config['model'] == 'factorizer':
        # Only forward k when the caller supplied one, so the scorer default applies otherwise.
        pr_kwargs = {} if k is None else {"k": k}
        metrics = {
            "mrr": mrr_score(model, test),
            "precision_recall": precision_recall_score(model, test, **pr_kwargs),
            "rmse": rmse_score(model, test),
        }
    else:
        metrics = {
            "mrr": sequence_mrr_score(model, test),
            "precision_recall": sequence_precision_recall_score(
                model, test, k=5 if k is None else k
            ),
        }

    precision = metrics["precision_recall"][0]
    recall = metrics["precision_recall"][1]
    mrr = metrics["mrr"]

    metrics.update({
        "mean_precision": precision.mean(),
        "precision_std": precision.std(),
        "mean_recall": recall.mean(),
        "recall_std": recall.std(),
        "mrr_mean": mrr.mean(),
        "mrr_std": mrr.std(),
    })
    return metrics

In [11]:
## Note to self: check what you were working on: completing the update of eval with the std of recall and precision

# 1. Model evaluation.

## 1.1 Sequential

In [12]:
# Experiment settings for the sequence model; the dict is handed to
# define_dataset / define_model and later to evaluate_model.
config = {
    # About the model
    "model": "sequence",    # sequence or factorizer
    "submodel": "implicit",    # implicit or explicit
    "loss": "bpr",    # one of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge' or 'regression', 'poisson', 'logistic'
    "representation": "pooling",    # for sequence one of 'pooling', 'cnn', 'lstm', 'mixture'; for factorizer always None
    "embedding_dim": 32,
    "n_iter": 10,
    "batch_size": 256,
    "l2": 0.0,
    "lr": 0.01,
    "optim": None,
    "use_cuda": False,
    "sparse": False,
    "random_state": None,    # NOTE(review): how define_model uses this is not visible here; model init stays unseeded
    "num_negative_samples": 5,

    # About the database
    "dataset": "Movielens",    # Movielens, Synthetic or Goodbooks
    "size": "100K",
    # synthetic has a lot of different parameters but I'm not sure we are going to use it
}

# Fixed seed so that re-running this cell on the same data reproduces the same
# train/test users instead of drawing a fresh random split each time.
SPLIT_SEED = 42

data = define_dataset(config)
train, test = user_based_train_test_split(
    data, random_state=np.random.RandomState(SPLIT_SEED)
)

model1 = define_model(config)

In [13]:
# The sequence model is fitted and scored on the to_sequence() form of the split.
# The converted data gets its own names so this cell can be re-run safely:
# rebinding `train`/`test` would make a second run call to_sequence() on
# already-converted objects.
train_seq = train.to_sequence()
test_seq = test.to_sequence()

model1.fit(train_seq)

# Stored under a model-specific name: avoids shadowing the builtin `eval`
# and keeps this result available after the other models are evaluated.
metrics_sequence = evaluate_model(model1, test_seq, config)
metrics_sequence

{'mrr': array([0.01265823, 0.01315789, 0.05555556, ..., 0.00471698, 0.07142857,
        0.03448276]),
 'precision_recall': (array([0. , 0. , 0.2, ..., 0. , 0. , 0.6]),
  array([0. , 0. , 0.2, ..., 0. , 0. , 0.6])),
 'mean_precision': 0.056295620437956204,
 'precision_std': 0.11018630630712584,
 'mean_recall': 0.056295620437956204,
 'recall_std': 0.11018630630712584,
 'mrr_mean': 0.05081374909310326,
 'mrr_std': 0.14060858935239787}

## 1.2 Factorizers

### 1.2.1  Implicit

In [14]:
# Experiment settings for the implicit factorization model; the dict is handed
# to define_dataset / define_model and later to evaluate_model.
config = {
    # About the model
    "model": "factorizer",    # sequence or factorizer
    "submodel": "implicit",    # implicit or explicit
    "loss": "bpr",    # one of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge' or 'regression', 'poisson', 'logistic'
    "representation": None,    # for sequence one of 'pooling', 'cnn', 'lstm', 'mixture'; for factorizer always None
    "embedding_dim": 32,
    "n_iter": 10,
    "batch_size": 256,
    "l2": 0.0,
    "lr": 0.01,
    "optim": None,
    "use_cuda": False,
    "sparse": False,
    "random_state": None,    # NOTE(review): how define_model uses this is not visible here; model init stays unseeded
    "num_negative_samples": 5,

    # About the database
    "dataset": "Movielens",    # Movielens, Synthetic or Goodbooks
    "size": "100K",
    # synthetic has a lot of different parameters but I'm not sure we are going to use it
}

# Fixed seed so that re-running this cell on the same data reproduces the same
# train/test users instead of drawing a fresh random split each time.
SPLIT_SEED = 42

data = define_dataset(config)
train, test = user_based_train_test_split(
    data, random_state=np.random.RandomState(SPLIT_SEED)
)

model2 = define_model(config)

In [15]:
model2.fit(train)

# Stored under a model-specific name: avoids shadowing the builtin `eval`
# and keeps this result available after the other models are evaluated.
metrics_implicit = evaluate_model(model2, test, config)
metrics_implicit

{'mrr': array([0.08998657, 0.01227764, 0.0143746 , 0.02783247, 0.02962231,
        0.04164298, 0.01635033, 0.02285948, 0.06500461, 0.02310111,
        0.06031569, 0.03922126, 0.03684311, 0.05036622, 0.0503462 ,
        0.02446285, 0.01342779, 0.03550919, 0.02306668, 0.01507695,
        0.0173691 , 0.01481321, 0.01062778, 0.0237602 , 0.04346916,
        0.02531158, 0.04221533, 0.02062894, 0.01683725, 0.03222003,
        0.04947961, 0.049648  , 0.01758903, 0.07075861, 0.03544705,
        0.02211897, 0.01125238, 0.05861225, 0.04776875, 0.00988249,
        0.00488352, 0.01734377, 0.03557974, 0.02029616, 0.00873769,
        0.01330407, 0.02686311, 0.05551123, 0.03400436, 0.01450235,
        0.02716674, 0.03639345, 0.10518442, 0.02204576, 0.09785482,
        0.07860689, 0.02264614, 0.02410437, 0.02556468, 0.01261749,
        0.03158122, 0.04584419, 0.01711283, 0.06571803, 0.06216361,
        0.00904229, 0.01826799, 0.01459157, 0.03719594, 0.00841758,
        0.10694252, 0.04900984, 0.0742203

### 1.2.2  Explicit

In [16]:
# Experiment settings for the explicit factorization model; the dict is handed
# to define_dataset / define_model and later to evaluate_model.
config = {
    # About the model
    "model": "factorizer",    # sequence or factorizer
    "submodel": "explicit",    # implicit or explicit
    "loss": "poisson",    # one of 'pointwise', 'bpr', 'hinge', 'adaptive_hinge' or 'regression', 'poisson', 'logistic'
    "representation": None,    # for sequence one of 'pooling', 'cnn', 'lstm', 'mixture'; for factorizer always None
    "embedding_dim": 32,
    "n_iter": 10,
    "batch_size": 256,
    "l2": 0.0,
    "lr": 0.01,
    "optim": None,
    "use_cuda": False,
    "sparse": False,
    "random_state": None,    # NOTE(review): how define_model uses this is not visible here; model init stays unseeded
    "num_negative_samples": 5,

    # About the database
    "dataset": "Movielens",    # Movielens, Synthetic or Goodbooks
    "size": "100K",
    # synthetic has a lot of different parameters but I'm not sure we are going to use it
}

# Fixed seed so that re-running this cell on the same data reproduces the same
# train/test users instead of drawing a fresh random split each time.
SPLIT_SEED = 42

data = define_dataset(config)
train, test = user_based_train_test_split(
    data, random_state=np.random.RandomState(SPLIT_SEED)
)

model3 = define_model(config)

In [18]:
model3.fit(train)

# Stored under a model-specific name: avoids shadowing the builtin `eval`
# and keeps this result available after the other models are evaluated.
metrics_explicit = evaluate_model(model3, test, config)
metrics_explicit

{'mrr': array([0.02354537, 0.04612623, 0.01818111, 0.04386942, 0.00162648,
        0.00267469, 0.0396516 , 0.00430109, 0.00597827, 0.01855548,
        0.02009738, 0.02775858, 0.00270335, 0.02450203, 0.0140966 ,
        0.00357755, 0.01353591, 0.02430895, 0.02699911, 0.0074485 ,
        0.02737874, 0.02929348, 0.0314315 , 0.01337262, 0.04184779,
        0.00770728, 0.01735366, 0.00713005, 0.01737863, 0.00891787,
        0.02127797, 0.02884152, 0.00520589, 0.00874125, 0.01718297,
        0.01911683, 0.01038515, 0.01055896, 0.01534151, 0.02129745,
        0.01591505, 0.02586353, 0.0199161 , 0.01211212, 0.01120386,
        0.03296798, 0.01032851, 0.05052149, 0.02666047, 0.01704476,
        0.016358  , 0.01121399, 0.00756319, 0.00869298, 0.01557803,
        0.00981431, 0.00949308, 0.01440763, 0.01159373, 0.03028606,
        0.00654464, 0.05866865, 0.01144055, 0.0029235 , 0.02427815,
        0.00617213, 0.00841614, 0.01900528, 0.01890872, 0.02151379,
        0.02047592, 0.01476746, 0.0039255

# 2. Summary

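After training all models (without hyperparameter tuning) on the MovieLens 100K data, the best overall performance comes from the implicit factorizer model. The sequential model still has the highest mean MRR, but there is a lot of variability in its predictions (its MRR standard deviation is the largest of the three). Note that precision and recall for the sequential model are computed with k=5, whereas the factorizers use the evaluation function's default k, so those figures are not directly comparable across model types.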

Sequential:

 'mean_precision': 0.056295620437956204,
 'precision_std': 0.11018630630712584,
 'mean_recall': 0.056295620437956204,
 'recall_std': 0.11018630630712584,
 'mrr_mean': 0.05081374909310326,
 'mrr_std': 0.14060858935239787


Implicit:

 'mean_precision': 0.47889908256880737,
 'precision_std': 0.22466014883869043,
 'mean_recall': 0.0734139089210515,
 'recall_std': 0.05930864354597846,
 'mrr_mean': 0.03468292471541635,
 'mrr_std': 0.024673809548225196

Explicit:


'mean_precision': 0.26222222222222225,
 'precision_std': 0.2445302879571262,
 'mean_recall': 0.032389984964951116,
 'recall_std': 0.027798234725610108,
 'mrr_mean': 0.017774100084189223,
 'mrr_std': 0.012817271594728653