In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import time

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml

  return f(*args, **kwds)


In [34]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k, reciprocal_rank

In [7]:
from lrann.datasets import DataLoader, random_train_test_split
from lrann.estimators import ImplicitEst, ExplicitEst
from lrann.models import BilinearNet
from lrann.utils import is_cuda_available
from lrann.evaluations import mrr_score, precision_recall_score

  return f(*args, **kwds)


## Config

In [8]:
# Parse the experiment configuration from YAML.
# `safe_load` only builds plain Python objects (all this config contains) and
# avoids the Loader-less `yaml.load` call, which warns on PyYAML 5.x and is a
# TypeError on PyYAML 6+. The context manager closes the file handle promptly.
with open('experiment_config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

  """Entry point for launching an IPython kernel.


In [9]:
# Echo the parsed configuration so the settings used for this run are visible.
config

{'train_test_split_seed': 42,
 'torch_init_seed': 42,
 'estimator_init_seed': 42,
 'test_percentage': 0.2,
 'embedding_dim': 32,
 'mf_grid_search': {'torch_init_seed': [42, 147, 17, 28, 83],
  'l2': [0.0, 0.003, 0.01, 0.03],
  'learning_rate': [0.001, 0.003, 0.01, 0.03],
  'n_epochs': 20,
  'eval_prec_k': [1, 5, 10]}}

## Data

In [10]:
# Load the MovieLens interactions and binarize the ratings in place.
# After this cell `data.ratings` holds only +1.0 / -1.0 (see the value counts
# in the next cell). `use_user_mean=True` presumably thresholds each rating at
# that user's mean rating -- verify in lrann.datasets.
# NOTE(review): the split summaries printed later show 610 users x 9724 items,
# which looks like the "ml-latest-small" release rather than the classic
# 943-user MovieLens 100k set -- confirm which variant '100k' resolves to.
data = DataLoader().load_movielens('100k')
data.binarize_(use_user_mean=True)

In [11]:
# Class balance after binarization: absolute counts of each rating value.
pd.Series(data.ratings).value_counts()

 1.0    54732
-1.0    46104
dtype: int64

In [12]:
# Seeded random split of the interactions into train and test sets; both the
# seed and the held-out fraction come from the experiment config.
split_rng = np.random.RandomState(seed=config['train_test_split_seed'])
train_data, test_data = random_train_test_split(
    data,
    test_percentage=config['test_percentage'],
    random_state=split_rng,
)

In [13]:
# Show the training split summary (users x items x interactions).
train_data

<Interactions dataset (610 users x 9724 items x 80668 interactions)>

In [14]:
# Show the test split summary (users x items x interactions).
test_data

<Interactions dataset (610 users x 9724 items x 20168 interactions)>

## Load Best BilinearNet Model

In [15]:
# Load the stored hyperparameter-search results and keep the row with the
# highest MRR as the configuration to reproduce.
results_df = pd.read_csv('bilinear_net_mf_hyperparam_opt_res.csv')
ranked_by_mrr = results_df.sort_values('mrr', ascending=False)
best_config = ranked_by_mrr.iloc[0]

In [16]:
# Inspect the hyperparameter row with the highest MRR. All values are floats
# because the row is a single float64 Series, hence the int() casts used later.
best_config

torch_init_seed    17.000000
learning_rate       0.003000
l2                  0.000000
epoch               9.000000
mrr                 0.031317
prec_at_1           0.116393
prec_at_5           0.096721
prec_at_10          0.083279
Name: 489, dtype: float64

In [26]:
# Rebuild the BilinearNet with the torch seed of the best search run.
# The seed is stored as a float in `best_config`, so cast it back to int.
best_torch_seed = int(best_config['torch_init_seed'])
mf_model = BilinearNet(
    data.n_users,
    data.n_items,
    embedding_dim=config['embedding_dim'],
    torch_seed=best_torch_seed,
)

In [27]:
# Wrap the model in an implicit-feedback estimator using the best run's
# hyperparameters. `epoch` is zero-based in the results file (the training log
# starts at "Epoch 0"), so the number of iterations is epoch + 1.
n_epochs = int(best_config['epoch']) + 1
estimator_rng = np.random.RandomState(seed=config['estimator_init_seed'])
mf_est = ImplicitEst(
    model=mf_model,
    n_iter=n_epochs,
    use_cuda=is_cuda_available(),
    random_state=estimator_rng,
    l2=best_config['l2'],
    learning_rate=best_config['learning_rate'],
)

In [28]:
# Retrain on the training split; verbose=True prints the loss after each epoch.
mf_est.fit(train_data, verbose=True)

Epoch 0: loss 0.3647658835783624
Epoch 1: loss 0.1733316681606078
Epoch 2: loss 0.1454951857141383
Epoch 3: loss 0.12945615282952314
Epoch 4: loss 0.11834215751233645
Epoch 5: loss 0.10910051428194847
Epoch 6: loss 0.1023277845676463
Epoch 7: loss 0.09534016713830636
Epoch 8: loss 0.08985964068921545
Epoch 9: loss 0.08554287372000527


In [29]:
# Mean MRR of the retrained model on the test split; it should equal
# best_config['mrr'] if the run was reproduced.
# (mrr_score presumably returns one score per user -- verify in lrann.evaluations.)
mrr_score(mf_est, test_data).mean()

0.03131719286999913

In [30]:
# Mean precision@10 on the test split; it should equal best_config['prec_at_10'].
# Element [0] of the returned pair is taken to be precision (its mean matches
# the stored prec_at_10) -- element [1] is presumably recall.
precision_recall_score(mf_est, test_data, k=10)[0].mean()

0.08327868852459015

Yes, we could exactly reproduce our best model: the MRR (0.0313) and precision@10 (0.0833) match the values stored in `best_config`.

## Train LightFM

In [40]:
# Convert both splits to sparse matrices (presumably scipy.sparse COO) for LightFM.
# NOTE(review): ratings were binarized to +1/-1 above. Confirm how tocoo()
# encodes the -1 entries: LightFM's ranking metrics appear to score every
# stored non-zero entry of the test matrix, so negatives may be counted as hits.
train_sparse = train_data.tocoo()
test_sparse = test_data.tocoo()

In [57]:
# LightFM baseline with BPR loss and the same embedding size as the BilinearNet.
# The model is seeded from the experiment config, like the split, torch init and
# estimator above, so its scores are reproducible after a kernel restart.
# The outputs recorded below came from an unseeded run; re-run to refresh them.
model = LightFM(no_components=config['embedding_dim'],
                loss='bpr',
                learning_rate=0.05,
                random_state=config['estimator_init_seed'])

In [58]:
# Train the LightFM baseline for 10 epochs -- the same number of epochs the
# BilinearNet above was trained for (presumably chosen to match).
model.fit(train_sparse, epochs=10)

<lightfm.lightfm.LightFM at 0x12a588ba8>

In [59]:
# Mean reciprocal rank of the LightFM baseline on the test split.
# NOTE(review): no train_interactions is passed, so items seen during training
# are presumably still ranked. Also confirm LightFM's reciprocal_rank is defined
# the same way as lrann's mrr_score before comparing the two numbers directly.
reciprocal_rank(model, test_sparse).mean()

0.22279325

In [60]:
# Mean precision@10 of the LightFM baseline on the test split.
# NOTE(review): no train_interactions is passed, so items seen during training
# are presumably not excluded from the top-k -- confirm this matches how
# lrann's precision_recall_score treats training items.
precision_at_k(model, test_sparse, k=10).mean()

0.08229508