# Ensemble by ranked list merging

In [1]:
%load_ext autoreload

from os import chdir
chdir('../')

from src.utils.loader import *
from src.utils.evaluator import Evaluator

# Build the Dataset with load_tags enabled and with filter_tag / weight_tag
# disabled; every later cell reads from this `dataset` object.
dataset = Dataset(
    load_tags=True,
    filter_tag=False,
    weight_tag=False,
)
print("Dataset loaded...")

Dataset loaded...


In [2]:
# Neighbourhood-based recommenders. ContentBasedFiltering and
# ItemBasedFiltering are instantiated in the "Initialize models" cells below.
from src.CBF.CBF_tfidf import ContentBasedFiltering
# NOTE(review): UserBasedFiltering is not referenced by any visible cell --
# confirm it is still needed before removing the import.
from src.UBF.UBF2 import UserBasedFiltering
from src.IBF.IBF import ItemBasedFiltering

print("Models imported...")

Models imported...


## Initialize models
### SLIM BPR

In [1]:
%autoreload 2

from src.ML.BPRSLIM import BPRSLIM
from src.utils.matrix_utils import applyTfIdf, top_k_filtering

import os
dataset.set_track_attr_weights_2(1, 1, 0, 0, 0, 0, 0, 0, 0)
ev = Evaluator(seed=False)
ev.cross_validation(5, dataset.train_final.copy())

print('Building the ICM for SLIMBPR...')
icm = dataset.build_icm()

icm_tag = dataset.build_tags_matrix()

tags = applyTfIdf(icm_tag)

# Before stacking tags with the rest of the ICM, we keep only
# the top K tags for each item. This way we try to reduce the
# natural noise added by such sparse features.
tags = top_k_filtering(tags.transpose(), topK=55).transpose()
tags.data = np.ones_like(tags.data)

# stack all
icm = vstack([icm, tags], format='csr')

UsageError: Line magic function `%autoreload` not found.


In [4]:
# SLIM BPR configuration, grouped by purpose. The two sampling chances
# (URM vs. ICM) add up to 1.
slim = BPRSLIM(
    # training schedule
    epochs=30,
    epochMultiplier=1.0,
    # optimizer
    sgd_mode='rmsprop',
    learning_rate=5e-02,
    # model size and sampling split
    topK=300,
    urmSamplingChances=1 / 5,
    icmSamplingChances=4 / 5,
)

Building the ICM for SLIMBPR...
Compiling in Cython...
Compiled module saved in subfolder: /src/ML/Cython
Compilation complete!


In [5]:
# Initialize models
# The track attribute weights are replaced here; the ICM for SLIM BPR was
# already built with the weights set in the SLIM BPR cell.
dataset.set_track_attr_weights_2(
    1.5, 1.6, 0.0,
    0.0, 0.0, 1.0,
    0.0, 0.0, 0.0,
)

# Neighbourhood models (fitted later, on the validation fold)
cbf = ContentBasedFiltering()
ibf = ItemBasedFiltering()

print("Models initialized...")

Models initialized...


### SLIM BPR Fit

In [6]:
# Validation: get the fold from the evaluator. Note the order of the returned
# values (tracks before playlists) versus the order `fit` takes them in.
urm, tg_tracks, tg_playlist = ev.get_fold(dataset)

# Hand the evaluator to the model with an interval of 10 (presumably an
# evaluation every 10 epochs -- the log prints MAP@5 around epoch 10).
slim.set_evaluation_every(10, ev)

# Both matrices are converted to CSR before being passed to the model.
slim.fit(urm.tocsr(), icm.tocsr(), tg_playlist, tg_tracks, dataset)

slim_recs = slim.predict()

File found, retrieving urm from it.
Load from file takes 0.69 seconds
Running fit process.
Processed 500000 ( 29.85% ) in 49.78 seconds. Sample per second: 10045
Processed 1000000 ( 59.70% ) in 18.66 seconds. Sample per second: 14779
Processed 1500000 ( 89.54% ) in 16.10 seconds. Sample per second: 18050
Processed 1675142 ( 100.00% ) in 5.20 seconds. Sample per second: 18993
Return S matrix to python caller...
Epoch 0 of 30 complete in 1.79 minutes
Processed 500000 ( 29.85% ) in 11.69 seconds. Sample per second: 42764
Processed 1000000 ( 59.70% ) in 12.20 seconds. Sample per second: 43098
Processed 1500000 ( 89.54% ) in 12.42 seconds. Sample per second: 42351
Processed 1675142 ( 100.00% ) in 4.76 seconds. Sample per second: 42131
Return S matrix to python caller...
Epoch 1 of 30 complete in 1.08 minutes
Processed 500000 ( 29.85% ) in 13.70 seconds. Sample per second: 36505
Processed 1000000 ( 59.70% ) in 12.87 seconds. Sample per second: 38652
Processed 1500000 ( 89.54% ) in 13.25 seco



MAP@5: 0.09923923697504357
Epoch 10 of 30 complete in 1.84 minutes
Processed 500000 ( 29.85% ) in 22.61 seconds. Sample per second: 22118
Processed 1000000 ( 59.70% ) in 14.49 seconds. Sample per second: 27403
Processed 1500000 ( 89.54% ) in 13.63 seconds. Sample per second: 30225
Processed 1675142 ( 100.00% ) in 5.34 seconds. Sample per second: 30827
Return S matrix to python caller...
Epoch 11 of 30 complete in 1.69 minutes
Processed 500000 ( 29.85% ) in 25.37 seconds. Sample per second: 19706
Processed 1000000 ( 59.70% ) in 13.21 seconds. Sample per second: 26168
Processed 1500000 ( 89.54% ) in 13.38 seconds. Sample per second: 29192
Processed 1675142 ( 100.00% ) in 4.98 seconds. Sample per second: 29923
Return S matrix to python caller...
Epoch 12 of 30 complete in 1.72 minutes
Processed 500000 ( 29.85% ) in 22.78 seconds. Sample per second: 21948
Processed 1000000 ( 59.70% ) in 13.88 seconds. Sample per second: 27868
Processed 1500000 ( 89.54% ) in 13.60 seconds. Sample per second

In [7]:
# If not present, save SLIM BPR parameters to disk.
# NOTE: an existing file is never overwritten, so parameters written by an
# earlier run stay on disk until the file is removed by hand.
# The path is relative to the working directory (changed with chdir('../')
# in the first cell).
w_path = './data/slim_bpr_parameters.npz'
if not os.path.isfile(w_path):
    # NOTE(review): save_sparse_matrix is not imported explicitly in the
    # visible cells; presumably it comes from `from src.utils.loader import *`
    # -- confirm.
    save_sparse_matrix(w_path, slim.getParameters())

In [10]:
from src.Ensemble.ranked_list_merging import interleaved_merging

def merge_ranked_lists(recs, picks=(3, 2), topK=5, mode="continue"):
    """Merge the per-playlist ranked lists of several recommenders.

    For every playlist key of the first recommender, the ranked lists of all
    recommenders are collected in the order given by ``recs`` and handed to
    ``interleaved_merging`` together with ``picks``, ``topK`` and ``mode``.

    Args:
        recs: sequence of dicts mapping a playlist to its ranked list, one
            dict per recommender. The playlists are taken from ``recs[0]``;
            every other dict must contain the same keys.
        picks: per-recommender values forwarded (as a list) as the second
            argument of ``interleaved_merging``, in the same order as
            ``recs``. Presumably the number of items taken from each list --
            confirm against ``interleaved_merging``. The default ``(3, 2)``
            reproduces the previous hard-coded ``[3, 2]``.
        topK: forwarded to ``interleaved_merging`` (default 5, as before).
        mode: forwarded to ``interleaved_merging`` (default "continue", as
            before).

    Returns:
        dict mapping each playlist of ``recs[0]`` to the value returned by
        ``interleaved_merging``; an empty dict when ``recs`` is empty.

    Raises:
        KeyError: if a playlist of ``recs[0]`` is missing from another
            recommender's dict.
    """
    # Nothing to merge: avoid the IndexError on recs[0].
    if not recs:
        return {}

    merged_recs = {}
    for playlist in recs[0].keys():
        lists = [rec[playlist] for rec in recs]
        # A fresh list per call, exactly like the former literal, so a callee
        # that modifies its argument cannot leak state between playlists.
        merged_recs[playlist] = interleaved_merging(lists,
                                                    list(picks),
                                                    topK=topK,
                                                    mode=mode)
    return merged_recs

# Fit the remaining models on the same fold used for SLIM BPR and evaluate
# each recommender on its own before evaluating the merged lists.
# CBF (targets are passed as plain lists here)
cbf.fit(urm,
        list(tg_playlist),
        list(tg_tracks),
        dataset)
cbf_recs = cbf.predict()
print("Evaluating CBF...")
ev.evaluate_fold(cbf_recs)

# IBF -- currently disabled: `ibf` is instantiated earlier but neither
# fitted nor included in the merge below.
#ibf.fit(urm,
#        list(tg_playlist),
#        list(tg_tracks),
#        dataset)
#ibf_recs = ibf.predict()
#print("Evaluating IBF...")
#ev.evaluate_fold(ibf_recs)

# SLIM BPR recommendations were produced in the "SLIM BPR Fit" cell.
print("Evaluating SLIM BPR...")
ev.evaluate_fold(slim_recs)

# Ensemble: the list order matters -- CBF comes first, SLIM BPR second, so
# the first merge quota applies to CBF and the second to SLIM BPR.
merged_recs = merge_ranked_lists([cbf_recs, slim_recs])
print("Evaluating Merged recs...")
ev.evaluate_fold(merged_recs)

CBF started
Build tags matrix and apply TFIDF...
Running 4 workers...
[ 1303 ] Building cosine similarity matrix for [0, 1000)...
[ 1304 ] Building cosine similarity matrix for [4319, 5319)...
[ 1305 ] Building cosine similarity matrix for [8638, 9638)...
[ 1306 ] Building cosine similarity matrix for [12957, 13957)...
[ 1305 ] Building cosine similarity matrix for [9638, 10638)...
[ 1303 ] Building cosine similarity matrix for [1000, 2000)...
[ 1304 ] Building cosine similarity matrix for [5319, 6319)...
[ 1306 ] Building cosine similarity matrix for [13957, 14957)...
[ 1305 ] Building cosine similarity matrix for [10638, 11638)...
[ 1304 ] Building cosine similarity matrix for [6319, 7319)...
[ 1306 ] Building cosine similarity matrix for [14957, 15957)...
[ 1303 ] Building cosine similarity matrix for [2000, 3000)...
[ 1305 ] Building cosine similarity matrix for [11638, 12638)...
[ 1306 ] Building cosine similarity matrix for [15957, 16957)...
[ 1303 ] Building cosine similarity ma

0.11588253034758354