# Indexable Representation Learning

As mentioned in the presentation, indexable representation refers to recommendation algorithms whose latent vector representations are immediately searchable in sublinear time. In this tutorial, we are going to experiment with one such model, namely Indexable Bayesian Personalized Ranking, or IBPR for short.

In [1]:
%matplotlib inline
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
import scipy.sparse as ss

from cornac.eval_methods import BaseMethod
from cornac.models import BPR, IBPR
from utils.lsh import *
from utils.load_data import *
from utils.pmf import *
from utils.evaluation import *

In [2]:
# Load the pre-split rating data. Context managers make sure both file
# handles are closed as soon as unpickling finishes (the bare
# `pickle.load(open(...))` form leaves them open until garbage collection).
# NOTE: pickle can execute arbitrary code while loading -- only use data
# files that come from a trusted source.
with open('train_data', 'rb') as train_file:
    train = pickle.load(train_file)
with open('test_data', 'rb') as test_file:
    test = pickle.load(test_file)

In [3]:
# Build the evaluation method from the already-split train/test data.
# exclude_unknowns=False is passed explicitly, and verbose=True makes cornac
# print a summary of the train and test sets.
eval_method = BaseMethod.from_provided(
    train_data=train,
    test_data=test,
    exclude_unknowns=False,
    verbose=True,
)

rating_threshold = 1.0
exclude_unknowns = False
Building training set
Number of training users = 6040
Number of training items = 3659
Max rating = 5.0
Min rating = 1.0
Global mean = 3.6
Building test set
Number of tested users = 6040
Number of unknown users = 0
Number of unknown items = 47


In [4]:
# Train the baseline BPR model (k=50) on the training set.
rec_bpr = BPR(k = 50, max_iter=50, learning_rate=0.05, lamda=0.001, batch_size=5000, init_params={'U':None, 'V':None})
rec_bpr.fit(eval_method.train_set)

# Persist the trained model. The context manager guarantees the file is
# flushed and closed, so 'bpr.model' is complete on disk when the cell ends.
with open('bpr.model', 'wb') as model_file:
    pickle.dump(rec_bpr, model_file)

# To reuse a previously trained model instead of retraining, run:
# with open('bpr.model', 'rb') as model_file:
#     rec_bpr = pickle.load(model_file)

Shafling the data
epoch: 0 loss: tensor(5307.4399, grad_fn=<ThSubBackward>)


In [6]:
# Number of items recommended to each user (used for both the exact and
# the LSH-based evaluation below).
topK = 10

# Distinct ids found in the first column of `test`
# (presumably user ids -- they are what `rec_bpr.rank` is called with).
inv_lst = np.unique(test[:, 0]).astype(int)

# Exact top-K list per id, cut from the full ranking produced by BPR.
bpr_rec_list = {}
for inv in inv_lst:
    if inv in bpr_rec_list:
        continue
    bpr_rec_list[inv] = rec_bpr.rank(inv)[:topK]

# Baseline quality of the exact (exhaustive) recommendation lists.
bpr_prec, bpr_recall = evaluate_topK(test, bpr_rec_list, topK)
print('bpr_prec@{0} \t bpr_recall@{0}'.format(topK))
print('{0}\t{1}'.format(bpr_prec, bpr_recall))
print('-----------------------------------------------------------------------')

# LSH configurations to sweep: b_vals feeds the "#bit" column and
# L_vals the "#table" column of the report printed below.
b_vals = [4]
L_vals = [10]

# Learned factor matrices of the BPR model, used as LSH queries and indexed data.
queries = rec_bpr.U
data    = rec_bpr.V

print('#table\t #bit \t relative_prec@{0} \t relative_recall@{0} \t touched'.format(topK))
for nt in L_vals:
    print('-----------------------------------------------------------------------')
    for b in b_vals:
        # NOTE(review): the queries are negated before being passed in --
        # presumably because the LSH helper treats `dot` as a distance to
        # minimise; confirm against utils.lsh / utils.evaluation.
        prec, recall, touched = evaluate_LSHTopK(test, data, -queries, CosineHashFamily, nt, b, dot, topK)
        # Precision/recall are reported relative to the exact BPR baseline.
        print("{0}\t{1}\t{2}\t{3}\t{4}".format(nt, b, prec/bpr_prec, recall/bpr_recall, touched))


bpr_prec@10 	 bpr_recall@10
0.013344370860927147	0.002546935131049344
#table	 #bit 	 relative_prec@10 	 relative_recall@10 	 touched
-----------------------------------------------------------------------
10	4	0.9292803970223333	0.9344277772763477	0.46731057774624485


In [6]:
# Train the indexable variant (IBPR) with the same hyper-parameters as the
# BPR baseline so that the two models are directly comparable.
rec_ibpr = IBPR(k = 50, max_iter=50, learning_rate=0.05, lamda=0.001, batch_size=5000, init_params={'U':None, 'V':None})
rec_ibpr.fit(eval_method.train_set)

# Persist the trained model. The context manager guarantees the file is
# flushed and closed, so 'ibpr.model' is complete on disk when the cell ends.
with open('ibpr.model', 'wb') as model_file:
    pickle.dump(rec_ibpr, model_file)

# To reuse a previously trained model instead of retraining, run:
# with open('ibpr.model', 'rb') as model_file:
#     rec_ibpr = pickle.load(model_file)

Shafling the data
epoch: 0 loss: tensor(3612.6685, grad_fn=<ThSubBackward>)


In [None]:
# Number of items recommended to each user (used for both the exact and
# the LSH-based evaluation below).
topK = 10

# Distinct ids found in the first column of `test`
# (presumably user ids -- they are what `rec_ibpr.rank` is called with).
inv_lst = np.unique(test[:, 0]).astype(int)

# Exact top-K list per id, cut from the full ranking produced by IBPR.
ibpr_rec_list = {}
for inv in inv_lst:
    if inv in ibpr_rec_list:
        continue
    ibpr_rec_list[inv] = rec_ibpr.rank(inv)[:topK]

# Baseline quality of the exact (exhaustive) recommendation lists.
ibpr_prec, ibpr_recall = evaluate_topK(test, ibpr_rec_list, topK)
print('ibpr_prec@{0} \t ibpr_recall@{0}'.format(topK))
print('{0}\t{1}'.format(ibpr_prec, ibpr_recall))
print('-----------------------------------------------------------------------')

# LSH configurations to sweep: b_vals feeds the "#bit" column and
# L_vals the "#table" column of the report printed below.
b_vals = [4]
L_vals = [10]

# Learned factor matrices of the IBPR model, used as LSH queries and indexed data.
queries = rec_ibpr.U
data    = rec_ibpr.V

print('#table\t #bit \t relative_prec@{0} \t relative_recall@{0} \t touched'.format(topK))
for nt in L_vals:
    print('-----------------------------------------------------------------------')
    for b in b_vals:
        # NOTE(review): the queries are negated before being passed in --
        # presumably because the LSH helper treats `dot` as a distance to
        # minimise; confirm against utils.lsh / utils.evaluation.
        prec, recall, touched = evaluate_LSHTopK(test, data, -queries, CosineHashFamily, nt, b, dot, topK)
        # Precision/recall are reported relative to the exact IBPR baseline.
        print("{0}\t{1}\t{2}\t{3}\t{4}".format(nt, b, prec/ibpr_prec, recall/ibpr_recall, touched))

ibpr_prec@10 	 ibpr_recall@10
0.013112582781456928	0.0025468861595255837
-----------------------------------------------------------------------
#table	 #bit 	 relative_prec@10 	 relative_recall@10 	 touched
-----------------------------------------------------------------------
