In [23]:
import sys
import os
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

# Report the interpreter and Cornac versions so the recorded results can be
# tied back to the environment that produced them.
for template, value in (
    ("System version: {}", sys.version),
    ("Cornac version: {}", cornac.__version__),
):
    print(template.format(value))

System version: 3.8.10 (default, Sep 28 2021, 16:10:42) 
[GCC 9.3.0]
Cornac version: 1.14.2


In [24]:
# Load the tab-separated (user, item, rating) triples from um.dat into `R`.
# NOTE(review): presumably this is the full interaction table that the
# um_0.8.train / um_0.8.test files were split from -- confirm.
data = []
with open('../data/um.dat', 'r') as infile:
    # Iterate the file object directly so lines are streamed instead of
    # loading the whole file into memory with readlines().
    for line in infile:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line at EOF),
            # which would otherwise fail the three-way unpacking below.
            continue
        user, item, rating = line.split('\t')
        # IDs stay as strings; the rating is parsed so the column is numeric
        # rather than an object column of text.
        data.append([user, item, float(rating)])

R = pd.DataFrame(data=data, columns=["userID", "itemID", "rating"])

In [25]:
def _load_uir(path):
    """Read a tab-separated ``user<TAB>item<TAB>rating`` file into a DataFrame.

    Args:
        path: Location of the text file; one interaction per line.

    Returns:
        pandas.DataFrame with columns ``userID`` and ``itemID`` (kept as
        strings, exactly as they appear in the file) and ``rating`` (float).

    Raises:
        ValueError: If a non-blank line does not contain exactly three
            tab-separated fields or its rating is not a valid number. The
            message includes the file path and 1-based line number.
    """
    rows = []
    with open(path, 'r') as infile:
        # Stream the file line by line rather than materialising it with
        # readlines().
        for line_no, line in enumerate(infile, start=1):
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing empty line at EOF).
                continue
            try:
                user, item, rating = line.split('\t')
                rows.append([user, item, float(rating)])
            except ValueError as exc:
                raise ValueError(
                    "{}:{}: expected 'user<TAB>item<TAB>rating', got {!r}".format(
                        path, line_no, line
                    )
                ) from exc
    return pd.DataFrame(data=rows, columns=["userID", "itemID", "rating"])


# Both files share the same layout, so they go through the same loader.
train = _load_uir('../data/um_0.8.train')
test = _load_uir('../data/um_0.8.test')

In [26]:
# Values passed to the BPR constructor below as `k` and `max_iter`.
NUM_FACTORS = 200
NUM_EPOCHS = 100

# Feed the training frame's rows, as plain (user, item, rating) tuples,
# into a Cornac dataset.
uir_rows = train.itertuples(index=False)
train_set = cornac.data.Dataset.from_uir(uir_rows, seed=SEED)

print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

# Configure the BPR model; training happens in a later cell via bpr.fit().
bpr = cornac.models.BPR(
    k=NUM_FACTORS,
    max_iter=NUM_EPOCHS,
    learning_rate=0.01,
    lambda_reg=0.001,
    verbose=True,
    seed=SEED,
)

Number of users: 3872
Number of items: 9340


In [27]:
# Fit the BPR model on the Cornac training set and report the wall-clock time
# captured by the Timer context manager.
with Timer() as fit_timer:
    bpr.fit(train_set)
print("Took {} seconds for training.".format(fit_timer))

  0%|          | 0/100 [00:00<?, ?it/s]

Optimization finished!
Took 7.0481 seconds for training.


In [28]:
# Score user/item pairs with the trained model. `remove_seen=True` is passed
# together with the training frame so that pairs already present in `train`
# are dropped from the returned predictions.
with Timer() as t:
    all_predictions = predict_ranking(
        bpr,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )
print("Took {} seconds for prediction.".format(t))

Took 131.3814 seconds for prediction.


In [29]:
# Display the prediction frame (userID, itemID, prediction) as the cell output.
all_predictions

Unnamed: 0,userID,itemID,prediction
258697,163,7516,2.346899
258698,163,3984,1.626166
258699,163,7779,2.206607
258700,163,3849,2.548229
258701,163,1604,2.398134
...,...,...,...
36164475,2898,6157,-1.491740
36164476,2898,1247,-1.406347
36164477,2898,3960,-1.345185
36164478,2898,8727,-1.380266


In [30]:
# Precision and recall of the ranked predictions against the held-out test
# interactions, at a cutoff of 10.
k = 10
eval_precision = precision_at_k(
    test, all_predictions, col_prediction='prediction', k=k
)
eval_recall = recall_at_k(
    test, all_predictions, col_prediction='prediction', k=k
)

# One metric per line.
print("Precision@K:\t%f" % eval_precision)
print("Recall@K:\t%f" % eval_recall)

Precision@K:	0.136264
Recall@K:	0.079071


In [31]:
# Precision and recall of the ranked predictions against the held-out test
# interactions, at a cutoff of 20.
k = 20
eval_precision = precision_at_k(
    test, all_predictions, col_prediction='prediction', k=k
)
eval_recall = recall_at_k(
    test, all_predictions, col_prediction='prediction', k=k
)

# One metric per line.
print("Precision@K:\t%f" % eval_precision)
print("Recall@K:\t%f" % eval_recall)

Precision@K:	0.112425
Recall@K:	0.126796
