## Bayesian Personalized Rank

In [5]:
!pip install cornac pandas requests sklearn

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [6]:
import sys
import os
import cornac
import pandas as pd
import numpy as np
import pandas as pd

from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_random_split
from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from reco_utils.recommender.cornac.cornac_utils import predict_ranking
from reco_utils.common.timer import Timer
from reco_utils.common.constants import SEED

In [7]:
class BPR:
    def __init__(self):
        self.bpr = cornac.models.BPR(k=200,
                                     max_iter=100,
                                     learning_rate=0.01,
                                     lambda_reg=0.001,
                                     verbose=True,
                                     seed=SEED)

    def fit(self, train):
        train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)
        print('Number of users: {}'.format(train_set.num_users))
        print('Number of items: {}'.format(train_set.num_items))

        with Timer() as t:
            self.bpr.fit(train_set)
        print("Took {} seconds for training.".format(t))

    def predict(self, feed_dict):
        user = feed_dict["user_id"][0]
        items = feed_dict["item_id"]
        with Timer() as t:
            all_predictions = predict_ranking(self.bpr, None, usercol='userID', itemcol='itemID', remove_seen=False)
        print("Took {} seconds for prediction.".format(t))
        return all_predictions [all_predictions.userID==user & all_predictions.itemID.isin(items)]

## Example of use

In [8]:
data = movielens.load_pandas_df(
    size="100k", #100k, 1m, 10m, or 20m
    header=["userID", "itemID", "rating"]
)

train, test = python_random_split(data, 0.75)

feed_dict = {
        'user_id': np.full(10, 153),
        'item_id': train["itemID"][:15].to_numpy(),
    }

model = BPR()
model.fit(train) # train is pd.DataFrame with columns: userID, itemID, rating
model.predict(feed_dict) # feed_dict as in Dawid's model evaluation

100%|██████████| 4.81k/4.81k [00:02<00:00, 1.76kKB/s]
  3%|▎         | 3/100 [00:00<00:04, 21.57it/s, correct=83.84%, skipped=9.20%]

Number of users: 943
Number of items: 1642


100%|██████████| 100/100 [00:04<00:00, 21.35it/s, correct=92.09%, skipped=9.09%]


Optimization finished!
Took 4.6931 seconds for training.
Took 1.1474 seconds for prediction.


Unnamed: 0,userID,itemID,prediction
765172,1,901,-2.01152
765173,1,755,0.335445
765174,1,287,0.511043
765175,1,181,4.115204
765176,1,96,3.755646
765177,1,83,2.44639
765178,1,1,3.917794
765179,1,225,1.146336
765180,1,199,1.685099
765181,1,24,3.081897
