## Bayesian Personalized Rank

In [17]:
!pip install cornac pandas requests sklearn

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [18]:
import sys
import os
import cornac
import pandas as pd
import numpy as np
import pandas as pd
from tqdm import tqdm

from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_random_split
from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from reco_utils.recommender.cornac.cornac_utils import predict_ranking
from reco_utils.common.timer import Timer
from reco_utils.common.constants import SEED

In [19]:
class BPR:
    def __init__(self):
        self.bpr = cornac.models.BPR(k=200,
                                     max_iter=100,
                                     learning_rate=0.01,
                                     lambda_reg=0.001,
                                     verbose=True,
                                     seed=SEED)
        self.all_predictions=None

    def fit(self, train):
        self.all_predictions=None
        train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)
        print('Number of users: {}'.format(train_set.num_users))
        print('Number of items: {}'.format(train_set.num_items))

        with Timer() as t:
            self.bpr.fit(train_set)
        print("Took {} seconds for training.".format(t))

    def get_pred(self,row):
        list_pred=self.all_predictions [(self.all_predictions.userID==row[0]) & (self.all_predictions.itemID==row[1])]["prediction"].tolist()
        return list_pred[0] if list_pred else 0 #maybe -10 ??
        
    def predict(self, feed_dict):
        #print('users', feed_dict["user_id"])
        #print('items', feed_dict["item_id"])
        users = feed_dict["user_id"]
        user = users[0]
        items = feed_dict["item_id"]
        if self.all_predictions is None:
            with Timer() as t:
                self.all_predictions = predict_ranking(self.bpr, None, usercol='userID', itemcol='itemID', remove_seen=False)
            print("Took {} seconds for prediction.".format(t))
        df = pd.DataFrame(np.transpose(np.vstack((users,items))))
        df['prediction'] = df.apply (lambda row: self.get_pred(row), axis=1)
        return df['prediction'].tolist()

## Leave one out

In [20]:
def model_test(model, file_name):
    COLS = ['userID', 'itemID', 'rating', 'timestamp']
    train_data = pd.read_csv("Data/movielens.train_explicit_ds",sep='\t', names=COLS)

    model.fit(train_data)

    test_items = np.load('Data/test_items.npy', allow_pickle=True)
    test_users = np.load('Data/test_users.npy', allow_pickle=True)
    predictions = []
    for users, items in tqdm(zip(test_users, test_items)):
        feed_dict={'user_id': users, 'item_id': items}
        p = model.predict(feed_dict)
        #print(len(items),len(p))
        predictions.append(p)
    
    np.save('Predictions/'+file_name, predictions)

In [21]:
model_test(BPR(), 'bpr')

  2%|▏         | 2/100 [00:00<00:05, 17.56it/s, correct=85.39%, skipped=12.05%]

Number of users: 943
Number of items: 1680


100%|██████████| 100/100 [00:05<00:00, 16.81it/s, correct=94.07%, skipped=12.14%]
0it [00:00, ?it/s]

Optimization finished!
Took 5.9600 seconds for training.
Took 1.2552 seconds for prediction.


943it [04:10,  3.76it/s]


In [22]:
np.load('Predictions/'+'bpr.npy')

array([[-0.84978002, -0.57607543, -0.14493404, ..., -0.08240199,
         2.49868298, -0.92150867],
       [-1.4156729 , -1.05214524, -3.02378964, ...,  2.19568563,
        -0.5386427 ,  2.92233276],
       [-2.21766901, -3.32058573, -1.09007227, ..., -2.77728033,
        -3.12943149,  3.42444611],
       ...,
       [-2.08446383,  1.6115917 ,  0.34003991, ...,  1.31036294,
         0.7359587 ,  3.52911282],
       [-1.66564274, -0.5490877 , -2.31219816, ..., -2.00165796,
        -1.9971993 ,  1.52274096],
       [-1.49695039, -3.22197604, -0.52941251, ...,  0.54471171,
         2.39169216,  5.49815607]])