In [2]:
import os
import sys
import warnings

# Silence library warnings before the heavier third-party imports below run.
warnings.filterwarnings('ignore')

import cornac
import joblib
import pandas as pd
import torch

from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.constants import SEED
from recommenders.utils.timer import Timer

# Record the interpreter and library versions this run used, so the results
# below can be tied to a concrete environment. PyTorch is included because the
# training cell queries torch for GPU availability.
print("System version: {}".format(sys.version))
print("PyTorch version: {}".format(torch.__version__))
print("Cornac version: {}".format(cornac.__version__))

System version: 3.8.0 (tags/v3.8.0:fa919fd, Oct 14 2019, 19:37:50) [MSC v.1916 64 bit (AMD64)]
Cornac version: 1.17


In [2]:
# Evaluation: cut-off passed as `k` to the ranking metrics.
TOP_K = 10

# BiVAECF hyperparameters; each one is forwarded to the model constructor
# under the keyword noted beside it.
LATENT_DIM = 50          # -> k
ENCODER_DIMS = [100]     # -> encoder_structure
ACT_FUNC = "tanh"        # -> act_fn
LIKELIHOOD = "pois"      # -> likelihood
NUM_EPOCHS = 500         # -> n_epochs
BATCH_SIZE = 128         # -> batch_size
LEARNING_RATE = 0.001    # -> learning_rate

In [3]:
# Read the cleaned review table from disk.
reviews_csv = '../datasets/reviews-cleaned.csv'
data = pd.read_csv(reviews_csv)

# Show the first rows as a quick sanity check of the parsed columns.
data.head()

Unnamed: 0,user,name,rating
0,1 Family Meeple,10 Days in Europe,4.1
1,1 Family Meeple,12 Days,7.0
2,1 Family Meeple,7 Wonders,6.5
3,1 Family Meeple,A Column of Fire,5.0
4,1 Family Meeple,A Feast for Odin,10.0


In [4]:
# Keep 75% of the interactions for training and hold out the rest for testing.
# The seed is passed explicitly so the split is visibly reproducible
# (NOTE(review): assumes the splitter's default seed equals SEED, so results are unchanged).
TRAIN_RATIO = 0.75
train, test = python_random_split(data, ratio=TRAIN_RATIO, seed=SEED)

# The tuples are handed to from_uir positionally, so select the columns by name
# as (user, item, rating) instead of relying on the CSV's column order.
train_set = cornac.data.Dataset.from_uir(
    train[['user', 'name', 'rating']].itertuples(index=False),
    seed=SEED,
)
print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

Number of users: 3067
Number of items: 4582


In [5]:
# Decide the training device once and log it, so the timing printed below can
# be interpreted (CPU and GPU runs differ widely). Requires `torch` to be
# imported in the notebook's import cell.
use_gpu = torch.cuda.is_available()
print("Training on {}.".format("GPU" if use_gpu else "CPU"))

# Build the BiVAECF model from the configuration constants.
bivae = cornac.models.BiVAECF(
    k=LATENT_DIM,
    encoder_structure=ENCODER_DIMS,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    n_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    seed=SEED,
    use_gpu=use_gpu,
    verbose=True,
)

# Fit on the Cornac training set and report the wall-clock time taken.
with Timer() as t:
    bivae.fit(train_set)
print("Took {} seconds for training.".format(t))

  0%|          | 0/500 [00:00<?, ?it/s]

Took 1876.6480 seconds for training.


In [7]:
# Score user/item pairs with the trained model and time the call.
# remove_seen=True presumably drops pairs already present in `train` — confirm
# against the predict_ranking documentation.
with Timer() as t:
    all_predictions = predict_ranking(
        bivae,
        train,
        usercol='user',
        itemcol='name',
        remove_seen=True,
    )

prediction_message = "Took {} seconds for prediction.".format(t)
print(prediction_message)

Took 29.1711 seconds for prediction.


In [11]:
# Rename the id columns to 'userID' / 'itemID'; the metric calls later in the
# notebook pass no user/item column names, so presumably these are the names
# they expect by default.
prediction_column_names = {'user': 'userID', 'name': 'itemID'}
all_predictions = all_predictions.rename(columns=prediction_column_names)

# Display the renamed predictions.
all_predictions

Unnamed: 0,userID,itemID,prediction
1514036,AmandaDesignsGames,Kanban EV,0.188635
1514037,AmandaDesignsGames,Darwin's Choice,0.011565
1514038,AmandaDesignsGames,Rallyman: GT,0.081046
1514039,AmandaDesignsGames,Kami-sama,0.026116
1514040,AmandaDesignsGames,Round House,0.189510
...,...,...,...
14052989,Kaixo,Warhammer Underworlds: Beastgrave,0.001133
14052990,Kaixo,Peloponnesian War,0.000608
14052991,Kaixo,"Warhammer 40,000 (Eighth Edition)",0.000334
14052992,Kaixo,Boss Monster: Rise of the Minibosses,0.001561


In [14]:
# Give the held-out set the same 'userID' / 'itemID' column names as the
# predictions so the two frames line up for evaluation.
test_column_names = {'user': 'userID', 'name': 'itemID'}
test = test.rename(columns=test_column_names)

# Display the renamed test set.
test

Unnamed: 0,userID,itemID,rating
1921817,twohu2001,Dream Home,7.0
1855348,strubs42,Chimera Station,6.3
474459,Magnus the Blue,Ticket to Ride: The Card Game,5.0
1164015,dontylw,Thurn and Taxis,6.0
328830,Hessu68,Room 25,7.0
...,...,...,...
64238,Bastwood,On the Underground: London/Berlin,7.0
147374,Clapas,Asgard,6.0
1610665,newkillerstar27,Mai-Star,1.0
1584725,montsegur,Palazzo,7.0


In [15]:
# Every metric is computed on the same frames with the same prediction column
# and cut-off, so the shared keyword arguments are defined once.
metric_args = dict(col_prediction='prediction', k=TOP_K)

eval_map = map_at_k(test, all_predictions, **metric_args)
eval_ndcg = ndcg_at_k(test, all_predictions, **metric_args)
eval_precision = precision_at_k(test, all_predictions, **metric_args)
eval_recall = recall_at_k(test, all_predictions, **metric_args)

# One "name<TAB>value" line per metric.
report_lines = [
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
]
print(*report_lines, sep='\n')

MAP:	0.042903
NDCG:	0.779087
Precision@K:	0.764949
Recall@K:	0.049715


In [52]:
# Save the model
# Create the target directory first: on a fresh checkout '../model' may not
# exist and joblib.dump would fail when opening the file for writing.
MODEL_PATH = '../model/bivae.pkl'
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# joblib.dump is left as the last expression so its return value is displayed.
joblib.dump(bivae, MODEL_PATH)

['../model/bivae.pkl']

In [54]:
# Load the model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts produced by this notebook or another trusted source.
saved_model_path = '../model/bivae.pkl'
model = joblib.load(saved_model_path)

In [1]:
# df_train = pd.DataFrame({'user':['Kop'], 'name': ['Splendor'], 'rating': [9]})
# df_train

In [65]:
# prediction = predict_ranking(model, df_train, usercol='user', itemcol='name', remove_seen=True)