# Step 1: 
* Define imports

In [None]:
# NOTE(review): hard-coded, machine-specific absolute path (presumably the local clone of the
# Recommenders repository, judging by the directory listing recorded later) - adjust before running elsewhere.
cd /Users/juiashinkar/Recommenders

/Users/juiashinkar/Recommenders


In [None]:
import sys
sys.path.append("../../")
import os
import cornac
import papermill as pm
import pandas as pd
from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_random_split
from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from reco_utils.recommender.cornac.cornac_utils import predict_ranking
from reco_utils.common.timer import Timer
from reco_utils.common.constants import SEED

# Record the interpreter and Cornac versions next to the results.
print(
    "System version: {}".format(sys.version),
    "Cornac version: {}".format(cornac.__version__),
    sep="\n",
)

System version: 3.6.10 |Anaconda, Inc.| (default, May  7 2020, 23:06:31) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]
Cornac version: 1.7.1


# Step 2: 
* Read the dataset

In [None]:
# Load the interaction log, discard the two columns that are not used afterwards,
# and rename the remaining ones to the userID / itemID / rating names the later cells refer to.
# NOTE(review): the CSV location is a hard-coded absolute path on one machine.
df = (
    pd.read_csv("/Users/juiashinkar/Desktop/BPR/User_data_ALL.csv")
    .drop(columns=["Country", "Category"])
    .rename(columns={"userId": "userID", "Product_Id": "itemID", "Ratings": "rating"})
)

In [None]:
# Preview the first five rows to check the renamed columns.
df.head(5)

Unnamed: 0,userID,itemID,rating
0,689,101,5
1,689,102,3
2,689,103,3
3,689,104,2
4,689,105,3


# Training 

In [None]:
# Dataset-size label. NOTE(review): the data in this notebook is read from a CSV file,
# and no visible cell reads this value - confirm whether it is still needed.
MOVIELENS_DATA_SIZE = "100k"

# Length of the top-k recommendation list (presumably the ranking cut-off).
TOP_K = 10

# Model parameters: passed to the BPR constructor as `k` and `max_iter` respectively.
NUM_FACTORS, NUM_EPOCHS = 200, 100

In [None]:
# Randomly split the interactions; 0.75 is the split ratio handed to python_random_split
# (presumably 75% of the rows go to `train` and the rest to `test`).
train, test = python_random_split(df, ratio=0.75)

In [None]:
# Turn the training rows into a Cornac dataset; rows are handed over as plain tuples without the index.
# NOTE(review): the following cell builds train_set again with the same call, so one of the two is redundant.
train_set = cornac.data.Dataset.from_uir(
    train.itertuples(index=False),
    seed=SEED,
)



In [None]:
# Build the Cornac dataset from the training rows (tuples without the index) ...
train_set = cornac.data.Dataset.from_uir(
    train.itertuples(index=False),
    seed=SEED,
)

# ... and report how many distinct users and items it contains.
print(
    'Number of users: {}'.format(train_set.num_users),
    'Number of items: {}'.format(train_set.num_items),
    sep='\n',
)

Number of users: 327
Number of items: 20


In [None]:
# Configure the BPR model; nothing is trained until fit() is called in a later cell.
bpr = cornac.models.BPR(
    seed=SEED,              # fixed seed for repeatable runs
    verbose=True,           # show training progress
    k=NUM_FACTORS,          # number of latent factors
    max_iter=NUM_EPOCHS,    # number of training epochs
    learning_rate=0.01,
    lambda_reg=0.001,
)

In [None]:
# Fit the model on the training set inside a Timer so the elapsed time can be reported.
with Timer() as train_timer:
    bpr.fit(train_set)
print("Took {} seconds for training.".format(train_timer))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))


Optimization finished!
Took 0.2795 seconds for training.


In [None]:
import pickle

In [None]:
# Persist the fitted model to the working directory. The context manager closes
# the file handle as soon as the dump finishes, so the pickle is fully flushed to
# disk before any later cell reads it back.
with open("trained_model.p", "wb") as model_file:
    pickle.dump(bpr, model_file)

In [None]:
# List the current working directory, e.g. to confirm that trained_model.p was written.
os.listdir(".")

['AUTHORS.md',
 'tools',
 '.DS_Store',
 'LICENSE',
 'trained_model.p',
 'SETUP.md',
 'reco_utils',
 'GLOSSARY.md',
 'tests',
 'reco_base.yaml',
 'docs',
 'contrib',
 'README.md',
 'setup.py',
 '.gitignore',
 'CONTRIBUTING.md',
 'examples',
 '.github',
 'scenarios',
 '.git',
 'SECURITY.md']

# Test

In [None]:
import pickle

# Reload the model written by the training section. The file is opened in a
# context manager so the handle is closed once loading is done.
# NOTE: pickle.load can execute arbitrary code - only load files you produced yourself.
with open("trained_model.p", "rb") as model_file:
    bpr = pickle.load(model_file)

In [None]:
# Score items with the reloaded model and time the call. `train` is passed together with
# remove_seen=True, presumably so that pairs already present in the training data are dropped.
with Timer() as predict_timer:
    all_predictions = predict_ranking(
        bpr,
        train,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )
print("Took {} seconds for prediction.".format(predict_timer))

Took 0.0488 seconds for prediction.


In [None]:
# Evaluate at the notebook-wide cut-off defined in the parameters cell instead of a
# second hard-coded 10, so changing TOP_K changes the evaluation as well.
k = TOP_K

# All four ranking metrics compare the held-out `test` interactions with the
# scores stored in the 'prediction' column of `all_predictions`.
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=k)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=k)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=k)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.959820
NDCG:	0.961406
Precision@K:	0.180469
Recall@K:	0.954994


# Test on Input

In [None]:
# NOTE(review): scratch cell - 689 and 101 match the user / item IDs typed into the input
# prompts in the next cell; the bare 101 is only echoed back as this cell's output.
#689
101


101

In [None]:
# Ask for the user and item to score. Both prompts end with ": " so the typed
# value is not glued to the prompt text and the two prompts look the same.
# `user` and `item` stay strings; they are converted with int() where needed.
user = input("Enter User ID: ")
item = input("Enter Item ID: ")

# Single-row frame with the userID / itemID columns that predict_ranking is called with below.
# int() raises ValueError if either entry is not a whole number.
df1 = pd.DataFrame(data={
    "userID": [int(user)],
    "itemID": [int(item)],
})

Enter User ID689
Enter Item ID 101


In [None]:
# Score items with the model, passing the single entered (user, item) pair as the data frame.
# NOTE(review): df1 holds only that one pair, so remove_seen=True can presumably filter out
# nothing but that pair; the recorded result still lists items 102-105, which df.head() shows
# user 689 has already rated - confirm whether `train` should be passed here instead.
with Timer() as t:
    foo = predict_ranking(
        bpr,
        df1,
        usercol='userID',
        itemcol='itemID',
        remove_seen=True,
    )
print("Took {} seconds for prediction.".format(t))

Took 0.0244 seconds for prediction.


In [None]:
# Round every value in the two ID columns with Python's built-in round
# (presumably because the IDs come back from predict_ranking as floats - TODO confirm).
foo["userID"] = foo["userID"].apply(round)
foo["itemID"] = foo["itemID"].apply(round)

In [None]:
# Keep only the rows of the entered user and take the ten highest-scoring items.
result = (
    foo.loc[foo["userID"] == int(user)]
    .sort_values(by='prediction', ascending=False)
    .head(10)
)

In [None]:
# Strip the score and user columns so only the item IDs remain.
# errors="ignore" keeps the cell re-runnable: on a second run the columns are
# already gone, and a plain drop would raise KeyError. `columns=` alone selects
# the axis, so the extra axis argument is not needed.
result = result.drop(columns=["prediction", "userID"], errors="ignore")

In [None]:
# Serialise the recommendations as JSON, keyed by column name and then by the original row label.
result.to_json(orient="columns")

'{"itemID":{"5348":103,"5355":105,"5353":102,"5358":104,"5349":401,"5347":302,"5345":403,"5351":402,"5341":404,"5346":405}}'