In [1]:
import numpy as np
import pandas as pd

from libreco.data import DatasetPure
from libreco.algorithms import BPR, WideDeep
from libreco.evaluation import evaluate

from sklearn.model_selection import train_test_split

Instructions for updating:
non-resource variables are not supported in the long term


# Preprocess Data

In [2]:
books_df = pd.read_csv('../data/Books.csv', delimiter=';', low_memory=False)
ratings_df = pd.read_csv('../data/Ratings.csv', delimiter=';')
users_df = pd.read_csv('../data/Users.csv', delimiter=';')

  users_df = pd.read_csv('../data/Users.csv', delimiter=';')


In [3]:
ratings_df.columns = ['user_id', 'item_id', 'label']

In [4]:
ratings_df = ratings_df[ratings_df['label'] != 0]

In [5]:
book_str_to_int = {book_id: i for i, book_id in enumerate(ratings_df['item_id'].unique())}
user_str_to_int = {user_id: i for i, user_id in enumerate(ratings_df['user_id'].unique())}

ratings_df['item'] = ratings_df['item_id'].map(book_str_to_int).to_numpy()
ratings_df['user'] = ratings_df['user_id'].map(user_str_to_int).to_numpy()

# Train / Eval Split

In [6]:
train_df, eval_df = train_test_split(ratings_df, test_size=0.2, random_state=0)

In [7]:
eval_user_mask = eval_df['user'].isin(train_df['user'].unique())
eval_item_mask = eval_df['item'].isin(train_df['item'].unique())

eval_df = eval_df[eval_user_mask & eval_item_mask]

# 'user', 'item' must be the first two columns of the dataframe
train_df = train_df[['user', 'item', 'label']]
eval_df = eval_df[['user', 'item', 'label']]

In [8]:
train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)

# Bayesian Personalized Ranking and Wide Deep


In [9]:
embedding_size = 16
n_epochs_bpr = 10
n_epochs_wide = 10
learning_rate = 0.01
batch_size = 256
random_seed = 42

In [10]:
# init BPR model
bpr = BPR(
    task="ranking",  
    data_info=data_info,
    embed_size=embedding_size,  
    n_epochs=n_epochs_bpr,  
    lr=learning_rate,  
    reg=None,  
    batch_size=batch_size,  
    num_neg=1,  
    use_tf=True,  
    seed=random_seed
)

In [11]:
# init the WideDeep model
wide_deep = WideDeep(
    task="ranking", 
    data_info=data_info,
    embed_size=embedding_size,
    n_epochs=n_epochs_wide,
    lr={'wide': learning_rate, 'deep': learning_rate/10},
    batch_size=batch_size,
    use_bn=True, 
    hidden_units=[64, 32],  
    reg=None,  
    dropout_rate=None,  
    num_neg=1,  
    seed=random_seed
)

In [12]:
# train BPR model
bpr.fit(
    train_data, 
    neg_sampling=True,
    verbose=2, 
    shuffle=True, 
    eval_data=eval_data, 
    metrics=["ndcg", "precision"]
)

Training start time: [35m2024-08-10 17:29:55[0m


2024-08-10 17:29:55.888409: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2024-08-10 17:29:55.896630: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
train: 100%|██████████| 1356/1356 [00:10<00:00, 131.42it/s]


Epoch 1 elapsed: 10.339s
	 [32mtrain_loss: 0.6237[0m


eval_listwise: 100%|██████████| 16331/16331 [02:19<00:00, 117.14it/s]


	 eval ndcg@10: 0.0183
	 eval precision@10: 0.0037


train: 100%|██████████| 1356/1356 [00:09<00:00, 138.86it/s]


Epoch 2 elapsed: 9.768s
	 [32mtrain_loss: 0.4647[0m


eval_listwise: 100%|██████████| 16331/16331 [01:19<00:00, 205.16it/s]


	 eval ndcg@10: 0.0141
	 eval precision@10: 0.0030


train: 100%|██████████| 1356/1356 [00:09<00:00, 138.54it/s]


Epoch 3 elapsed: 9.794s
	 [32mtrain_loss: 0.229[0m


eval_listwise: 100%|██████████| 16331/16331 [01:18<00:00, 207.76it/s]


	 eval ndcg@10: 0.0118
	 eval precision@10: 0.0025


train: 100%|██████████| 1356/1356 [00:09<00:00, 141.03it/s]


Epoch 4 elapsed: 9.617s
	 [32mtrain_loss: 0.1262[0m


eval_listwise: 100%|██████████| 16331/16331 [01:06<00:00, 244.72it/s]


	 eval ndcg@10: 0.0108
	 eval precision@10: 0.0024


train: 100%|██████████| 1356/1356 [00:14<00:00, 91.00it/s]


Epoch 5 elapsed: 14.904s
	 [32mtrain_loss: 0.0853[0m


eval_listwise: 100%|██████████| 16331/16331 [01:45<00:00, 155.12it/s]


	 eval ndcg@10: 0.0096
	 eval precision@10: 0.0020


train: 100%|██████████| 1356/1356 [00:09<00:00, 138.89it/s]


Epoch 6 elapsed: 9.769s
	 [32mtrain_loss: 0.0663[0m


eval_listwise: 100%|██████████| 16331/16331 [01:11<00:00, 226.98it/s]


	 eval ndcg@10: 0.0085
	 eval precision@10: 0.0018


train: 100%|██████████| 1356/1356 [00:09<00:00, 142.69it/s]


Epoch 7 elapsed: 9.506s
	 [32mtrain_loss: 0.0567[0m


eval_listwise: 100%|██████████| 16331/16331 [01:11<00:00, 228.70it/s]


	 eval ndcg@10: 0.0095
	 eval precision@10: 0.0020


train: 100%|██████████| 1356/1356 [00:09<00:00, 144.93it/s]


Epoch 8 elapsed: 9.359s
	 [32mtrain_loss: 0.0492[0m


eval_listwise: 100%|██████████| 16331/16331 [01:07<00:00, 240.50it/s]


	 eval ndcg@10: 0.0091
	 eval precision@10: 0.0019


train: 100%|██████████| 1356/1356 [00:09<00:00, 142.29it/s]


Epoch 9 elapsed: 9.532s
	 [32mtrain_loss: 0.0449[0m


eval_listwise: 100%|██████████| 16331/16331 [01:10<00:00, 231.46it/s]


	 eval ndcg@10: 0.0086
	 eval precision@10: 0.0018


train: 100%|██████████| 1356/1356 [00:09<00:00, 142.19it/s]


Epoch 10 elapsed: 9.540s
	 [32mtrain_loss: 0.0419[0m


eval_listwise: 100%|██████████| 16331/16331 [02:43<00:00, 99.64it/s] 


	 eval ndcg@10: 0.0082
	 eval precision@10: 0.0018


In [13]:
# train WideDeep model
wide_deep.fit(
    train_data, 
    neg_sampling=True,
    verbose=2, 
    shuffle=True, 
    eval_data=eval_data, 
    metrics=["ndcg", "precision"]
)

Training start time: [35m2024-08-10 17:46:55[0m
Instructions for updating:
Colocations handled automatically by placer.


  net = tf.layers.batch_normalization(net, training=is_training)
Instructions for updating:
Colocations handled automatically by placer.
  net = tf.layers.batch_normalization(net, training=is_training)


total params: [33m7,663,453[0m | embedding params: [33m7,659,131[0m | network params: [33m4,322[0m


train: 100%|██████████| 2711/2711 [00:21<00:00, 125.90it/s]


Epoch 1 elapsed: 21.534s
	 [32mtrain_loss: 0.6343[0m


eval_listwise: 100%|██████████| 16331/16331 [16:58<00:00, 16.04it/s]


	 eval ndcg@10: 0.0076
	 eval precision@10: 0.0018


train: 100%|██████████| 2711/2711 [00:39<00:00, 68.04it/s]


Epoch 2 elapsed: 39.846s
	 [32mtrain_loss: 0.5808[0m


eval_listwise: 100%|██████████| 16331/16331 [15:32<00:00, 17.52it/s]


	 eval ndcg@10: 0.0077
	 eval precision@10: 0.0019


train: 100%|██████████| 2711/2711 [00:20<00:00, 130.90it/s]


Epoch 3 elapsed: 20.711s
	 [32mtrain_loss: 0.4717[0m


eval_listwise: 100%|██████████| 16331/16331 [11:12<00:00, 24.28it/s]


	 eval ndcg@10: 0.0124
	 eval precision@10: 0.0024


train: 100%|██████████| 2711/2711 [00:21<00:00, 125.65it/s]


Epoch 4 elapsed: 21.581s
	 [32mtrain_loss: 0.3346[0m


eval_listwise: 100%|██████████| 16331/16331 [11:36<00:00, 23.46it/s]


	 eval ndcg@10: 0.0066
	 eval precision@10: 0.0014


train: 100%|██████████| 2711/2711 [00:21<00:00, 124.25it/s]


Epoch 5 elapsed: 21.821s
	 [32mtrain_loss: 0.2438[0m


eval_listwise: 100%|██████████| 16331/16331 [11:34<00:00, 23.51it/s]


	 eval ndcg@10: 0.0055
	 eval precision@10: 0.0012


train: 100%|██████████| 2711/2711 [00:21<00:00, 125.90it/s]


Epoch 6 elapsed: 21.534s
	 [32mtrain_loss: 0.1922[0m


eval_listwise: 100%|██████████| 16331/16331 [11:08<00:00, 24.42it/s]


	 eval ndcg@10: 0.0049
	 eval precision@10: 0.0011


train: 100%|██████████| 2711/2711 [00:21<00:00, 124.91it/s]


Epoch 7 elapsed: 21.704s
	 [32mtrain_loss: 0.163[0m


eval_listwise: 100%|██████████| 16331/16331 [11:35<00:00, 23.48it/s]


	 eval ndcg@10: 0.0054
	 eval precision@10: 0.0012


train: 100%|██████████| 2711/2711 [00:21<00:00, 126.68it/s]


Epoch 8 elapsed: 21.402s
	 [32mtrain_loss: 0.1413[0m


eval_listwise: 100%|██████████| 16331/16331 [11:36<00:00, 23.44it/s]


	 eval ndcg@10: 0.0039
	 eval precision@10: 0.0009


train: 100%|██████████| 2711/2711 [00:21<00:00, 126.86it/s]


Epoch 9 elapsed: 21.372s
	 [32mtrain_loss: 0.1277[0m


eval_listwise: 100%|██████████| 16331/16331 [11:14<00:00, 24.21it/s]


	 eval ndcg@10: 0.0063
	 eval precision@10: 0.0013


train: 100%|██████████| 2711/2711 [00:21<00:00, 124.26it/s]


Epoch 10 elapsed: 21.818s
	 [32mtrain_loss: 0.116[0m


eval_listwise: 100%|██████████| 16331/16331 [11:20<00:00, 23.99it/s]


	 eval ndcg@10: 0.0070
	 eval precision@10: 0.0015


In [14]:
# evaluate the BPR model
bpr_eval_result = evaluate(bpr, eval_data, neg_sampling=True, metrics=["ndcg", "precision", "recall"])
print(f"Evaluation Results (BPR):\n{bpr_eval_result}")

eval_listwise: 100%|██████████| 16331/16331 [01:09<00:00, 235.18it/s]


Evaluation Results (BPR):
{'ndcg': 0.008218662924561774, 'precision': 0.001818627150817464, 'recall': 0.008496358810695952}


In [15]:
# evaluate the WideDeep model
wide_deep_eval_result = evaluate(wide_deep, eval_data, neg_sampling=True, metrics=["ndcg", "precision", "recall"])
print(f"Evaluation Results (WideDeep):\n{wide_deep_eval_result}")

eval_listwise: 100%|██████████| 16331/16331 [30:14<00:00,  9.00it/s]  


Evaluation Results (WideDeep):
{'ndcg': 0.00704205306980035, 'precision': 0.0014757210213704, 'recall': 0.0062481205328919365}


In [16]:
user_id = train_df.iloc[:10]['user'].values
item_id = train_df.iloc[:10]['item'].values

prediction = bpr.predict(user_id, item_id)
print(f"BPR Prediction for user {user_id} and item {item_id}: {prediction}")
prediction = wide_deep.predict(user_id, item_id)
print(f"WideDeep Prediction for user {user_id} and item {item_id}: {prediction}")

BPR Prediction for user [44828 13679 67574  5478 50595 44797  9787 67068    64 76536] and item [129010  52858 127059  27618 139955   4218  42336  43591    202   3399]: [0.998423   0.9999753  0.99999416 0.99969935 0.99830866 0.9891834
 0.99997854 0.9968299  0.9996921  0.99963665]
WideDeep Prediction for user [44828 13679 67574  5478 50595 44797  9787 67068    64 76536] and item [129010  52858 127059  27618 139955   4218  42336  43591    202   3399]: [0.9969753  0.99936503 0.96080226 0.99981624 0.9465156  0.9660195
 0.98954964 0.9977005  0.99968636 0.9982249 ]


In [17]:
train_df.iloc[:10]

Unnamed: 0,user,item,label
654197,44828,129010,8
203372,13679,52858,10
1000213,67574,127059,10
84497,5478,27618,8
740145,50595,139955,6
652558,44797,4218,5
145942,9787,42336,8
991352,67068,43591,8
413,64,202,10
1133263,76536,3399,7
