In [1]:
import numpy as np
import pandas as pd

from libreco.data import DatasetPure
from libreco.algorithms import BPR, WideDeep
from libreco.evaluation import evaluate

from sklearn.model_selection import train_test_split

Instructions for updating:
non-resource variables are not supported in the long term


# Preprocess Data

In [2]:
books_df = pd.read_csv('../data/Books.csv', delimiter=';', low_memory=False)
ratings_df = pd.read_csv('../data/Ratings.csv', delimiter=';')
users_df = pd.read_csv('../data/Users.csv', delimiter=';')

  users_df = pd.read_csv('../data/Users.csv', delimiter=';')


In [3]:
ratings_df.columns = ['user_id', 'item_id', 'label']

In [4]:
ratings_df = ratings_df[ratings_df['label'] != 0]

In [5]:
book_str_to_int = {book_id: i for i, book_id in enumerate(ratings_df['item_id'].unique())}
user_str_to_int = {user_id: i for i, user_id in enumerate(ratings_df['user_id'].unique())}

ratings_df['item'] = ratings_df['item_id'].map(book_str_to_int).to_numpy()
ratings_df['user'] = ratings_df['user_id'].map(user_str_to_int).to_numpy()

# Train / Eval Split

In [6]:
train_df, eval_df = train_test_split(ratings_df, test_size=0.2, random_state=0)

In [7]:
eval_user_mask = eval_df['user'].isin(train_df['user'].unique())
eval_item_mask = eval_df['item'].isin(train_df['item'].unique())

eval_df = eval_df[eval_user_mask & eval_item_mask]

# 'user', 'item' must be the first two columns of the dataframe
train_df = train_df[['user', 'item', 'label']]
eval_df = eval_df[['user', 'item', 'label']]

In [8]:

# Step 2: Prepare the dataset for LibRecommender
train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)



# Bayesian Personalized Ranking and Wide Deep


In [9]:
embedding_size = 16
n_epochs_bpr = 1
n_epochs_wide = 10
learning_rate = 0.01
batch_size = 256
random_seed = 42

In [10]:

# Step 3: Initialize the BPR model
bpr = BPR(
    task="ranking",  # This specifies that we're performing a ranking task
    data_info=data_info,
    embed_size=embedding_size,  # Size of the embedding vectors
    n_epochs=n_epochs_bpr,  # Number of training epochs
    lr=learning_rate,  # Learning rate
    reg=None,  # Regularization parameter, can be tuned
    batch_size=batch_size,  # Batch size for training
    num_neg=1,  # Number of negative samples per positive sample
    use_tf=True,  # Whether to use TensorFlow backend
    seed=random_seed
)

In [11]:
# Define the WideDeep model
wide_deep = WideDeep(
    task="ranking",  # Use "rating" for explicit feedback datasets
    data_info=data_info,
    embed_size=embedding_size,
    n_epochs=n_epochs_wide,
    lr={'wide': learning_rate, 'deep': learning_rate/10},
    batch_size=batch_size,
    use_bn=True,  # Batch normalization
    hidden_units=[64, 32],  # Neural network architecture
    reg=None,  # Regularization; you can specify l2 or l1 here
    dropout_rate=None,  # Optionally add dropout to prevent overfitting
    num_neg=1,  # Number of negative samples per positive sample
    seed=random_seed
)

In [None]:

bpr.fit(
    train_data, 
    neg_sampling=True,
    verbose=2, 
    shuffle=True, 
    eval_data=eval_data, 
    metrics=["ndcg", "precision"]
)


In [12]:
wide_deep.fit(
    train_data, 
    neg_sampling=True,
    verbose=2, 
    shuffle=True, 
    eval_data=eval_data, 
    metrics=["ndcg", "precision"]
)


Training start time: [35m2024-08-10 16:19:32[0m
Instructions for updating:
Colocations handled automatically by placer.


  net = tf.layers.batch_normalization(net, training=is_training)
Instructions for updating:
Colocations handled automatically by placer.


total params: [33m3,868,095[0m | embedding params: [33m3,863,773[0m | network params: [33m4,322[0m


  net = tf.layers.batch_normalization(net, training=is_training)
2024-08-10 16:19:33.099230: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2024-08-10 16:19:33.110035: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
train: 100%|██████████| 2711/2711 [00:22<00:00, 120.49it/s]


Epoch 1 elapsed: 22.516s
	 [32mtrain_loss: 0.6345[0m


eval_listwise: 100%|██████████| 16331/16331 [11:41<00:00, 23.27it/s]


	 eval ndcg@10: 0.0095
	 eval precision@10: 0.0023


train: 100%|██████████| 2711/2711 [00:23<00:00, 117.12it/s]


Epoch 2 elapsed: 23.152s
	 [32mtrain_loss: 0.5785[0m


eval_listwise: 100%|██████████| 16331/16331 [13:50<00:00, 19.66it/s]


	 eval ndcg@10: 0.0086
	 eval precision@10: 0.0019


train: 100%|██████████| 2711/2711 [00:34<00:00, 77.84it/s]


Epoch 3 elapsed: 34.834s
	 [32mtrain_loss: 0.4602[0m


eval_listwise: 100%|██████████| 16331/16331 [17:50<00:00, 15.26it/s] 


	 eval ndcg@10: 0.0084
	 eval precision@10: 0.0018


train: 100%|██████████| 2711/2711 [00:25<00:00, 107.69it/s]


Epoch 4 elapsed: 25.178s
	 [32mtrain_loss: 0.3211[0m


eval_listwise:  64%|██████▍   | 10497/16331 [12:24<06:19, 15.36it/s] 

In [None]:
# Step 5: Evaluate the model
bpr_eval_result = evaluate(bpr, eval_data, neg_sampling=True, metrics=["ndcg", "precision", "recall"])
print(f"Evaluation Results (BPR):\n{bpr_eval_result}")

In [None]:
# Step 5: Evaluate the model
wide_deep_eval_result = evaluate(wide_deep, eval_data, neg_sampling=True, metrics=["ndcg", "precision", "recall"])
print(f"Evaluation Results (WideDeep):\n{wide_deep_eval_result}")

In [None]:
user_id = train_df.iloc[:10]['user'].values
item_id = train_df.iloc[:10]['item'].values
prediction = bpr.predict(user_id, item_id)
print(f"BPR Prediction for user {user_id} and item {item_id}: {prediction}")
prediction = wide_deep.predict(user_id, item_id)
print(f"WideDeep Prediction for user {user_id} and item {item_id}: {prediction}")