In [None]:
import os
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import random
import json
from scipy.sparse import coo_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


# Cargar Datos

## Interacciones

In [None]:
# Download the three input files into the working directory: the book catalogue (books.csv)
# and the "past" and "test" interaction files. `-O` fixes the local file name, so re-running
# this cell overwrites the existing copies.
!wget https://www.dropbox.com/s/57tel5zqopkssrh/books.csv?dl=0 -O books.csv
!wget https://www.dropbox.com/s/dqeqpsr0vdvmcy0/goodreads_past_interactions.json?dl=0 -O goodreads_past_interactions.json
!wget https://www.dropbox.com/s/rjtzhmb2zbpp30q/goodreads_test_interactions.json?dl=0 -O goodreads_test_interactions.json

In [None]:
# Book catalogue; the row position of a book is the contiguous item index used below.
df_books = pd.read_csv('books.csv', sep=',')

# Two-way mappings between contiguous item indices and the catalogue's book ids.
idx2bookid = {i: id_ for i, id_ in enumerate(df_books.book_id)}
bookid2idx = {id_:i for i, id_ in enumerate(df_books.book_id)}

# Training ("past") interactions: a JSON object mapping each user id to a list of book ids.
with open("goodreads_past_interactions.json", "r") as f:
    user_interactions = json.load(f)

# User indices follow the key order of the training file.
idx2userid = {i: id_ for i, id_ in enumerate(user_interactions.keys())}
userid2idx = {id_:i for i, id_ in enumerate(user_interactions.keys())}

# Held-out interactions, same layout as the training file.
with open("goodreads_test_interactions.json", "r") as f:
    user_interactions_test = json.load(f)


def _interactions_to_coordinates(interactions):
    """Turn a {user_id: [book_id, ...]} mapping into COO coordinate lists.

    Args:
        interactions (dict): user id -> iterable of book ids. Every user id must be
            a key of `userid2idx` and every book id a key of `bookid2idx`.

    Returns:
        tuple: (row_indices, col_indices, values) as plain Python lists, where every
            value is 1 (implicit feedback).

    Raises:
        KeyError: if an interaction references an unknown user or book id.
    """
    row_indices = []
    col_indices = []
    for user_id, book_ids in interactions.items():
        if not book_ids:
            continue  # users without interactions contribute no coordinates
        u_id = int(userid2idx[user_id])
        for book_id in book_ids:
            row_indices.append(u_id)
            col_indices.append(int(bookid2idx[book_id]))
    return row_indices, col_indices, [1] * len(row_indices)


rows, cols, data = _interactions_to_coordinates(user_interactions)
rows_test, cols_test, data_test = _interactions_to_coordinates(user_interactions_test)

# Both matrices must share ONE shape, otherwise user/item indices are not comparable
# between them. The shape therefore covers the largest index seen in either split.
max_user_id = max(rows + rows_test)
max_item_id = max(cols + cols_test)
interaction_shape = (max_user_id + 1, max_item_id + 1)

# Users that have at least one held-out interaction.
test_users = list(set(rows_test))

# Build the sparse matrices
user_item_matrix_train = coo_matrix((data, (rows, cols)), shape=interaction_shape)
print(user_item_matrix_train.shape)

user_item_matrix_test = coo_matrix((data_test, (rows_test, cols_test)), shape=interaction_shape)
print(user_item_matrix_test.shape)

(52821, 4287)
(52821, 4287)


In [None]:
# Density of the training matrix: non-zero entries divided by the number of user-item cells.
train_nonzero_count = user_item_matrix_train.count_nonzero()
train_cell_count = user_item_matrix_train.shape[0] * user_item_matrix_train.shape[1]
train_nonzero_count / train_cell_count

0.014813943074670854

## Embeddings de libros

In [None]:
# Item content features. Only the BERT embedding file is loaded in this run; the two
# commented-out loads below are alternative feature files that are currently disabled.
#pca_book_embeddings = np.load("pca_800_book_combined_sorted.npy")
#img_book_embeddings = np.load("imgs_features_sorted.npy")
bert_book_embeddings = np.load("goodreads_bert_embeddings.npy")

# Disabled variant: concatenate the image and BERT features column-wise.
#book_embeddings = np.concatenate((img_book_embeddings, bert_book_embeddings), axis=1)

# NOTE(review): rows are assumed to be ordered like df_books (row i == item index i) — confirm.
# This is an alias, not a copy: in-place changes to book_embeddings also change bert_book_embeddings.
book_embeddings = bert_book_embeddings

In [None]:
# Scale every embedding row to unit L2 norm, writing into the existing array.
# The 1e-8 term keeps the division finite for all-zero rows.
embedding_row_norms = np.linalg.norm(book_embeddings, axis=1, keepdims=True)
book_embeddings[...] = book_embeddings / (embedding_row_norms + 1e-8)

# VBPR

In [None]:
# --- 2. VBPR Model Definition ---
class VBPR(nn.Module):
    """Matrix-factorisation scorer with an additional content ("visual") term.

    The score of a (user, item) pair is

        x_ui = U_u . V_i  +  U'_u . (E f_i)  +  b_u  +  b_i

    where U, V are learned latent factors, U' is a learned per-user content
    preference vector, f_i is the fixed pre-computed feature vector of the item,
    E is a learned linear projection and b_u, b_i are learned biases.

    Args:
        num_users (int): number of rows of the user embedding tables.
        num_items (int): number of rows of the item embedding tables.
        latent_dim_mf (int): size of U_u and V_i.
        latent_dim_visual (int): size of U'_u; must equal `projection_dim`.
        visual_feature_dim (int): size of each item feature vector f_i.
        projection_dim (int): output size of the projection E.
        item_visual_features (torch.Tensor): fixed item features, indexed by item
            index along the first axis and holding `visual_feature_dim` values per item.

    Raises:
        ValueError: if `latent_dim_visual != projection_dim`, because the content
            term is a direct dot product between U'_u and E f_i.
    """

    def __init__(self, num_users, num_items, latent_dim_mf, latent_dim_visual,
                 visual_feature_dim, projection_dim, item_visual_features):
        super(VBPR, self).__init__()

        # Fail at construction time rather than on the first forward pass.
        if latent_dim_visual != projection_dim:
            raise ValueError("latent_dim_visual must be equal to projection_dim for direct dot product.")

        self.num_users = num_users
        self.num_items = num_items
        self.latent_dim_mf = latent_dim_mf
        self.latent_dim_visual = latent_dim_visual
        self.visual_feature_dim = visual_feature_dim
        self.projection_dim = projection_dim

        # User / item latent factors of the MF term (U and V).
        self.user_latent_mf = nn.Embedding(num_users, latent_dim_mf)
        self.item_latent_mf = nn.Embedding(num_items, latent_dim_mf)

        # User latent factors of the content term (U').
        self.user_latent_visual = nn.Embedding(num_users, latent_dim_visual)

        # Fixed item features (F). A buffer follows the module across devices and is
        # saved in the state dict, but is not returned by parameters() and so is never trained.
        self.register_buffer('item_visual_features', item_visual_features)

        # Projection E: visual_feature_dim -> projection_dim, without bias.
        self.item_visual_projection = nn.Linear(visual_feature_dim, projection_dim, bias=False)

        # Scalar user and item biases.
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_bias = nn.Embedding(num_items, 1)

        self._init_weights()

    def _init_weights(self):
        """Initialise factors with N(0, 0.01^2), E with Xavier-uniform and biases with zeros."""
        nn.init.normal_(self.user_latent_mf.weight, std=0.01)
        nn.init.normal_(self.item_latent_mf.weight, std=0.01)
        nn.init.normal_(self.user_latent_visual.weight, std=0.01)

        nn.init.xavier_uniform_(self.item_visual_projection.weight)

        nn.init.constant_(self.user_bias.weight, 0.0)
        nn.init.constant_(self.item_bias.weight, 0.0)

    def forward(self, user_indices, item_indices):
        """Score (user, item) pairs element-wise.

        Args:
            user_indices (torch.Tensor): integer user indices of any shape.
            item_indices (torch.Tensor): integer item indices of the same shape.

        Returns:
            torch.Tensor: scores with the same shape as the index tensors.
        """
        user_indices = user_indices.long()
        item_indices = item_indices.long()

        # MF factors: (..., latent_dim_mf)
        u_mf = self.user_latent_mf(user_indices)
        v_mf = self.item_latent_mf(item_indices)

        # Content term: user preference (..., latent_dim_visual) and projected
        # item features theta_i = E f_i of shape (..., projection_dim).
        u_visual = self.user_latent_visual(user_indices)
        f_item = self.item_visual_features[item_indices]
        theta_item = self.item_visual_projection(f_item)

        # Reduce over the LAST axis so that index tensors of any rank are supported.
        mf_score = torch.sum(u_mf * v_mf, dim=-1)
        visual_score = torch.sum(u_visual * theta_item, dim=-1)

        # squeeze(-1) removes only the trailing size-1 embedding axis; a bare
        # squeeze() would also drop a batch axis of length 1.
        user_bias = self.user_bias(user_indices).squeeze(-1)
        item_bias = self.item_bias(item_indices).squeeze(-1)

        # x_ui = U_u . V_i + U'_u . (E f_i) + b_u + b_i
        scores = mf_score + visual_score + user_bias + item_bias

        return scores

In [None]:
# --- 3. BPR Loss Function ---
class BPRLoss(nn.Module):
    """BPR pairwise ranking loss plus L2 regularisation of the wrapped model.

    Args:
        model (nn.Module): scorer called as `model(user_indices, item_indices)`. It must
            expose `user_latent_mf`, `item_latent_mf`, `user_latent_visual`,
            `item_visual_projection`, `user_bias` and `item_bias`, each with a 2-D `.weight`.
        lambda_mf (float): weight of the MF-factor penalty.
        lambda_visual (float): weight of the content-factor and projection penalty.
        lambda_bias (float): weight of the bias penalty.
    """

    def __init__(self, model, lambda_mf, lambda_visual, lambda_bias):
        super(BPRLoss, self).__init__()
        self.model = model
        self.lambda_mf = lambda_mf
        self.lambda_visual = lambda_visual
        self.lambda_bias = lambda_bias
        self.softplus = nn.Softplus() # -log(sigmoid(x)) == softplus(-x)

    @staticmethod
    def _mean_squared_row_norm(weight):
        """Mean over rows of the squared L2 norm of each row of `weight`."""
        # Summing squares directly avoids the sqrt-then-square round trip of
        # `norm(2).pow(2)`, whose derivative is not defined for all-zero rows.
        return weight.pow(2).sum(dim=1).mean()

    def forward(self, user_indices, pos_item_indices, neg_item_indices):
        """Compute the regularised BPR loss for a batch of triplets.

        Args:
            user_indices (torch.Tensor): user index of every triplet.
            pos_item_indices (torch.Tensor): item the user interacted with.
            neg_item_indices (torch.Tensor): item the user did not interact with.

        Returns:
            torch.Tensor: scalar loss, the batch mean of -log(sigmoid(x_pos - x_neg))
                plus the weighted penalties.
        """
        positive_predictions = self.model(user_indices, pos_item_indices)
        negative_predictions = self.model(user_indices, neg_item_indices)

        # -log(sigmoid(pos - neg)) written as softplus(neg - pos); averaged over the batch.
        loss_bpr = torch.mean(self.softplus(negative_predictions - positive_predictions))

        # The penalties are taken over the FULL parameter tables, not only over the
        # rows that appear in the batch.
        reg_mf_user = self._mean_squared_row_norm(self.model.user_latent_mf.weight)
        reg_mf_item = self._mean_squared_row_norm(self.model.item_latent_mf.weight)

        reg_visual_user = self._mean_squared_row_norm(self.model.user_latent_visual.weight)
        # Squared Frobenius norm of the projection matrix E (a sum, not a mean).
        reg_visual_proj = self.model.item_visual_projection.weight.pow(2).sum()

        reg_user_bias = self._mean_squared_row_norm(self.model.user_bias.weight)
        reg_item_bias = self._mean_squared_row_norm(self.model.item_bias.weight)

        reg_loss = (self.lambda_mf * (reg_mf_user + reg_mf_item) +
                    self.lambda_visual * (reg_visual_user + reg_visual_proj) +
                    self.lambda_bias * (reg_user_bias + reg_item_bias))

        return loss_bpr + reg_loss

In [None]:
# --- 4. Data Generation for Training (Sampler) ---
def sample_bpr_triplets(user_item_matrix, num_samples_per_user=1):
    """
    Samples (user, positive_item, negative_item) triplets for BPR training.

    For every user with at least one stored interaction, `num_samples_per_user`
    triplets are drawn: the positive uniformly from the user's interacted items and
    the negative uniformly from the items the user has not interacted with.

    Args:
        user_item_matrix (scipy.sparse matrix): The training interaction matrix;
            anything offering `.tocsr()`.
        num_samples_per_user (int): How many triplets to sample for each user.

    Returns:
        List of tuples: (user_idx, positive_item_idx, negative_item_idx). Users without
        interactions, or who interacted with every item, contribute no triplets.
    """

    user_item_matrix_csr = user_item_matrix.tocsr() # CSR gives O(1) access to a user's items
    num_users, num_items = user_item_matrix_csr.shape
    indptr = user_item_matrix_csr.indptr
    indices = user_item_matrix_csr.indices

    triplets = []

    for u_idx in tqdm(range(num_users), desc="Sampling Triplet"):
        # Column indices stored for this user's row.
        positive_items = indices[indptr[u_idx]:indptr[u_idx + 1]]
        num_positive = len(positive_items)

        if num_positive == 0:
            continue # Skip users with no interactions

        interacted_items_set = set(positive_items.tolist())

        if len(interacted_items_set) >= num_items:
            continue # No negative exists; the rejection loop below would never end

        for _ in range(num_samples_per_user):
            # Index the array directly instead of random.choice(array): some Python
            # 3.11 point releases evaluate the truthiness of the sequence there, which
            # NumPy arrays with more than one element reject.
            pos_item = positive_items[random.randrange(num_positive)]

            # Rejection sampling: draw from all items and retry on an interacted one.
            # This is uniform over the non-interacted items without building that
            # list (num_items long) for every user.
            neg_item = random.randrange(num_items)
            while neg_item in interacted_items_set:
                neg_item = random.randrange(num_items)

            triplets.append((u_idx, pos_item, neg_item))

    return triplets

In [None]:
# --- Modified Evaluation Metrics (Recall@K, NDCG@K, MAP, AUC) ---
from sklearn.metrics import roc_auc_score, average_precision_score # New imports for AUC and MAP

def calculate_auc_map_for_user(scores_pos, scores_neg):
    """
    Calculates the ROC AUC for a single user from positive and negative item scores.

    Despite the name, only the AUC is returned; average precision is computed by the
    caller on the ranked list.

    Args:
        scores_pos (sequence of float): scores of the user's held-out positive items.
        scores_neg (sequence of float): scores of the remaining candidate items.

    Returns:
        float: the AUC, or 0.5 (chance level) when it cannot be computed.
    """
    # Always return a scalar: the caller accumulates the result with `+=`, so a
    # tuple in this branch would raise a TypeError there.
    if len(scores_pos) == 0 or len(scores_neg) == 0:
        return 0.5

    y_true = [1] * len(scores_pos) + [0] * len(scores_neg)
    # list(...) makes `+` a concatenation even when arrays are passed in
    # (array + array would add element-wise).
    y_score = list(scores_pos) + list(scores_neg)

    try:
        return roc_auc_score(y_true, y_score)
    except ValueError:
        # roc_auc_score rejects inputs it cannot score (e.g. a single class in y_true).
        return 0.5


def evaluate_model(model, train_matrix, test_matrix, K=10):
    """
    Evaluates the model using Precision@K, Recall@K, NDCG@K, MAP@K, and AUC.

    For every user with at least one held-out item, all items the user did not
    interact with in training are scored and ranked. Metrics are averaged over the
    evaluated users.

    Args:
        model (nn.Module): scorer called as `model(user_indices, item_indices)` with two
            equally long 1-D long tensors, returning one score per pair.
        train_matrix (scipy.sparse matrix): training interactions; its shape defines the
            number of users and items, and its entries are excluded from the ranking.
        test_matrix (scipy.sparse matrix): held-out interactions (the relevant items).
        K (int): ranking cutoff; must be positive.

    Returns:
        tuple: (avg_precision@K, avg_recall@K, avg_ndcg@K, avg_map@K, avg_auc). When no
        user can be evaluated the result is (0.0, 0.0, 0.0, 0.0, 0.5).

    Raises:
        ValueError: if K is not positive.
    """
    if K <= 0:
        raise ValueError("K must be a positive integer.")

    train_matrix_csr = train_matrix.tocsr()
    test_matrix_csr = test_matrix.tocsr()
    num_users, num_items = train_matrix.shape

    # Score on the device the model lives on instead of relying on a notebook global.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = torch.device("cpu")

    # Rows missing from the test matrix cannot hold test items; never index past it.
    num_eval_users = min(num_users, test_matrix_csr.shape[0])

    # DCG discount for ranks 1..K: 1 / log2(rank + 1).
    discounts = 1.0 / np.log2(np.arange(K) + 2)

    total_precision = 0.0
    total_recall = 0.0
    total_ndcg = 0.0
    total_map = 0.0
    total_auc = 0.0
    evaluated_users_count = 0

    was_training = model.training
    model.eval()  # Set model to evaluation mode
    try:
        with torch.no_grad():
            for u_idx in tqdm(range(num_eval_users), desc="Evaluating"):
                test_pos_items = test_matrix_csr.indices[test_matrix_csr.indptr[u_idx]:test_matrix_csr.indptr[u_idx + 1]]
                num_test_pos = len(test_pos_items)

                if num_test_pos == 0:
                    continue

                # Candidates are all items not seen by this user in training (ascending order).
                train_items = train_matrix_csr.indices[train_matrix_csr.indptr[u_idx]:train_matrix_csr.indptr[u_idx + 1]]
                candidate_mask = np.ones(num_items, dtype=bool)
                candidate_mask[train_items] = False
                candidate_items = np.flatnonzero(candidate_mask)

                if candidate_items.size == 0:
                    continue

                # Predict scores for all candidate items for this user
                item_batch = torch.as_tensor(candidate_items, dtype=torch.long, device=model_device)
                user_batch = torch.full_like(item_batch, u_idx)
                scores = model(user_batch, item_batch).detach().cpu().numpy().reshape(-1)

                # Relevance flag per candidate; np.isin also tolerates test indices
                # that fall outside the training item range.
                is_test_pos = np.isin(candidate_items, test_pos_items)

                # Descending by score; the stable sort keeps candidate order on ties.
                top_k_order = np.argsort(-scores, kind="stable")[:K]
                top_k_hits = is_test_pos[top_k_order]
                num_hits = int(top_k_hits.sum())
                ideal_hits = min(num_test_pos, K)

                # --- Precision@K ---
                total_precision += num_hits / K

                # --- Recall@K ---
                total_recall += num_hits / num_test_pos

                # --- NDCG@K --- (the ideal ranking puts min(#relevant, K) hits first)
                dcg = float(discounts[:len(top_k_hits)][top_k_hits].sum())
                idcg = float(discounts[:ideal_hits].sum())
                total_ndcg += dcg / idcg if idcg > 0 else 0.0

                # --- MAP@K --- precision at every hit rank, normalised by min(#relevant, K)
                if num_hits > 0:
                    hit_ranks = np.flatnonzero(top_k_hits) + 1  # 1-based ranks of the hits
                    precision_at_hits = np.arange(1, num_hits + 1) / hit_ranks
                    total_map += float(precision_at_hits.sum()) / ideal_hits

                # --- AUC --- over the full candidate list, not only the top K
                pos_scores = scores[is_test_pos].tolist()
                neg_scores = scores[~is_test_pos].tolist()
                if pos_scores and neg_scores:
                    auc = calculate_auc_map_for_user(pos_scores, neg_scores)
                else:
                    # No rankable positive (or no negative): AUC is undefined, use chance level.
                    auc = 0.5
                total_auc += auc

                evaluated_users_count += 1
    finally:
        # Restore whichever mode the caller had, even if scoring raised.
        model.train(was_training)

    if evaluated_users_count == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.5  # default AUC for no users

    return (
        total_precision / evaluated_users_count,
        total_recall / evaluated_users_count,
        total_ndcg / evaluated_users_count,
        total_map / evaluated_users_count,
        total_auc / evaluated_users_count
    )


In [None]:
# --- 0. Device Configuration ---
def get_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        backend = "cuda"
    else:
        backend = "cpu"
    return torch.device(backend)

# Device shared by the rest of the notebook.
device = get_device()
print(f"Using device: {device}")

Using device: cuda


In [None]:
import torch.optim as optim
from scipy.sparse import coo_matrix
from tqdm.notebook import tqdm # For progress bars in Jupyter/Colab

In [None]:
# --- 6. Training Loop ---
# Hyperparameters
LATENT_DIM_MF = 64         # Dimension for standard MF factors
LATENT_DIM_VISUAL = 256     # Dimension for user visual preference factors (must match projection_dim)
PROJECTION_DIM = 256        # Dimension to project visual features to
# The model scores with a direct dot product, so both dimensions must agree.
if LATENT_DIM_VISUAL != PROJECTION_DIM:
    raise ValueError("LATENT_DIM_VISUAL must be equal to PROJECTION_DIM for this VBPR implementation.")

LEARNING_RATE = 0.004
BATCH_SIZE = 1024*4          # Number of (u, pos, neg) triplets per batch
EPOCHS = 35
NUM_SAMPLES_PER_USER = 50   # Number of (pos, neg) triplets sampled per user in every epoch
EVAL_EVERY = 1              # Evaluate (and step the LR scheduler) every N epochs
EVAL_K = 10                 # Ranking cutoff used for the per-epoch and final evaluation
SEED = 42                   # Seeds the Python and torch RNGs used by sampling and initialisation

# Regularization coefficients
LAMBDA_MF = 1e-3
LAMBDA_VISUAL = 1e-4
LAMBDA_BIAS = 1e-5

# Make triplet sampling, shuffling and weight initialisation repeatable.
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
random.seed(SEED)
torch.manual_seed(SEED)

# Get dimensions from the interaction data
num_users = user_item_matrix_train.shape[0]
num_items = user_item_matrix_train.shape[1]
# Convert to torch tensor, move to device
item_visual_features_tensor = torch.tensor(book_embeddings, dtype=torch.float32).to(device)

# Verify dimensions
print(f"Loaded book embeddings shape: {item_visual_features_tensor.shape}")
visual_feature_dim = item_visual_features_tensor.shape[1]
# Every item index must have a feature row, otherwise the lookup in the model fails
# later with an opaque indexing error.
if item_visual_features_tensor.shape[0] < num_items:
    raise ValueError(
        f"Item features cover {item_visual_features_tensor.shape[0]} items "
        f"but the interaction matrix has {num_items}."
    )

# Instantiate the model
model = VBPR(
    num_users=num_users,
    num_items=num_items,
    latent_dim_mf=LATENT_DIM_MF,
    latent_dim_visual=LATENT_DIM_VISUAL,
    visual_feature_dim=visual_feature_dim,
    projection_dim=PROJECTION_DIM,
    item_visual_features=item_visual_features_tensor # Pre-loaded on device
).to(device)

# Instantiate Loss and Optimizer
criterion = BPRLoss(model, LAMBDA_MF, LAMBDA_VISUAL, LAMBDA_BIAS)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Halve the LR when NDCG has not improved for more than `patience` consecutive
# evaluations. The `verbose` argument is deprecated in recent PyTorch releases, so LR
# changes are reported explicitly in the loop below instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='max',
                                                       factor=0.5,
                                                       patience=2)

print("Starting VBPR training...")

for epoch in range(EPOCHS):
    model.train() # Set model to training mode

    # Fresh triplets every epoch so that different negatives are seen over time.
    train_triplets = sample_bpr_triplets(user_item_matrix_train, NUM_SAMPLES_PER_USER)
    random.shuffle(train_triplets) # Shuffle for better training dynamics

    # Convert once per epoch; slicing contiguous arrays per batch is much cheaper than
    # rebuilding three Python lists for every batch.
    triplet_array = np.asarray(train_triplets, dtype=np.int64).reshape(-1, 3)
    user_column = np.ascontiguousarray(triplet_array[:, 0])
    pos_column = np.ascontiguousarray(triplet_array[:, 1])
    neg_column = np.ascontiguousarray(triplet_array[:, 2])

    total_loss = 0.0
    num_batches = 0

    # Create batches
    for i in tqdm(range(0, len(triplet_array), BATCH_SIZE), desc=f"Epoch {epoch+1}/{EPOCHS} Training"):
        users = torch.from_numpy(user_column[i:i + BATCH_SIZE]).to(device)
        pos_items = torch.from_numpy(pos_column[i:i + BATCH_SIZE]).to(device)
        neg_items = torch.from_numpy(neg_column[i:i + BATCH_SIZE]).to(device)

        optimizer.zero_grad() # Clear gradients

        loss = criterion(users, pos_items, neg_items) # Calculate BPR loss + regularization

        loss.backward() # Backpropagate
        optimizer.step() # Update weights

        total_loss += loss.item()
        num_batches += 1

    avg_loss = total_loss / num_batches if num_batches > 0 else 0.0
    print(f"Epoch {epoch+1} done. Avg Loss: {avg_loss:.4f}")

    if (epoch + 1) % EVAL_EVERY == 0:
        # `map_score` / `auc_score` rather than `map` / `auc`: binding `map` at module
        # level would hide the builtin for every later cell.
        precision, recall, ndcg, map_score, auc_score = evaluate_model(model, user_item_matrix_train, user_item_matrix_test, K=EVAL_K)
        print(f"Epoch {epoch+1} Evaluation: Precision@{EVAL_K}={precision:.4f}, Recall@{EVAL_K}={recall:.4f}, NDCG@{EVAL_K}={ndcg:.4f}, MAP@{EVAL_K}={map_score:.4f}, AUC={auc_score:.4f}")

        # Step the scheduler only when a fresh NDCG exists (it would otherwise see a
        # stale or undefined value once EVAL_EVERY > 1).
        lr_before = optimizer.param_groups[0]["lr"]
        scheduler.step(ndcg)
        lr_after = optimizer.param_groups[0]["lr"]
        if lr_after != lr_before:
            print(f"Epoch {epoch+1}: learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

print("Training complete.")

# Final evaluation
precision, recall, ndcg, map_score, auc_score = evaluate_model(model, user_item_matrix_train, user_item_matrix_test, K=EVAL_K)
print(f"Final Evaluation: Precision@{EVAL_K}={precision:.4f}, Recall@{EVAL_K}={recall:.4f}, NDCG@{EVAL_K}={ndcg:.4f}, MAP@{EVAL_K}={map_score:.4f}, AUC={auc_score:.4f}")

Loaded book embeddings shape: torch.Size([4287, 768])
Starting VBPR training...




Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 1/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 1 done. Avg Loss: 0.3429


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 1 Evaluation: Precision@10=0.0910, Recall@10=0.0910, NDCG@10=0.0955, MAP@10=0.0378, AUC=0.9304


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 2/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 2 done. Avg Loss: 0.1858


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 2 Evaluation: Precision@10=0.1140, Recall@10=0.1140, NDCG@10=0.1230, MAP@10=0.0522, AUC=0.9412


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 3/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 3 done. Avg Loss: 0.1513


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 3 Evaluation: Precision@10=0.1250, Recall@10=0.1250, NDCG@10=0.1321, MAP@10=0.0567, AUC=0.9477


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 4/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 4 done. Avg Loss: 0.1347


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 4 Evaluation: Precision@10=0.1310, Recall@10=0.1310, NDCG@10=0.1372, MAP@10=0.0587, AUC=0.9493


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 5/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 5 done. Avg Loss: 0.1237


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 5 Evaluation: Precision@10=0.1350, Recall@10=0.1350, NDCG@10=0.1437, MAP@10=0.0612, AUC=0.9534


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 6/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 6 done. Avg Loss: 0.1162


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 6 Evaluation: Precision@10=0.1380, Recall@10=0.1380, NDCG@10=0.1510, MAP@10=0.0678, AUC=0.9550


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 7/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 7 done. Avg Loss: 0.1105


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 7 Evaluation: Precision@10=0.1330, Recall@10=0.1330, NDCG@10=0.1408, MAP@10=0.0609, AUC=0.9550


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 8/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 8 done. Avg Loss: 0.1065


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 8 Evaluation: Precision@10=0.1410, Recall@10=0.1410, NDCG@10=0.1544, MAP@10=0.0685, AUC=0.9562


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 9/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 9 done. Avg Loss: 0.1040


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 9 Evaluation: Precision@10=0.1320, Recall@10=0.1320, NDCG@10=0.1491, MAP@10=0.0658, AUC=0.9573


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 10/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 10 done. Avg Loss: 0.1012


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 10 Evaluation: Precision@10=0.1380, Recall@10=0.1380, NDCG@10=0.1597, MAP@10=0.0744, AUC=0.9568


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 11/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 11 done. Avg Loss: 0.0992


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 11 Evaluation: Precision@10=0.1480, Recall@10=0.1480, NDCG@10=0.1630, MAP@10=0.0725, AUC=0.9576


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 12/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 12 done. Avg Loss: 0.0978


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 12 Evaluation: Precision@10=0.1400, Recall@10=0.1400, NDCG@10=0.1541, MAP@10=0.0679, AUC=0.9568


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 13/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 13 done. Avg Loss: 0.0964


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 13 Evaluation: Precision@10=0.1330, Recall@10=0.1330, NDCG@10=0.1437, MAP@10=0.0607, AUC=0.9558


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 14/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 14 done. Avg Loss: 0.0952


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 14 Evaluation: Precision@10=0.1340, Recall@10=0.1340, NDCG@10=0.1437, MAP@10=0.0620, AUC=0.9572


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 15/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 15 done. Avg Loss: 0.0915


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 15 Evaluation: Precision@10=0.1290, Recall@10=0.1290, NDCG@10=0.1482, MAP@10=0.0659, AUC=0.9592


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 16/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 16 done. Avg Loss: 0.0883


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 16 Evaluation: Precision@10=0.1350, Recall@10=0.1350, NDCG@10=0.1505, MAP@10=0.0686, AUC=0.9596


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 17/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 17 done. Avg Loss: 0.0871


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 17 Evaluation: Precision@10=0.1340, Recall@10=0.1340, NDCG@10=0.1478, MAP@10=0.0650, AUC=0.9596


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 18/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 18 done. Avg Loss: 0.0848


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 18 Evaluation: Precision@10=0.1340, Recall@10=0.1340, NDCG@10=0.1462, MAP@10=0.0644, AUC=0.9605


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 19/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 19 done. Avg Loss: 0.0836


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 19 Evaluation: Precision@10=0.1420, Recall@10=0.1420, NDCG@10=0.1564, MAP@10=0.0692, AUC=0.9612


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 20/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 20 done. Avg Loss: 0.0825


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 20 Evaluation: Precision@10=0.1440, Recall@10=0.1440, NDCG@10=0.1561, MAP@10=0.0696, AUC=0.9612


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 21/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 21 done. Avg Loss: 0.0812


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 21 Evaluation: Precision@10=0.1410, Recall@10=0.1410, NDCG@10=0.1560, MAP@10=0.0705, AUC=0.9616


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 22/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 22 done. Avg Loss: 0.0811


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 22 Evaluation: Precision@10=0.1450, Recall@10=0.1450, NDCG@10=0.1594, MAP@10=0.0716, AUC=0.9618


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 23/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 23 done. Avg Loss: 0.0805


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 23 Evaluation: Precision@10=0.1480, Recall@10=0.1480, NDCG@10=0.1624, MAP@10=0.0733, AUC=0.9620


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 24/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 24 done. Avg Loss: 0.0797


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 24 Evaluation: Precision@10=0.1440, Recall@10=0.1440, NDCG@10=0.1575, MAP@10=0.0711, AUC=0.9622


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 25/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 25 done. Avg Loss: 0.0797


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 25 Evaluation: Precision@10=0.1430, Recall@10=0.1430, NDCG@10=0.1606, MAP@10=0.0733, AUC=0.9623


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 26/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 26 done. Avg Loss: 0.0791


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 26 Evaluation: Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1612, MAP@10=0.0731, AUC=0.9626


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 27/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 27 done. Avg Loss: 0.0787


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 27 Evaluation: Precision@10=0.1430, Recall@10=0.1430, NDCG@10=0.1600, MAP@10=0.0726, AUC=0.9626


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 28/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 28 done. Avg Loss: 0.0784


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 28 Evaluation: Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1613, MAP@10=0.0732, AUC=0.9628


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 29/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 29 done. Avg Loss: 0.0787


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 29 Evaluation: Precision@10=0.1440, Recall@10=0.1440, NDCG@10=0.1605, MAP@10=0.0729, AUC=0.9628


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 30/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 30 done. Avg Loss: 0.0785


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 30 Evaluation: Precision@10=0.1420, Recall@10=0.1420, NDCG@10=0.1592, MAP@10=0.0723, AUC=0.9629


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 31/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 31 done. Avg Loss: 0.0785


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 31 Evaluation: Precision@10=0.1450, Recall@10=0.1450, NDCG@10=0.1604, MAP@10=0.0727, AUC=0.9629


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 32/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 32 done. Avg Loss: 0.0785


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 32 Evaluation: Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1615, MAP@10=0.0732, AUC=0.9628


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 33/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 33 done. Avg Loss: 0.0786


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 33 Evaluation: Precision@10=0.1470, Recall@10=0.1470, NDCG@10=0.1623, MAP@10=0.0743, AUC=0.9629


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 34/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 34 done. Avg Loss: 0.0783


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 34 Evaluation: Precision@10=0.1480, Recall@10=0.1480, NDCG@10=0.1622, MAP@10=0.0738, AUC=0.9629


Sampling Triplet:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 35/35 Training:   0%|          | 0/645 [00:00<?, ?it/s]

Epoch 35 done. Avg Loss: 0.0782


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Epoch 35 Evaluation: Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1609, MAP@10=0.0734, AUC=0.9628
Training complete.


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Final Evaluation: Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1609, MAP@10=0.0734, AUC=0.9628


# Evaluación

In [None]:
ks = [5, 10, 20]

# Report all ranking metrics at each cutoff. Every call re-scores all users, so the
# AUC (which does not depend on K) is recomputed each time as well.
for k in ks:
    metrics_at_k = evaluate_model(model, user_item_matrix_train, user_item_matrix_test, K=k)
    precision, recall, ndcg, map_score, auc_score = metrics_at_k
    print(f"Precision@{k}={precision:.4f}, Recall@{k}={recall:.4f}, NDCG@{k}={ndcg:.4f}, MAP@{k}={map_score:.4f}, AUC={auc_score:.4f}")

Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Precision@5=0.1780, Recall@5=0.0890, NDCG@5=0.1873, MAP@5=0.1161, AUC=0.9628


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1609, MAP@10=0.0734, AUC=0.9628


Evaluating:   0%|          | 0/52821 [00:00<?, ?it/s]

Precision@20=0.1285, Recall@20=0.2570, NDCG@20=0.2223, MAP@20=0.0970, AUC=0.9628


### PCA 800
Precision@5=0.1920, Recall@5=0.0990, NDCG@5=0.2141, MAP@5=0.1275, AUC=0.9635

Precision@10=0.1550, Recall@10=0.1550, NDCG@10=0.1785, MAP@10=0.0813, AUC=0.9635

Precision@20=0.1275, Recall@20=0.2610, NDCG@20=0.2369, MAP@20=0.1023, AUC=0.9635

### VGG16
Precision@5=0.1820, Recall@5=0.0980, NDCG@5=0.2069, MAP@5=0.1217, AUC=0.9607

Precision@10=0.1650, Recall@10=0.1650, NDCG@10=0.1810, MAP@10=0.0837, AUC=0.9607

Precision@20=0.1205, Recall@20=0.2410, NDCG@20=0.2356, MAP@20=0.0977, AUC=0.9607

### BERT
Precision@5=0.1780, Recall@5=0.0890, NDCG@5=0.1873, MAP@5=0.1161, AUC=0.9628

Precision@10=0.1460, Recall@10=0.1460, NDCG@10=0.1609, MAP@10=0.0734, AUC=0.9628

Precision@20=0.1285, Recall@20=0.2570, NDCG@20=0.2223, MAP@20=0.0970, AUC=0.9628

## Diversity

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def compute_ild_at_k(model, user_item_matrix_train, book_embeddings, K=10, user_ids=None):
    """
    Compute the average Intra-List Diversity at K (ILD@K) of the model's top-K lists.

    For every selected user, all items without a training interaction are scored
    with ``model(user_batch, item_batch)``, the K best-scored items are kept, and
    the list's ILD is the mean of ``1 - cosine_similarity`` over every distinct
    pair of their embeddings.

    Parameters:
    -----------
    model : callable
        Scoring model invoked as ``model(user_indices, item_indices)`` with two
        ``torch.long`` tensors of equal length. Its output is moved to CPU and
        flattened to one score per (user, item) pair.
    user_item_matrix_train : scipy.sparse matrix
        Training user-item matrix. Any scipy sparse format is accepted; it is
        converted to CSR internally.
    book_embeddings : np.ndarray
        Embeddings of books, indexed by internal item index
        (presumably shape = (num_books, embedding_dim) -- confirm with caller).
    K : int
        Number of top recommendations to consider.
    user_ids : iterable of ints
        Subset of user indices to evaluate on. If None, all users are used.

    Returns:
    --------
    float
        Average ILD@K over the selected users; 0.0 if no user produced a list
        with at least two items.

    Notes:
    ------
    Input tensors are created on the notebook-level ``device``. Users with no
    candidate items, or with fewer than two recommendations, are skipped.
    Score ties are broken in favour of the lower item index.
    """
    # Row slicing through indptr/indices below is only valid for CSR.
    train_csr = user_item_matrix_train.tocsr()
    num_users, num_items = train_csr.shape
    if user_ids is None:
        user_ids = range(num_users)

    all_items = np.arange(num_items)
    indptr, indices = train_csr.indptr, train_csr.indices

    ild_scores = []

    with torch.no_grad():
        for user_id in user_ids:
            # Candidates are all items the user has not interacted with in training.
            seen_items = indices[indptr[user_id]:indptr[user_id + 1]]
            candidate_items = np.setdiff1d(all_items, seen_items)
            if candidate_items.size == 0:
                continue  # No candidates to rank

            user_batch = torch.full(
                (candidate_items.size,), int(user_id), dtype=torch.long, device=device
            )
            item_batch = torch.as_tensor(candidate_items, dtype=torch.long, device=device)
            all_scores = model(user_batch, item_batch).cpu().numpy().reshape(-1)

            # Stable sort on the negated scores: descending score, ties by item index.
            top_positions = np.argsort(-all_scores, kind="stable")[:K]
            recommended_ids = candidate_items[top_positions]

            if len(recommended_ids) < 2:
                continue  # ILD needs at least one pair of items

            # Cosine similarity matrix between the recommended items (K x K)
            sim_matrix = cosine_similarity(book_embeddings[recommended_ids])

            # Strict upper triangle: each unordered pair once, no self-similarity
            pairwise_sims = sim_matrix[np.triu_indices_from(sim_matrix, k=1)]

            # ILD of this list: average pairwise distance (1 - similarity)
            ild_scores.append(np.mean(1 - pairwise_sims))

    return float(np.mean(ild_scores)) if ild_scores else 0.0

In [None]:
# Evaluate only users with at least one held-out interaction, i.e. rows of the
# test matrix whose CSR row pointer advances.
test_user_ids = np.flatnonzero(np.diff(user_item_matrix_test.tocsr().indptr) > 0)

for k in ks:
  ild = compute_ild_at_k(
      model,
      user_item_matrix_train.tocsr(),
      book_embeddings,
      K=k,
      user_ids=test_user_ids,
  )
  print(f"ILD@{k}={ild:.4f}")

ILD@5=0.2916
ILD@10=0.3007
ILD@20=0.3026


### PCA 800
ILD@5=0.9515
ILD@10=0.9550
ILD@20=0.9636

### VGG16
ILD@5=0.6462
ILD@10=0.6543
ILD@20=0.6549

### BERT
ILD@5=0.2916
ILD@10=0.3007
ILD@20=0.3026

## Novelty

In [None]:
from collections import Counter

# One (user_id, book_id) tuple per training interaction.
all_train_pairs = [
    (user, book)
    for user, books in user_interactions.items()
    for book in books
]

# Popularity of a book = number of training interactions it appears in.
pop_counter = Counter(pair[1] for pair in all_train_pairs)

# Largest popularity; used downstream to normalise popularity.
max_pop = max(pop_counter.values())


In [None]:
import numpy as np

def novelty_at_k(topk_items, pop_counter, max_pop, k):
    """
    Novelty@K of one recommendation list.

    Each of the first ``k`` items contributes ``1 - popularity / max_pop``;
    the result is the mean of those contributions. Items missing from
    ``pop_counter`` count as popularity 0 (i.e. maximally novel).

    Parameters:
    -----------
    topk_items : list of book_ids (not indices)
        Ranked recommendations; only the first ``k`` are used.
    pop_counter : Counter of book_id -> interaction count
    max_pop : int
        Max value in pop_counter for normalization
    k : int
        Cut-off applied to ``topk_items``.

    Returns:
    --------
    float
        Novelty@K score
    """
    head = topk_items[:k]
    counts = np.asarray(
        [pop_counter.get(item, 0) for item in head],
        dtype=np.float32,
    )
    normalised_popularity = counts / max_pop
    return (1.0 - normalised_popularity).mean()

def average_novelty(model, user_item_matrix_train, user_ids, pop_counter, max_pop, k=10, idx_to_book_id=None):
    """
    Computes average novelty@K of the model's top-K lists across the given users.

    For every user, all items without a training interaction are scored with
    ``model(user_batch, item_batch)`` and the ``k`` best-scored items are kept.
    Their internal indices are translated to external book ids before the
    popularity lookup, because ``pop_counter`` is keyed by book id, not by
    internal index.

    Parameters:
    -----------
    model : callable
        Scoring model invoked as ``model(user_indices, item_indices)`` with two
        ``torch.long`` tensors of equal length.
    user_item_matrix_train : scipy.sparse matrix
        Training user-item matrix. Any scipy sparse format is accepted; it is
        converted to CSR internally.
    user_ids : iterable of ints
        Internal user indices to evaluate on.
    pop_counter : Counter
        Mapping of external book_id -> number of training interactions.
    max_pop : int
        Max value in pop_counter, used for normalization.
    k : int
        Number of top recommendations to consider.
    idx_to_book_id : mapping, optional
        Internal item index -> external book_id. Defaults to the notebook-level
        ``idx2bookid`` mapping.

    Returns:
    --------
    float
        Average novelty@K; 0.0 if no user could be scored.

    Notes:
    ------
    Input tensors are created on the notebook-level ``device``. Score ties are
    broken in favour of the lower item index.
    """
    if idx_to_book_id is None:
        # Notebook-level mapping built when the book catalogue was loaded.
        idx_to_book_id = idx2bookid

    # Row slicing through indptr/indices below is only valid for CSR.
    train_csr = user_item_matrix_train.tocsr()
    num_users, num_items = train_csr.shape

    all_items = np.arange(num_items)
    indptr, indices = train_csr.indptr, train_csr.indices

    novelty_scores = []

    with torch.no_grad():
        for user_id in user_ids:
            # Candidates are all items the user has not interacted with in training.
            seen_items = indices[indptr[user_id]:indptr[user_id + 1]]
            candidate_items = np.setdiff1d(all_items, seen_items)
            if candidate_items.size == 0:
                continue  # No candidates to rank

            user_batch = torch.full(
                (candidate_items.size,), int(user_id), dtype=torch.long, device=device
            )
            item_batch = torch.as_tensor(candidate_items, dtype=torch.long, device=device)
            all_scores = model(user_batch, item_batch).cpu().numpy().reshape(-1)

            # Stable sort on the negated scores: descending score, ties by item index.
            top_positions = np.argsort(-all_scores, kind="stable")[:k]
            recommended_idx = candidate_items[top_positions].tolist()

            # Popularity is keyed by external book id, so translate before the lookup;
            # passing raw indices would read the popularity of unrelated books.
            recommended_book_ids = [idx_to_book_id[idx] for idx in recommended_idx]

            novelty_scores.append(novelty_at_k(recommended_book_ids, pop_counter, max_pop, k))

    return float(np.mean(novelty_scores)) if novelty_scores else 0.0


In [None]:
# Report average novelty of the top-k lists for every cut-off in ks.
for k in ks:
  novelty = average_novelty(
      model,
      user_item_matrix_train.tocsr(),
      test_user_ids,
      pop_counter,
      max_pop,
      k=k,
  )
  print(f"Novelty@{k}={novelty:.4f}")

Novelty@5=0.6976
Novelty@10=0.7166
Novelty@20=0.7456


### PCA 800
Novelty@5=0.7075
Novelty@10=0.7228
Novelty@20=0.7567

### VGG16
Novelty@5=0.6875
Novelty@10=0.7109
Novelty@20=0.7513

### BERT
Novelty@5=0.6976
Novelty@10=0.7166
Novelty@20=0.7456

# Ejemplos de recomendación

In [None]:
# Qualitative example: top-K recommendations for one user (internal user index).
user_id = 2222
K = 10
user_item_matrix_train = user_item_matrix_train.tocsr()
num_users, num_items = user_item_matrix_train.shape
all_items = set(range(num_items))

with torch.no_grad():
  # Items already seen in training are excluded from the candidates.
  train_interacted_items = set(user_item_matrix_train.indices[user_item_matrix_train.indptr[user_id]:user_item_matrix_train.indptr[user_id+1]])
  candidate_items = list(all_items - train_interacted_items)

  # Score every remaining (user, item) pair in a single batch.
  user_batch = torch.tensor([user_id] * len(candidate_items), dtype=torch.long, device=device)
  item_batch = torch.tensor(candidate_items, dtype=torch.long, device=device)

  all_scores = model(user_batch, item_batch).cpu().numpy()

  # Rank candidates by predicted score, best first.
  candidate_item_scores = list(zip(candidate_items, all_scores))
  candidate_item_scores.sort(key=lambda x: x[1], reverse=True)

recommended_ids = [item for item, score in candidate_item_scores[:K]]

# Translate internal item indices to external book ids.
recommended_ids_original = [idx2bookid[idx] for idx in recommended_ids]

# A plain isin() filter returns rows in catalogue order, which hides the ranking.
# Sort the selected rows by their position in the recommendation list instead.
rank_of = {book_id: rank for rank, book_id in enumerate(recommended_ids_original)}
recommended_books = (
    df_books[df_books['book_id'].isin(recommended_ids_original)]
    .sort_values('book_id', key=lambda ids: ids.map(rank_of))
)
recommended_books

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,book_desc
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...,Alternate Cover Edition ISBN: 0743273567 (ISBN...
12,14,7613,7613,2207778,896,452284244,9780452000000.0,George Orwell,1945.0,Animal Farm: A Fairy Story,...,1982987,35472,66854,135147,433432,698642,648912,https://images.gr-assets.com/books/1424037542m...,https://images.gr-assets.com/books/1424037542s...,مزرعة الحيوانات هي رائعة جورج أورويل الخالدة.....
13,16,2429135,2429135,1708725,274,307269752,9780307000000.0,"Stieg Larsson, Reg Keeland",2005.0,Män som hatar kvinnor,...,1929834,62543,54835,86051,285413,667485,836050,https://images.gr-assets.com/books/1327868566m...,https://images.gr-assets.com/books/1327868566s...,"A spellbinding amalgam of murder mystery, fami..."
22,26,968,968,2982101,350,307277674,9780307000000.0,Dan Brown,2003.0,The Da Vinci Code,...,1557292,41560,71345,126493,340790,539277,479387,https://images.gr-assets.com/books/1303252999m...,https://images.gr-assets.com/books/1303252999s...,An ingenious code hidden in the works of Leona...
110,140,6892870,6892870,12883496,193,030726999X,9780307000000.0,"Stieg Larsson, Reg Keeland",2007.0,Luftslottet som sprängdes,...,526791,26766,3823,14773,78387,203297,226511,https://images.gr-assets.com/books/1327708260m...,https://images.gr-assets.com/books/1327708260s...,The stunning third and final novel in Stieg La...
127,165,13497,13497,1019062,176,055358202X,9780554000000.0,George R.R. Martin,2005.0,A Feast for Crows,...,481130,18876,3170,18574,94401,175973,189012,https://images.gr-assets.com/books/1429538615m...,https://images.gr-assets.com/books/1429538615s...,Alternate covers can be found here.With A Feas...
250,332,65605,65605,1031537,312,60764902,9780061000000.0,C.S. Lewis,1953.0,The magician's nephew,...,303570,8690,3168,14602,69051,105375,111374,https://images.gr-assets.com/books/1308814770m...,https://images.gr-assets.com/books/1308814770s...,The secret passage to the house next door lead...
301,401,10799,10799,4652599,425,99910101,9780100000000.0,Ernest Hemingway,1929.0,A Farewell to Arms,...,207523,7249,6224,16181,50604,77096,57418,https://images.gr-assets.com/books/1313714836m...,https://images.gr-assets.com/books/1313714836s...,A Farewell to Arms is the unforgettable story ...
405,542,84119,84119,3294501,280,439861365,9780440000000.0,C.S. Lewis,1954.0,The horse and his boy,...,209180,4913,2752,13751,54684,68114,69879,https://images.gr-assets.com/books/1388210968m...,https://images.gr-assets.com/books/1388210968s...,The Horse and his Boy is a stirring and dramat...
451,611,7332,7332,4733799,339,618391118,9780618000000.0,"J.R.R. Tolkien, Christopher Tolkien, Ted Nasmith",1977.0,The Silmarillion,...,160751,5743,4943,13613,36498,47548,58149,https://images.gr-assets.com/books/1336502583m...,https://images.gr-assets.com/books/1336502583s...,The story of the creation of the world and of ...


In [None]:
# Catalogue rows for the first 10 training books of the user selected above.
past_interactions = user_interactions[idx2userid[user_id]][:10]
df_books.loc[df_books['book_id'].isin(past_interactions)]

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,book_desc
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...,Harry Potter's life is miserable. His parents ...
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...,The unforgettable novel of a childhood in a sl...
7,8,5107,5107,3036731,360,316769177,9780317000000.0,J.D. Salinger,1951.0,The Catcher in the Rye,...,2120637,44920,109383,185520,455042,661516,709176,https://images.gr-assets.com/books/1398034300m...,https://images.gr-assets.com/books/1398034300s...,The hero-narrator of The Catcher in the Rye is...
29,35,865,865,4835472,458,61122416,9780061000000.0,"Paulo Coelho, Alan R. Clarke",1988.0,O Alquimista,...,1403995,55781,74846,123614,289143,412180,504212,https://images.gr-assets.com/books/1483412266m...,https://images.gr-assets.com/books/1483412266s...,Paulo Coelho's masterpiece tells the mystical ...
102,126,234225,234225,3634639,241,340839937,9780341000000.0,Frank Herbert,1965.0,Dune,...,525976,13239,13354,22778,74206,154771,260867,https://images.gr-assets.com/books/1434908555m...,https://images.gr-assets.com/books/1434908555s...,Set in the far future amidst a sprawling feuda...
192,255,662,662,817219,147,452011876,9780452000000.0,"Ayn Rand, Leonard Peikoff",1957.0,Atlas Shrugged,...,296645,14666,30901,27442,53762,78744,105796,https://images.gr-assets.com/books/1405868167m...,https://images.gr-assets.com/books/1405868167s...,This is the story of a man who said that he wo...
206,276,29579,29579,1783981,169,553803719,9780554000000.0,Isaac Asimov,1951.0,Foundation,...,297588,6240,7697,12471,47251,97583,132586,https://images.gr-assets.com/books/1417900846m...,https://images.gr-assets.com/books/1417900846s...,For twelve thousand years the Galactic Empire ...
216,287,2122,2122,3331807,154,451191153,9780451000000.0,"Ayn Rand, Leonard Peikoff",1943.0,The Fountainhead,...,254976,10198,18158,18720,44131,76424,97543,https://images.gr-assets.com/books/1491163636m...,https://images.gr-assets.com/books/1491163636s...,"When The Fountainhead was first published, Ayn..."
407,546,629,629,175720,181,60589469,9780061000000.0,Robert M. Pirsig,1974.0,Zen and the Art of Motorcycle Maintenance,...,153992,6535,6589,14196,36227,49874,47106,https://images.gr-assets.com/books/1410136019m...,https://images.gr-assets.com/books/1410136019s...,One of the most important & influential books ...
1195,1877,33514,33514,35832,240,205313426,9780205000000.0,"William Strunk Jr., E.B. White",1918.0,The Elements of Style,...,57655,2942,881,2233,9114,18026,27401,https://images.gr-assets.com/books/1393947922m...,https://images.gr-assets.com/books/1393947922s...,This style manual offers practical advice on i...


In [None]:
# Qualitative example: top-K recommendations for one user (internal user index).
user_id = 10000
K = 10
user_item_matrix_train = user_item_matrix_train.tocsr()
num_users, num_items = user_item_matrix_train.shape
all_items = set(range(num_items))

with torch.no_grad():
  # Items already seen in training are excluded from the candidates.
  train_interacted_items = set(user_item_matrix_train.indices[user_item_matrix_train.indptr[user_id]:user_item_matrix_train.indptr[user_id+1]])
  candidate_items = list(all_items - train_interacted_items)

  # Score every remaining (user, item) pair in a single batch.
  user_batch = torch.tensor([user_id] * len(candidate_items), dtype=torch.long, device=device)
  item_batch = torch.tensor(candidate_items, dtype=torch.long, device=device)

  all_scores = model(user_batch, item_batch).cpu().numpy()

  # Rank candidates by predicted score, best first.
  candidate_item_scores = list(zip(candidate_items, all_scores))
  candidate_item_scores.sort(key=lambda x: x[1], reverse=True)

recommended_ids = [item for item, score in candidate_item_scores[:K]]

# Translate internal item indices to external book ids.
recommended_ids_original = [idx2bookid[idx] for idx in recommended_ids]

# A plain isin() filter returns rows in catalogue order, which hides the ranking.
# Sort the selected rows by their position in the recommendation list instead.
rank_of = {book_id: rank for rank, book_id in enumerate(recommended_ids_original)}
recommended_books = (
    df_books[df_books['book_id'].isin(recommended_ids_original)]
    .sort_values('book_id', key=lambda ids: ids.map(rank_of))
)
recommended_books

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,book_desc
16,19,34,34,3204327,566,618346252,9780618000000.0,J.R.R. Tolkien,1954.0,The Fellowship of the Ring,...,1832541,15333,38031,55862,202332,493922,1042394,https://images.gr-assets.com/books/1298411339m...,https://images.gr-assets.com/books/1298411339s...,Alternate Cover Edition ISBN 0618260269 (copyr...
32,38,14050,18619684,2153746,167,965818675,9780966000000.0,Audrey Niffenegger,2003.0,The Time Traveler's Wife,...,1308667,43382,44339,85429,257805,427210,493884,https://images.gr-assets.com/books/1437728815m...,https://images.gr-assets.com/books/1437728815s...,"A funny, often poignant tale of boy meets girl..."
96,119,38447,38447,1119185,236,038549081X,9780385000000.0,Margaret Atwood,1985.0,The Handmaid's Tale,...,683564,35925,13998,32561,116975,252942,267088,https://images.gr-assets.com/books/1498057733m...,https://images.gr-assets.com/books/1498057733s...,Offred is a Handmaid in the Republic of Gilead...
339,457,10692,10692,3061272,129,751537284,9780752000000.0,Elizabeth Kostova,2005.0,The Historian,...,207529,14252,8554,18768,48197,68781,63229,https://images.gr-assets.com/books/1338298031m...,https://images.gr-assets.com/books/1338298031s...,"Late one night, exploring her father’s library..."
491,666,2744,2744,1007964,112,60515198,9780061000000.0,Neil Gaiman,2005.0,Anansi Boys,...,153465,7758,1219,5671,32195,66653,47727,https://images.gr-assets.com/books/1479778049m...,https://images.gr-assets.com/books/1479778049s...,God is dead. Meet the kids.Fat Charlie Nancy's...
833,1217,11901,11901,2926133,68,99471426,9780099000000.0,Anne Rice,1990.0,The Witching Hour,...,87921,2997,1837,4549,15324,27118,39093,https://images.gr-assets.com/books/1327289387m...,https://images.gr-assets.com/books/1327289387s...,"On the veranda of a great New Orleans house, n..."
1024,1533,31336,31336,2925924,65,345419634,9780345000000.0,Anne Rice,1992.0,The Tale of the Body Thief,...,71768,1161,1427,6552,22211,23262,18316,https://images.gr-assets.com/books/1361577271m...,https://images.gr-assets.com/books/1361577271s...,"In a gripping feat of storytelling, Anne Rice ..."
1340,2145,56373,25104465,1013794,58,1573227889,9781573000000.0,Sarah Waters,1998.0,Tipping the Velvet,...,45239,2008,1259,2297,8646,15936,17101,https://images.gr-assets.com/books/1388633460m...,https://images.gr-assets.com/books/1388633460s...,"This delicious, steamy debut novel chronicles ..."
1354,2170,16790,16790,6277491,59,380789027,9780381000000.0,Neil Gaiman,1998.0,Smoke and Mirrors: Short Fiction and Illusions,...,52735,2160,246,1631,10927,22352,17579,https://images.gr-assets.com/books/1409595076m...,https://images.gr-assets.com/books/1409595076s...,"In the deft hands of Neil Gaiman, magic is no ..."
1915,3340,43781,43781,2925968,54,345422406,9780345000000.0,Anne Rice,2000.0,Merrick,...,34850,510,738,3019,10610,11690,8793,https://images.gr-assets.com/books/1170230458m...,https://images.gr-assets.com/books/1170230458s...,"In this mesmerizing new novel, Anne Rice demo..."


In [None]:
# Catalogue rows for the first 10 training books of the user selected above.
past_interactions = user_interactions[idx2userid[user_id]][:10]
df_books.loc[df_books['book_id'].isin(past_interactions)]

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,book_desc
13,16,2429135,2429135,1708725,274,307269752,9780307000000.0,"Stieg Larsson, Reg Keeland",2005.0,Män som hatar kvinnor,...,1929834,62543,54835,86051,285413,667485,836050,https://images.gr-assets.com/books/1327868566m...,https://images.gr-assets.com/books/1327868566s...,"A spellbinding amalgam of murder mystery, fami..."
79,98,5060378,5060378,6976108,209,307269981,9780307000000.0,"Stieg Larsson, Reg Keeland",2006.0,Flickan som lekte med elden,...,649253,31423,4623,15781,91021,258236,279592,https://images.gr-assets.com/books/1351778881m...,https://images.gr-assets.com/books/1351778881s...,The ExposeMillennium publisher Mikael Blomkvis...
110,140,6892870,6892870,12883496,193,030726999X,9780307000000.0,"Stieg Larsson, Reg Keeland",2007.0,Luftslottet som sprängdes,...,526791,26766,3823,14773,78387,203297,226511,https://images.gr-assets.com/books/1327708260m...,https://images.gr-assets.com/books/1327708260s...,The stunning third and final novel in Stieg La...
129,167,4407,30165203,1970226,196,747263744,9780747000000.0,Neil Gaiman,2001.0,American Gods,...,518343,27587,9888,24944,83253,178346,221912,https://images.gr-assets.com/books/1258417001m...,https://images.gr-assets.com/books/1258417001s...,"Days before his release from prison, Shadow's ..."
242,322,14497,14497,16534,152,60557818,9780061000000.0,Neil Gaiman,1996.0,Neverwhere,...,301740,15680,3126,9628,46996,113506,128484,https://images.gr-assets.com/books/1348747943m...,https://images.gr-assets.com/books/1348747943s...,Under the streets of London there's a place mo...
283,377,16793,16793,3166179,207,61142026,9780061000000.0,Neil Gaiman,1999.0,Stardust,...,277184,13731,2959,10183,52599,109208,102235,https://images.gr-assets.com/books/1459127484m...,https://images.gr-assets.com/books/1459127484s...,Life moves at a leisurely pace in the tiny tow...
2071,3684,52635,52635,1093522,48,61002828,9780061000000.0,Clive Barker,1986.0,The Hellbound Heart,...,28209,1132,345,1113,6134,10674,9943,https://images.gr-assets.com/books/1327312426m...,https://images.gr-assets.com/books/1327312426s...,Clive Barker is widely acknowledged as the mas...
2572,4859,32626,32626,1913322,25,425165582,9780425000000.0,Clive Barker,1984.0,Books of Blood: Volumes 1-3,...,20929,452,256,726,3560,7252,9135,https://images.gr-assets.com/books/1306747123m...,https://images.gr-assets.com/books/1306747123s...,"""Everybody is a book of blood; wherever we're ..."
2679,5134,52640,52640,942564,54,7117140,9780007000000.0,Clive Barker,1987.0,Weaveworld,...,22529,851,328,963,3829,7757,9652,https://images.gr-assets.com/books/1329374451m...,https://images.gr-assets.com/books/1329374451s...,Clive Barker has made his mark on modern ficti...
3378,7092,103035,103035,530471,45,743417321,9780743000000.0,Clive Barker,1988.0,Cabal,...,15140,374,189,698,3823,5780,4650,https://images.gr-assets.com/books/1405259936m...,https://images.gr-assets.com/books/1405259936s...,"For more than two decades, Clive Barker has tw..."


In [None]:
# Qualitative example: top-K recommendations for one user (internal user index).
user_id = 12345
K = 10
user_item_matrix_train = user_item_matrix_train.tocsr()
num_users, num_items = user_item_matrix_train.shape
all_items = set(range(num_items))

with torch.no_grad():
  # Items already seen in training are excluded from the candidates.
  train_interacted_items = set(user_item_matrix_train.indices[user_item_matrix_train.indptr[user_id]:user_item_matrix_train.indptr[user_id+1]])
  candidate_items = list(all_items - train_interacted_items)

  # Score every remaining (user, item) pair in a single batch.
  user_batch = torch.tensor([user_id] * len(candidate_items), dtype=torch.long, device=device)
  item_batch = torch.tensor(candidate_items, dtype=torch.long, device=device)

  all_scores = model(user_batch, item_batch).cpu().numpy()

  # Rank candidates by predicted score, best first.
  candidate_item_scores = list(zip(candidate_items, all_scores))
  candidate_item_scores.sort(key=lambda x: x[1], reverse=True)

recommended_ids = [item for item, score in candidate_item_scores[:K]]

# Translate internal item indices to external book ids.
recommended_ids_original = [idx2bookid[idx] for idx in recommended_ids]

# A plain isin() filter returns rows in catalogue order, which hides the ranking.
# Sort the selected rows by their position in the recommendation list instead.
rank_of = {book_id: rank for rank, book_id in enumerate(recommended_ids_original)}
recommended_books = (
    df_books[df_books['book_id'].isin(recommended_ids_original)]
    .sort_values('book_id', key=lambda ids: ids.map(rank_of))
)
recommended_books

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,book_desc
3,4,2657,2657,3275794,487,61120081,9780061000000.0,Harper Lee,1960.0,To Kill a Mockingbird,...,3340896,72586,60427,117415,446835,1001952,1714267,https://images.gr-assets.com/books/1361975680m...,https://images.gr-assets.com/books/1361975680s...,The unforgettable novel of a childhood in a sl...
8,10,1885,1885,3060926,3455,679783261,9780680000000.0,Jane Austen,1813.0,Pride and Prejudice,...,2191465,49152,54700,86485,284852,609755,1155673,https://images.gr-assets.com/books/1320399351m...,https://images.gr-assets.com/books/1320399351s...,«È cosa ormai risaputa che a uno scapolo in po...
11,13,5470,5470,153313,995,451524934,9780452000000.0,"George Orwell, Erich Fromm, Celâl Üster",1949.0,Nineteen Eighty-Four,...,2053394,45518,41845,86425,324874,692021,908229,https://images.gr-assets.com/books/1348990566m...,https://images.gr-assets.com/books/1348990566s...,"Among the seminal texts of the 20th century, N..."
93,116,24583,24583,41326609,2448,143039563,9780143000000.0,"Mark Twain, Guy Cardwell, John Seelye",1876.0,The Adventures of Tom Sawyer,...,594226,7165,12130,34349,143659,217896,186192,https://images.gr-assets.com/books/1404811979m...,https://images.gr-assets.com/books/1404811979s...,Tom Sawyer is sure to find trouble wherever th...
134,172,15823480,15823480,2507928,1492,345803922,9780346000000.0,"Leo Tolstoy, Louise Maude, Leo Tolstoj, Aylmer...",1877.0,Анна Каренина,...,472796,18064,11738,26945,88365,158179,187569,https://images.gr-assets.com/books/1352422904m...,https://images.gr-assets.com/books/1352422904s...,Acclaimed by many as the world's greatest nove...
139,177,7144,7144,3393917,1714,143058142,9780143000000.0,"Fyodor Dostoyevsky, David McDuff",1866.0,Преступление и наказание,...,444675,12605,9477,20078,64050,137104,213966,https://images.gr-assets.com/books/1382846449m...,https://images.gr-assets.com/books/1382846449s...,"Raskolnikov, an impoverished student living in..."
174,229,24213,24213,2375385,845,451527747,9780452000000.0,"Lewis Carroll, John Tenniel, Martin Gardner",1865.0,Alice's Adventures in Wonderland,...,377190,7736,5294,17482,76985,127906,149523,https://images.gr-assets.com/books/1327872220m...,https://images.gr-assets.com/books/1327872220s...,""" I can't explain myself, I'm afraid, sir,"" sa..."
384,518,34268,34268,1358908,1416,805072454,9780805000000.0,"J.M. Barrie, Michael Hague",1904.0,Peter Pan,...,196359,7464,2488,8076,38675,65507,81613,https://images.gr-assets.com/books/1337714526m...,https://images.gr-assets.com/books/1337714526s...,"Peter Pan, the book based on J.M. Barrie's fam..."
523,706,54479,54479,4537271,1903,014044906X,9780140000000.0,"Jules Verne, Michael Glencross, Brian W. Aldiss",1872.0,Le tour du monde en quatre-vingts jours,...,141132,4341,1273,6693,36897,55603,40666,https://images.gr-assets.com/books/1308815551m...,https://images.gr-assets.com/books/1308815551s...,"""To go around the world...in such a short time..."
769,1113,51893,51893,196327,970,140047484,9780140000000.0,"Friedrich Nietzsche, Walter Kaufmann",1883.0,Also sprach Zarathustra,...,79615,2185,1875,4197,14658,26373,32512,https://images.gr-assets.com/books/1480901846m...,https://images.gr-assets.com/books/1480901846s...,Friedrich Nietzsche's most accessible and infl...


In [None]:
# Catalogue rows for the first 10 training books of the user selected above.
past_interactions = user_interactions[idx2userid[user_id]][:10]
df_books.loc[df_books['book_id'].isin(past_interactions)]

Unnamed: 0,book_id,goodreads_book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,book_desc
1,2,3,3,4640799,491,439554934,9780440000000.0,"J.K. Rowling, Mary GrandPré",1997.0,Harry Potter and the Philosopher's Stone,...,4800065,75867,75504,101676,455024,1156318,3011543,https://images.gr-assets.com/books/1474154022m...,https://images.gr-assets.com/books/1474154022s...,Harry Potter's life is miserable. His parents ...
4,5,4671,4671,245494,1356,743273567,9780743000000.0,F. Scott Fitzgerald,1925.0,The Great Gatsby,...,2773745,51992,86236,197621,606158,936012,947718,https://images.gr-assets.com/books/1490528560m...,https://images.gr-assets.com/books/1490528560s...,Alternate Cover Edition ISBN: 0743273567 (ISBN...
6,7,5907,5907,1540236,969,618260307,9780618000000.0,J.R.R. Tolkien,1937.0,The Hobbit or There and Back Again,...,2196809,37653,46023,76784,288649,665635,1119718,https://images.gr-assets.com/books/1372847500m...,https://images.gr-assets.com/books/1372847500s...,In a hole in the ground there lived a hobbit. ...
12,14,7613,7613,2207778,896,452284244,9780452000000.0,George Orwell,1945.0,Animal Farm: A Fairy Story,...,1982987,35472,66854,135147,433432,698642,648912,https://images.gr-assets.com/books/1424037542m...,https://images.gr-assets.com/books/1424037542s...,مزرعة الحيوانات هي رائعة جورج أورويل الخالدة.....
15,18,5,5,2402163,376,043965548X,9780440000000.0,"J.K. Rowling, Mary GrandPré, Rufus Beck",1999.0,Harry Potter and the Prisoner of Azkaban,...,1969375,36099,6716,20413,166129,509447,1266670,https://images.gr-assets.com/books/1499277281m...,https://images.gr-assets.com/books/1499277281s...,Harry Potter's third year at Hogwarts is full ...
16,19,34,34,3204327,566,618346252,9780618000000.0,J.R.R. Tolkien,1954.0,The Fellowship of the Ring,...,1832541,15333,38031,55862,202332,493922,1042394,https://images.gr-assets.com/books/1298411339m...,https://images.gr-assets.com/books/1298411339s...,Alternate Cover Edition ISBN 0618260269 (copyr...
24,28,7624,7624,2766512,458,140283331,9780140000000.0,William Golding,1954.0,Lord of the Flies,...,1671484,26886,92779,160295,425648,564916,427846,https://images.gr-assets.com/books/1327869409m...,https://images.gr-assets.com/books/1327869409s...,"At the dawn of the next world war, a plane cra..."
47,58,2956,2956,1835605,2277,142437174,9780142000000.0,"Mark Twain, John Seelye, Guy Cardwell",1884.0,The Adventures of Huckleberry Finn,...,1001491,12448,28467,76630,255722,344356,296316,https://images.gr-assets.com/books/1405973850m...,https://images.gr-assets.com/books/1405973850s...,A nineteenth-century boy from a Mississippi Ri...
75,94,320,320,3295655,555,60531045,9780061000000.0,"Gabriel García Márquez, Gregory Rabassa",1967.0,Cien años de soledad,...,575667,21671,27340,37646,87675,155809,267197,https://images.gr-assets.com/books/1327881361m...,https://images.gr-assets.com/books/1327881361s...,"The brilliant, bestselling, landmark novel tha..."
83,103,7126,7126,391568,1310,140449264,9780140000000.0,"Alexandre Dumas, Robin Buss",1844.0,Le Comte de Monte-Cristo,...,601220,15925,10381,22482,89980,183142,295235,https://images.gr-assets.com/books/1309203605m...,https://images.gr-assets.com/books/1309203605s...,"In 1815 Edmond Dantès, a young and successful ..."
