In [None]:
!pip install implicit
!pip install scipy==1.14.1

In [None]:
import numpy as np
import json
import requests
import pandas as pd
import implicit
import scipy

# Función para calcular métricas

In [None]:
# Adapted from an implicit GitHub issue, for use when implicit's built-in metrics do not work:
# https://github.com/benfred/implicit/issues/726
def _ranking_metrics_from_recommender(recommend_batch, test_user_items, K, batch_size=1000):
    """
    Shared core of the ranking-metric functions below.

    Parameters:
        recommend_batch : callable
            Called as ``recommend_batch(batch_users)`` with an array of user row
            indices; must return one sequence of recommended item indices per user,
            in the same order as ``batch_users``.
        test_user_items : sparse matrix
            User-item interaction matrix for evaluation (converted to CSR here).
        K : int
            Cut-off; only the first K recommendations of every user are scored.
        batch_size : int
            Number of users passed to ``recommend_batch`` per call.

    Returns:
        dict : keys "precision", "map", "ndcg" and "auc".
    """
    test_user_items = test_user_items.tocsr()
    num_items = test_user_items.shape[1]
    indptr = test_user_items.indptr
    indices = test_user_items.indices

    # Positional discounts 1/log2(rank + 2); their running sum is the ideal DCG
    # for 1..K relevant items.
    discounts = 1.0 / np.log2(np.arange(2, K + 2))
    ideal_dcg = np.cumsum(discounts)

    # Only users with at least one stored test interaction are evaluated.
    users_with_test_data = np.where(np.diff(indptr) > 0)[0]

    relevant = 0
    total_precision_div = 0
    total_map = 0
    total_ndcg = 0
    total_auc = 0
    total_users = 0

    for start_idx in range(0, len(users_with_test_data), batch_size):
        batch_users = users_with_test_data[start_idx:start_idx + batch_size]
        recommended_items = recommend_batch(batch_users)

        for row, user_id in enumerate(batch_users):
            test_items = set(indices[indptr[user_id]:indptr[user_id + 1]])
            num_relevant = len(test_items)
            max_hits = min(K, num_relevant)
            total_precision_div += max_hits

            ap = 0
            hit_count = 0
            auc = 0
            idcg = ideal_dcg[max_hits - 1]
            num_negative = num_items - num_relevant

            # Truncate to K: the discount table has exactly K entries and the AUC
            # term below assumes K ranked positions, so longer lists must not be
            # scored past K.
            for rank, item in enumerate(recommended_items[row][:K]):
                if item in test_items:
                    relevant += 1
                    hit_count += 1
                    ap += hit_count / (rank + 1)
                    total_ndcg += discounts[rank] / idcg
                else:
                    # Every miss in the top K is outranked by the hits seen so far.
                    auc += hit_count

            # Negatives outside the top K are credited with the midpoint between the
            # hits found and the total number of relevant items.
            auc += ((hit_count + num_relevant) / 2.0) * (num_negative - (K - hit_count))
            total_map += ap / max_hits
            total_auc += auc / (num_relevant * num_negative)
            total_users += 1

    # Precision is hits divided by the maximum achievable hits (sum of
    # min(K, num_relevant)), not hits divided by K.
    precision = relevant / total_precision_div if total_precision_div > 0 else 0
    mean_ap = total_map / total_users if total_users > 0 else 0
    mean_ndcg = total_ndcg / total_users if total_users > 0 else 0
    mean_auc = total_auc / total_users if total_users > 0 else 0

    return {
        "precision": precision,
        "map": mean_ap,
        "ndcg": mean_ndcg,
        "auc": mean_auc
    }


def ranking_metrics_at_k(model, train_user_items, test_user_items, K=10, show_progress=True):
    """
    Calculates ranking metrics (Precision@K, MAP@K, NDCG@K, AUC) for a trained model.

    Parameters:
        model : object exposing ``recommend(user_ids, user_items, N=...)`` whose first
            return value holds the recommended item indices, one row per user.
        train_user_items : sparse matrix
            User-item interaction matrix used for training; the rows of the evaluated
            users are passed to ``model.recommend``.
        test_user_items : sparse matrix
            User-item interaction matrix for evaluation.
        K : int
            Number of items to evaluate.
        show_progress : bool
            Currently ignored (no progress bar is drawn); kept for signature
            compatibility.

    Returns:
        dict : Dictionary with precision, MAP, NDCG, and AUC scores.
    """
    train_user_items = train_user_items.tocsr()

    def recommend_batch(batch_users):
        item_ids, _ = model.recommend(batch_users, train_user_items[batch_users], N=K)
        return item_ids

    return _ranking_metrics_from_recommender(recommend_batch, test_user_items, K)


def custom_ranking_metrics_at_k(rec_func, train_user_items, test_user_items, K=10, show_progress=False):
    """
    Same metrics as ``ranking_metrics_at_k`` for an arbitrary recommendation function.

    Parameters:
        rec_func : callable
            Called as ``rec_func(batch_users, N=K)``; must return one sequence of item
            indices per user. Lists longer than K are truncated to their first K items.
        train_user_items : sparse matrix
            Not used by the computation; kept for signature compatibility.
        test_user_items : sparse matrix
            User-item interaction matrix for evaluation.
        K : int
            Number of items to evaluate.
        show_progress : bool
            Currently ignored; kept for signature compatibility.

    Returns:
        dict : Dictionary with precision, MAP, NDCG, and AUC scores.
    """
    def recommend_batch(batch_users):
        return rec_func(batch_users, N=K)

    return _ranking_metrics_from_recommender(recommend_batch, test_user_items, K)

# Datos

## Descarga

In [None]:
!wget https://www.dropbox.com/s/57tel5zqopkssrh/books.csv?dl=0 -O books.csv
!wget https://www.dropbox.com/s/dqeqpsr0vdvmcy0/goodreads_past_interactions.json?dl=0 -O goodreads_past_interactions.json
!wget https://www.dropbox.com/s/rjtzhmb2zbpp30q/goodreads_test_interactions.json?dl=0 -O goodreads_test_interactions.json

## Cargar datos como diccionarios

In [None]:
# Past interactions: user id -> ids of the books the user has interacted with
with open('goodreads_past_interactions.json') as past_file:
    user_interactions = json.load(past_file)

# Held-out interactions: user id -> ids of the books used to test the model
with open('goodreads_test_interactions.json') as test_file:
    user_interactions_test = json.load(test_file)

df_books = pd.read_csv('books.csv', sep=',')

# Book index <-> book id maps; the index is the row position of the book in df_books
idx2bookid = dict(enumerate(df_books.book_id))
bookid2idx = {book_id: idx for idx, book_id in idx2bookid.items()}

# User index <-> user id maps; the index follows the key order of the past-interactions dict
idx2userid = dict(enumerate(user_interactions))
userid2idx = {user_id: idx for idx, user_id in idx2userid.items()}

## Convertir a matrices sparse para uso con Implicit

In [None]:
from scipy.sparse import coo_matrix


def _interaction_coords(interactions):
    """Translate a {user_id: [book_id, ...]} dict into parallel (row, col) index lists."""
    coord_rows = []
    coord_cols = []
    for user_id, book_ids in interactions.items():
        for book_id in book_ids:
            coord_rows.append(int(userid2idx[user_id]))
            coord_cols.append(int(bookid2idx[book_id]))
    return coord_rows, coord_cols


# Convert the interaction dictionaries into sparse coordinates (one entry per interaction)
rows, cols = _interaction_coords(user_interactions)
data = [1] * len(rows)

rows_test, cols_test = _interaction_coords(user_interactions_test)
data_test = [1] * len(rows_test)

# Size both matrices from the union of train and test indices so that their
# shapes always agree (the evaluation code indexes both with the same ids).
max_user_id = max(rows + rows_test)
max_item_id = max(cols + cols_test)
matrix_shape = (max_user_id + 1, max_item_id + 1)

# Build the sparse matrices and convert them to CSR format
user_item_matrix_train = coo_matrix((data, (rows, cols)), shape=matrix_shape).tocsr()
user_item_matrix_test = coo_matrix((data_test, (rows_test, cols_test)), shape=matrix_shape).tocsr()

print('Train shape: ' , user_item_matrix_train.shape)
print('Test shape:' , user_item_matrix_test.shape)

Train shape:  (52821, 4287)
Test shape: (52821, 4287)


# Modelos Baseline

## ALS

In [None]:
# Train the ALS baseline on the training matrix with the constructor's default arguments.
# NOTE(review): no random_state is passed; consider fixing one if reproducible results are needed — confirm against the implicit API.
model_ALS = implicit.als.AlternatingLeastSquares()
model_ALS.fit(user_item_matrix_train)

  check_blas_config()


  0%|          | 0/15 [00:00<?, ?it/s]

In [None]:
# Save the trained ALS model to disk
model_ALS.save('model_ALS.npz')

Tiempo de ejecución: 42 segundos

## BPR

In [None]:
# Train the BPR baseline on the training matrix with the constructor's default arguments.
# NOTE(review): no random_state is passed; consider fixing one if reproducible results are needed — confirm against the implicit API.
model_BPR = implicit.bpr.BayesianPersonalizedRanking()
model_BPR.fit(user_item_matrix_train)

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Save the trained BPR model to disk
model_BPR.save('model_BPR.npz')

Tiempo de ejecución: 4 minutos y 40 segundos

## IKNN

In [None]:
# Fit the item-based nearest-neighbour baseline (cosine recommender) on the training matrix.
model_IKNN = implicit.nearest_neighbours.CosineRecommender()
model_IKNN.fit(user_item_matrix_train)



  0%|          | 0/4287 [00:00<?, ?it/s]

In [None]:
from implicit.evaluation import AUC_at_k, precision_at_k, mean_average_precision_at_k, ndcg_at_k

# Cut-offs at which every ranking metric is reported.
ks = [5, 10, 20]

metrics_ALS = {}
metrics_BPR = {}

# (label, evaluator) pairs, listed in the order in which they are computed and stored.
evaluators = (
    ('AUC', AUC_at_k),
    ('Precision', precision_at_k),
    ('MAP', mean_average_precision_at_k),
    ('NDCG', ndcg_at_k),
)

# Results are stored under keys of the form '<label>@<k>', ALS first and then BPR for each k.
for k in ks:
    for results, fitted_model in ((metrics_ALS, model_ALS), (metrics_BPR, model_BPR)):
        for label, evaluate in evaluators:
            results[f'{label}@{k}'] = evaluate(fitted_model, user_item_matrix_train, user_item_matrix_test, K=k)

In [None]:
metrics_IKNN = {}

# The IKNN model is evaluated on float32 copies of the matrices. The cast does not
# depend on k, so it is done once here instead of on every evaluator call.
user_item_matrix_train_f32 = user_item_matrix_train.astype(np.float32)
user_item_matrix_test_f32 = user_item_matrix_test.astype(np.float32)

for k in ks:
  metrics_IKNN[f'AUC@{k}'] = AUC_at_k(model_IKNN, user_item_matrix_train_f32, user_item_matrix_test_f32, K=k)
  metrics_IKNN[f'Precision@{k}'] = precision_at_k(model_IKNN, user_item_matrix_train_f32, user_item_matrix_test_f32, K=k)
  metrics_IKNN[f'MAP@{k}'] = mean_average_precision_at_k(model_IKNN, user_item_matrix_train_f32, user_item_matrix_test_f32, K=k)
  metrics_IKNN[f'NDCG@{k}'] = ndcg_at_k(model_IKNN, user_item_matrix_train_f32, user_item_matrix_test_f32, K=k)

In [None]:
# Print the metrics
def metrics_string(vals):
    """Format an (AUC, Precision, MAP, NDCG) tuple as one tab-indented line."""
    return f"\t AUC: {vals[0]:.4f}, Precision: {vals[1]:.4f}, MAP: {vals[2]:.4f}, NDCG: {vals[3]:.4f}"


# One header line and one metrics line per model, with a blank line after each cut-off.
for k in ks:
    for label, results in (('ALS', metrics_ALS), ('BPR', metrics_BPR), ('IKNN', metrics_IKNN)):
        print(f'{label} metrics @{k}:')
        metrics = (results[f'AUC@{k}'], results[f'Precision@{k}'], results[f'MAP@{k}'], results[f'NDCG@{k}'])
        print(metrics_string(metrics))
    print()

ALS metrics @5:
	 AUC: 0.5661, Precision: 0.2660, MAP: 0.1881, NDCG: 0.2907
BPR metrics @5:
	 AUC: 0.5255, Precision: 0.1040, MAP: 0.0592, NDCG: 0.1108
IKNN metrics @5:
	 AUC: 0.5455, Precision: 0.1840, MAP: 0.1229, NDCG: 0.1933

ALS metrics @10:
	 AUC: 0.5956, Precision: 0.1930, MAP: 0.1148, NDCG: 0.2309
BPR metrics @10:
	 AUC: 0.5434, Precision: 0.0890, MAP: 0.0382, NDCG: 0.0984
IKNN metrics @10:
	 AUC: 0.5790, Precision: 0.1600, MAP: 0.0815, NDCG: 0.1738

ALS metrics @20:
	 AUC: 0.6492, Precision: 0.3020, MAP: 0.1416, NDCG: 0.2918
BPR metrics @20:
	 AUC: 0.5699, Precision: 0.1440, MAP: 0.0474, NDCG: 0.1294
IKNN metrics @20:
	 AUC: 0.6216, Precision: 0.2470, MAP: 0.0997, NDCG: 0.2216



ALS metrics @5:
	 AUC: 0.5661, Precision: 0.2660, MAP: 0.1881, NDCG: 0.2907

BPR metrics @5:
	 AUC: 0.5255, Precision: 0.1040, MAP: 0.0592, NDCG: 0.1108

IKNN metrics @5:
	 AUC: 0.5455, Precision: 0.1840, MAP: 0.1229, NDCG: 0.1933


ALS metrics @10:
	 AUC: 0.5956, Precision: 0.1930, MAP: 0.1148, NDCG: 0.2309

BPR metrics @10:
	 AUC: 0.5434, Precision: 0.0890, MAP: 0.0382, NDCG: 0.0984

IKNN metrics @10:
	 AUC: 0.5790, Precision: 0.1600, MAP: 0.0815, NDCG: 0.1738


ALS metrics @20:
	 AUC: 0.6492, Precision: 0.3020, MAP: 0.1416, NDCG: 0.2918

BPR metrics @20:
	 AUC: 0.5699, Precision: 0.1440, MAP: 0.0474, NDCG: 0.1294

IKNN metrics @20:
	 AUC: 0.6216, Precision: 0.2470, MAP: 0.0997, NDCG: 0.2216

### Evaluar AUC normal

In [None]:
# AUC with the cut-off set to max_item_id instead of a small k.
# NOTE(review): the matrices have max_item_id + 1 columns, so K=max_item_id is one short of the full catalogue — confirm this is intended.
full_k = max_item_id
auc_ALS = AUC_at_k(model_ALS, user_item_matrix_train, user_item_matrix_test, K=full_k)
auc_BPR = AUC_at_k(model_BPR, user_item_matrix_train, user_item_matrix_test, K=full_k)
auc_IKNN = AUC_at_k(
    model_IKNN,
    user_item_matrix_train.astype(np.float32),
    user_item_matrix_test.astype(np.float32),
    K=full_k,
)
for label, auc_value in (('ALS', auc_ALS), ('BPR', auc_BPR), ('IKNN', auc_IKNN)):
    print(f'{label} AUC: {auc_value:.4f}')

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

ALS AUC: 0.9198
BPR AUC: 0.9183
IKNN AUC: 0.8279


## Diversity

In [None]:
# Load the precomputed book embeddings that are passed to compute_ild_at_k below.
# NOTE(review): this .npy file is not fetched by the download cell visible in this notebook — confirm it is present in the working directory.
book_embeddings = np.load('goodreads_bert_embeddings.npy')

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def compute_ild_at_k(model, user_item_matrix_train, book_embeddings, K=10, user_ids=None):
    """
    Average Intra-List Diversity at K (ILD@K) of a model's recommendations.

    For each user the top-K list is requested from ``model.recommend`` (with
    ``filter_already_liked_items=True``), the embeddings of the returned items are
    compared pairwise with cosine similarity, and the user's ILD is the mean of
    ``1 - similarity`` over all distinct pairs.

    Parameters
    ----------
    model : object exposing ``recommend(user_id, user_items, N=..., filter_already_liked_items=...)``
        The first element of the returned pair is used as the recommended item indices.
    user_item_matrix_train : sparse matrix
        Training user-item matrix; row ``user_id`` is passed to ``model.recommend``.
    book_embeddings : np.ndarray
        Indexed with the recommended item indices; presumably of shape
        (num_books, embedding_dim) — verify against the file that is loaded.
    K : int
        Number of recommendations requested per user.
    user_ids : iterable of int, optional
        User row indices to evaluate. Defaults to every row of the training matrix.

    Returns
    -------
    float
        Mean ILD@K over the evaluated users, or 0.0 when no user produced a score.
        Users with fewer than two recommendations are skipped.
    """
    if user_ids is None:
        user_ids = range(user_item_matrix_train.shape[0])

    per_user_ild = []
    for uid in user_ids:
        rec_ids, _ = model.recommend(uid, user_item_matrix_train[uid], N=K, filter_already_liked_items=True)

        # A single item has no pairs, so there is no diversity to measure.
        if len(rec_ids) < 2:
            continue

        similarities = cosine_similarity(book_embeddings[rec_ids])

        # Strict upper triangle: every unordered pair once, without the diagonal.
        pair_rows, pair_cols = np.triu_indices_from(similarities, k=1)
        per_user_ild.append(np.mean(1 - similarities[pair_rows, pair_cols]))

    if not per_user_ild:
        return 0.0
    return np.mean(per_user_ild)

In [None]:
# Row indices of the users that have at least one stored test interaction
test_user_ids = np.where(np.diff(user_item_matrix_test.indptr) > 0)[0]

# The float32 copy handed to the IKNN model does not depend on k; cast it once.
user_item_matrix_train_f32 = user_item_matrix_train.astype(np.float32)

for k in ks:
  print(f'ALS ILD@{k}: {compute_ild_at_k(model_ALS, user_item_matrix_train, book_embeddings, K=k, user_ids=test_user_ids):.4f}')
  print(f'BPR ILD@{k}: {compute_ild_at_k(model_BPR, user_item_matrix_train, book_embeddings, K=k, user_ids=test_user_ids):.4f}')
  print(f'IKNN ILD@{k}: {compute_ild_at_k(model_IKNN, user_item_matrix_train_f32, book_embeddings, K=k, user_ids=test_user_ids):.4f}')

ALS ILD@5: 0.2774
BPR ILD@5: 0.2794
IKNN ILD@5: 0.3239
ALS ILD@10: 0.2814
BPR ILD@10: 0.2808
IKNN ILD@10: 0.3212
ALS ILD@20: 0.2847
BPR ILD@20: 0.2847
IKNN ILD@20: 0.3134


ALS ILD@5: 0.2774
BPR ILD@5: 0.2794
IKNN ILD@5: 0.3239
ALS ILD@10: 0.2814
BPR ILD@10: 0.2808
IKNN ILD@10: 0.3212
ALS ILD@20: 0.2847
BPR ILD@20: 0.2847
IKNN ILD@20: 0.3134

## Novelty

In [None]:
from collections import Counter

# Flatten (user_id, book_id) pairs in the training set
all_train_pairs = [(u, i) for u, i_list in user_interactions.items() for i in i_list]

# Count how many times each book was interacted with, keyed by the book's internal
# item index (bookid2idx) rather than its external book_id: the novelty code looks up
# the indices returned by model.recommend, which live in the index space of the
# training matrix, so book_id keys would read the popularity of a different book.
pop_counter = Counter(bookid2idx[book_id] for _, book_id in all_train_pairs)

# Get the maximum popularity to normalize
max_pop = max(pop_counter.values())


In [None]:
import numpy as np

def novelty_at_k(topk_items, pop_counter, max_pop, k):
    """
    Novelty@K of one recommendation list.

    Each of the first ``k`` items contributes ``1 - popularity / max_pop``; the
    result is the mean of those contributions.

    Parameters
    ----------
    topk_items : sequence
        Recommended item identifiers; they are looked up in ``pop_counter`` as-is,
        so they must be in the same id space as its keys.
    pop_counter : mapping of item identifier -> interaction count
        Items that are missing count as popularity 0.
    max_pop : int
        Value the popularities are divided by.
    k : int
        Number of leading items of ``topk_items`` to score.

    Returns
    -------
    float
        Novelty@K score (computed in float32).
    """
    popularity = []
    for item in topk_items[:k]:
        popularity.append(pop_counter.get(item, 0))

    normalised = np.array(popularity, dtype=np.float32) / max_pop
    return np.mean(1.0 - normalised)

def average_novelty(model, user_item_matrix_train, user_ids, pop_counter, max_pop, k=10):
    """
    Average novelty@K over a set of users.

    Parameters
    ----------
    model : object exposing ``recommend(user_id, user_items, N=..., filter_already_liked_items=...)``
        The first element of the returned pair is used as the recommended item ids.
    user_item_matrix_train : sparse matrix
        Training user-item matrix; row ``user_id`` is passed to ``model.recommend``.
    user_ids : iterable of int
        User row indices to evaluate.
    pop_counter : mapping of item identifier -> interaction count
        The ids returned by ``model.recommend`` are looked up here unchanged, so the
        keys must be in that same id space.
    max_pop : int
        Value the popularities are divided by.
    k : int
        Number of recommendations requested and scored per user.

    Returns
    -------
    float
        Mean novelty@K over ``user_ids``, or 0.0 when ``user_ids`` is empty.
    """
    def _topk_for(uid):
        # Only the recommended ids are needed; the scores are discarded.
        rec_ids, _ = model.recommend(uid, user_item_matrix_train[uid], N=k, filter_already_liked_items=True)
        return rec_ids

    per_user = [novelty_at_k(_topk_for(uid), pop_counter, max_pop, k) for uid in user_ids]

    if not per_user:
        return 0.0
    return np.mean(per_user)


In [None]:
# The float32 copy handed to the IKNN model does not depend on k; cast it once.
user_item_matrix_train_f32 = user_item_matrix_train.astype(np.float32)

for k in ks:
  print(f'ALS Novelty@{k}: {average_novelty(model_ALS, user_item_matrix_train, test_user_ids, pop_counter, max_pop, k=k):.4f}')
  print(f'BPR Novelty@{k}: {average_novelty(model_BPR, user_item_matrix_train, test_user_ids, pop_counter, max_pop, k=k):.4f}')
  print(f'IKNN Novelty@{k}: {average_novelty(model_IKNN, user_item_matrix_train_f32, test_user_ids, pop_counter, max_pop, k=k):.4f}')


ALS Novelty@5: 0.8243
BPR Novelty@5: 0.9245
IKNN Novelty@5: 0.5571
ALS Novelty@10: 0.8311
BPR Novelty@10: 0.9293
IKNN Novelty@10: 0.5933
ALS Novelty@20: 0.8438
BPR Novelty@20: 0.9361
IKNN Novelty@20: 0.6534


ALS Novelty@5: 0.8243
BPR Novelty@5: 0.9245
IKNN Novelty@5: 0.5571

ALS Novelty@10: 0.8311
BPR Novelty@10: 0.9293
IKNN Novelty@10: 0.5933
ALS Novelty@20: 0.8438
BPR Novelty@20: 0.9361
IKNN Novelty@20: 0.6534

In [None]:
def rec_function(user_idxs, N=10):
    """
    Recommend ``N`` items with the ALS model for a batch of users.

    Reads the notebook-level ``model_ALS`` and ``user_item_matrix_train``.
    Returns only the first element of the pair produced by ``model_ALS.recommend``
    (the recommended item indices); the second element is discarded.
    """
    batch_rows = user_item_matrix_train[user_idxs]
    item_ids, _scores = model_ALS.recommend(user_idxs, batch_rows, N=N)
    return item_ids

In [None]:
# K is first assigned in a later cell; define it here so this cell also runs on a
# fresh kernel (Restart & Run All) instead of raising NameError.
K = 10
print('Metrics ALS with custom function:', custom_ranking_metrics_at_k(rec_function, user_item_matrix_train, user_item_matrix_test, K=K))

In [None]:
# Cut-off for the custom metric function.
K = 10
# NOTE(review): these assignments replace the earlier '<metric>@<k>'-keyed dicts of the same names.
metrics_ALS = ranking_metrics_at_k(model_ALS, user_item_matrix_train, user_item_matrix_test, K=K)
metrics_BPR = ranking_metrics_at_k(model_BPR, user_item_matrix_train, user_item_matrix_test, K=K)
for label, result in (("ALS", metrics_ALS), ("BPR", metrics_BPR)):
    print(label, result)

## Random

In [None]:
# Recommend random books to the users of the test set
def rand_rec_fun(users, N=10):
    """
    Draw ``N`` random, not-yet-seen books for every user.

    Parameters:
        users : iterable of int
            User row indices into the notebook-level ``user_item_matrix_train``.
        N : int
            Number of books to sample per user (without replacement).

    Returns:
        list of np.ndarray : one array of ``N`` item indices per user, in input order.

    Raises:
        ValueError : (from ``np.random.choice``) if a user has fewer than ``N`` unseen books.
    """
    random_recs = []
    books_id_set = set(range(max_item_id + 1))
    for u_id in users:
        # Column indices of the user's non-zero training entries, read from the
        # sparse row directly instead of densifying it.
        train_books_id_set = set(user_item_matrix_train[u_id].nonzero()[1])
        available_books_id = list(books_id_set - train_books_id_set)
        # Sample N (the requested list length), not the notebook-level K.
        random_recs.append(np.random.choice(available_books_id, size=N, replace=False))
    return random_recs

random_metrics = custom_ranking_metrics_at_k(rand_rec_fun, user_item_matrix_train, user_item_matrix_test, K=K)
print(random_metrics)

{'precision': 0.003, 'map': 0.0014761904761904762, 'ndcg': 0.004035015715464803, 'auc': 0.5003357493570256}


## Item-Item

In [None]:
# Fit an item-item cosine recommender on the training matrix.
# NOTE(review): this is the same recommender class and training data as model_IKNN above.
model_ii = implicit.nearest_neighbours.CosineRecommender()
model_ii.fit(user_item_matrix_train)



  0%|          | 0/4287 [00:00<?, ?it/s]

Tiempo de ejecución: 2 segundos

In [None]:
# Evaluate the item-item model with the custom metric function at the notebook-level K.
# NOTE(review): the matrices are cast with astype(float) here, while the model_IKNN cells use np.float32 — confirm the difference is intended.
metrics_II = ranking_metrics_at_k(model_ii, user_item_matrix_train.astype(float), user_item_matrix_test.astype(float), K=K)
print("Item-Item", metrics_II)

Item-Item {'precision': 0.16, 'map': 0.08150436507936508, 'ndcg': 0.17380040471433386, 'auc': 0.579040448912789}


# Ejemplo de recomendación

In [None]:
import random
# Recommend books to a couple of randomly drawn users; the details of the recommended books are shown in the cells below
example_users = random.sample(list(user_interactions), k=2)
K=10
# Translate the sampled user ids to matrix row indices once and reuse them for both arguments.
example_user_idxs = [userid2idx[user] for user in example_users]
example_recs = model_ALS.recommend(example_user_idxs, user_item_matrix_train[example_user_idxs], N=K)
print("Example recommendations for users:", example_users)
print(example_recs)

Example recommendations for users: ['49716', '26220']
(array([[236, 221, 172,  74, 252, 258, 174, 356, 270,  82],
       [ 80,  79, 107,  77,  26, 110,  49, 219, 207,  27]], dtype=int32), array([[0.4113859 , 0.40253937, 0.35937274, 0.3582532 , 0.27035043,
        0.26617002, 0.25842667, 0.24268885, 0.23811565, 0.23298746],
       [0.25490865, 0.2500957 , 0.23847449, 0.23479536, 0.22634204,
        0.22576651, 0.22193219, 0.21853296, 0.21321152, 0.20430541]],
      dtype=float32))


In [None]:
# List the columns available in df_books
df_books.columns

Index(['book_id', 'goodreads_book_id', 'best_book_id', 'work_id',
       'books_count', 'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url', 'book_desc'],
      dtype='object')

In [None]:
# Collect the df_books rows for each example user's recommended books and past interactions
recommended_books = []
display_cols = ['book_id', 'original_title', 'authors', 'average_rating', 'ratings_count', 'original_publication_year']

# Recommended books, keyed by user id. Item indices are row positions in df_books
# (idx2bookid is built by enumerating df_books.book_id), so selecting the rows
# positionally keeps them in recommendation rank order; an isin() filter would
# return them in df_books order and lose the ranking.
dfs_rec = {}
for user_id, recs in zip(example_users, example_recs[0]):
    row_positions = [int(rec) for rec in recs if rec in idx2bookid]
    dfs_rec[user_id] = df_books.iloc[row_positions]

# Past (training) interactions, keyed by user id
dfs_past = {}
for user_id in example_users:
    dfs_past[user_id] = df_books[df_books.book_id.isin(user_interactions[user_id])]

In [None]:
# Past interactions of the first example user (first 10 rows)
dfs_past[example_users[0]][display_cols].head(10)

Unnamed: 0,book_id,original_title,authors,average_rating,ratings_count,original_publication_year
3,4,To Kill a Mockingbird,Harper Lee,4.25,3198671,1960.0
4,5,The Great Gatsby,F. Scott Fitzgerald,3.89,2683664,1925.0
47,58,The Adventures of Huckleberry Finn,"Mark Twain, John Seelye, Guy Cardwell",3.8,953758,1884.0
48,59,Charlotte's Web,"E.B. White, Garth Williams, Rosemary Wells",4.15,1064521,1952.0
53,66,Gone with the Wind,Margaret Mitchell,4.28,873981,1936.0
93,116,The Adventures of Tom Sawyer,"Mark Twain, Guy Cardwell, John Seelye",3.89,555359,1876.0
101,125,"The Tragicall Historie of Hamlet, Prince of De...","William Shakespeare, Richard Andrews, Rex Gibson",4.0,515820,1600.0
123,158,Charlie and the Chocolate Factory,"Roald Dahl, Quentin Blake",4.1,453959,1964.0
134,172,Анна Каренина,"Leo Tolstoy, Louise Maude, Leo Tolstoj, Aylmer...",4.02,297472,1877.0
135,173,A Clockwork Orange,Anthony Burgess,3.98,431195,1962.0


In [None]:
# Books recommended to the first example user
dfs_rec[example_users[0]][display_cols]

Unnamed: 0,book_id,original_title,authors,average_rating,ratings_count,original_publication_year
74,93,The Secret Garden,Frances Hodgson Burnett,4.12,639357,1911.0
82,102,Where the Wild Things Are,Maurice Sendak,4.22,620618,1963.0
172,225,East of Eden,John Steinbeck,4.35,320919,1952.0
174,229,Alice's Adventures in Wonderland,"Lewis Carroll, John Tenniel, Martin Gardner",4.06,340920,1865.0
221,293,Treasure Island,Robert Louis Stevenson,3.82,268012,1882.0
236,311,The Thorn Birds,Colleen McCullough,4.22,255651,1977.0
252,335,James and the Giant Peach,"Roald Dahl, Quentin Blake",3.99,270402,1961.0
258,346,The Wonderful Wizard of Oz,"L. Frank Baum, W.W. Denslow",3.98,247521,1900.0
270,361,Oliver Twist,"Charles Dickens, George Cruikshank, Philip Horne",3.85,230181,1838.0
356,478,The Life and Strange Surprising Adventures of ...,"Daniel Defoe, Gerald McCann, Virginia Woolf",3.66,178188,1719.0


In [None]:
# The label must name the same user whose table is displayed (the second example user).
print("Past interactions for user (first 10):", example_users[1])
dfs_past[example_users[1]][display_cols].head(10)

Past interactions for user (first 10): 49716


Unnamed: 0,book_id,original_title,authors,average_rating,ratings_count,original_publication_year
13,16,Män som hatar kvinnor,"Stieg Larsson, Reg Keeland",4.11,1808403,2005.0
18,22,The Lovely Bones,Alice Sebold,3.77,1605173,2002.0
28,34,Fifty Shades of Grey,E.L. James,3.67,1338493,2011.0
46,57,The Secret Life of Bees,Sue Monk Kidd,4.01,916189,2001.0
100,124,Room,Emma Donoghue,4.03,511360,2010.0
109,139,Miss Peregrine’s Home for Peculiar Children,Ransom Riggs,3.89,613674,2011.0
145,185,The Night Circus,Erin Morgenstern,4.03,429543,2011.0
148,190,Wild: From Lost to Found on the Pacific Crest ...,Cheryl Strayed,3.96,379872,2012.0
152,195,The Guernsey Literary and Potato Peel Pie Society,"Mary Ann Shaffer, Annie Barrows",4.12,393626,2008.0
245,326,White Oleander,Janet Fitch,3.93,245674,1999.0


In [None]:
# Books recommended to the second example user
print("Recommended books for user:", example_users[1])
dfs_rec[example_users[1]][display_cols]

Recommended books for user: 26220


Unnamed: 0,book_id,original_title,authors,average_rating,ratings_count,original_publication_year
26,30,Gone Girl,Gillian Flynn,4.03,512475,2012.0
27,31,The Help,Kathryn Stockett,4.45,1531753,2009.0
49,60,The Curious Incident of the Dog in the Night-Time,Mark Haddon,3.85,867553,2003.0
77,96,Fifty Shades Freed,E.L. James,3.88,387290,2012.0
79,98,Flickan som lekte med elden,"Stieg Larsson, Reg Keeland",4.22,563994,2006.0
80,99,Fifty Shades Darker,E.L. James,3.87,552059,2011.0
107,136,Divine Secrets of the Ya-Ya Sisterhood,Rebecca Wells,3.79,465676,1996.0
110,140,Luftslottet som sprängdes,"Stieg Larsson, Reg Keeland",4.2,443951,2007.0
207,277,The Ocean at the End of the Lane,Neil Gaiman,3.99,293844,2013.0
219,291,Cutting for Stone,Abraham Verghese,4.28,258319,2009.0
