In [None]:
### This notebook contains snippets of code that could be used to
#  test and develop models with alternative metrics in the modeling portions of the repo



In [None]:
#  The code is not intended to be run as is,
#  but rather to be used as a reference for 
# implementing alternative metrics in the modeling portions of the repo.

In [None]:
# 
# New variables needed for ranking.
# actual_books: for every user in the test set, the set of books that user actually
# liked, i.e. rated at `threshold` or above.
threshold = 7  # Define threshold for relevant books

# Start every test user with an empty set so users without any relevant book are
# still present, then fill in the relevant books with a single filter + groupby
# instead of re-scanning the whole test frame once per user.
actual_books = {user: set() for user in test_df['User-Index'].unique()}
for user, books in test_df[test_df['Book-Rating'] >= threshold].groupby('User-Index')['Book-Index']:
    actual_books[user] = set(books.values)

# Get recommended books for each user.
# The candidate list is the same for every user, so compute it once outside the loop.
all_books = df['Book-Index'].unique()
recommended_books = {}

for user in test_df['User-Index'].unique():
    recommended_books[user] = bayes_general_recommendation(user, all_books, trace, top_k=5)


# Create book popularity variable: number of rows in df per book index.
book_popularity = df['Book-Index'].value_counts().to_dict()

In [None]:
# Functions that provide the evaluation metrics.
# Mean Reciprocal Rank (MRR)
def mean_reciprocal_rank(recommended_books, actual_books):
    """
    Computes the Mean Reciprocal Rank (MRR).

    For each user the reciprocal rank is 1 / (1-based position of the first
    recommended book that is relevant), or 0 when none of the recommended
    books is relevant. The result is the mean of these values over users.

    recommended_books: ordered recommendations per user; either a list of
        lists of book indices, or a dict mapping user -> list of book indices.
    actual_books: relevant books per user; either a list of sets aligned
        position-by-position with recommended_books, or a dict mapping
        user -> set of book indices.

    When both arguments are dicts they are paired by user key, and a user that
    is missing from actual_books is treated as having no relevant books. When
    both are lists they are paired with zip, so extra entries in the longer
    one are ignored.

    Returns the mean reciprocal rank, or 0.0 when there are no users.
    Raises TypeError if exactly one of the two arguments is a dict.
    """
    from collections.abc import Mapping

    rec_is_mapping = isinstance(recommended_books, Mapping)
    actual_is_mapping = isinstance(actual_books, Mapping)
    if rec_is_mapping != actual_is_mapping:
        raise TypeError(
            "recommended_books and actual_books must both be dicts or both be sequences"
        )

    if rec_is_mapping:
        # Iterating a dict yields its keys, so pair the values explicitly by user.
        pairs = [(recs, actual_books.get(user, ())) for user, recs in recommended_books.items()]
    else:
        pairs = list(zip(recommended_books, actual_books))

    reciprocal_ranks = []
    for rec, actual in pairs:
        # 1-based rank of the first hit; None when there is no hit at all.
        rank = next((i + 1 for i, book in enumerate(rec) if book in actual), None)
        reciprocal_ranks.append(1 / rank if rank else 0)

    if not reciprocal_ranks:
        # np.mean([]) would emit a warning and return nan.
        return 0.0
    return np.mean(reciprocal_ranks)

# Normalized Discounted Cumulative Gain (NDCG)
def ndcg_at_k(recommended_books, actual_books, k=5):
    """
    Computes the Normalized Discounted Cumulative Gain (NDCG) at K with
    binary relevance (a recommended book is either relevant or not).

    recommended_books: ordered recommendations per user; either a list of
        lists of book indices, or a dict mapping user -> list of book indices.
    actual_books: relevant books per user; either a list of sets aligned
        position-by-position with recommended_books, or a dict mapping
        user -> set of book indices.
    k: only the first k recommendations of each user are scored.

    Per user, DCG@k sums 1 / log2(position + 1) over the relevant books in the
    top k (positions are 1-based). The ideal DCG places every relevant book at
    the top, so it is the same sum over the first min(len(actual), k)
    positions. Users with no relevant books score 0.

    When both arguments are dicts they are paired by user key, and a user that
    is missing from actual_books is treated as having no relevant books. When
    both are lists they are paired with zip.

    Assumes each user's recommendation list contains no duplicate books.

    Returns the mean NDCG over users, or 0.0 when there are no users.
    Raises TypeError if exactly one of the two arguments is a dict.
    """
    from collections.abc import Mapping

    rec_is_mapping = isinstance(recommended_books, Mapping)
    actual_is_mapping = isinstance(actual_books, Mapping)
    if rec_is_mapping != actual_is_mapping:
        raise TypeError(
            "recommended_books and actual_books must both be dicts or both be sequences"
        )

    if rec_is_mapping:
        # Iterating a dict yields its keys, so pair the values explicitly by user.
        pairs = [(recs, actual_books.get(user, ())) for user, recs in recommended_books.items()]
    else:
        pairs = list(zip(recommended_books, actual_books))

    def dcg(recs, actual):
        # Position i (0-based) is discounted by log2(i + 2).
        return sum(1 / np.log2(i + 2) for i, rec in enumerate(recs[:k]) if rec in actual)

    ndcg_scores = []
    for rec, actual in pairs:
        # Ideal ranking: all relevant books first, limited to k slots. This must be
        # computed from the number of relevant books, not by looking up 0/1 relevance
        # flags as if they were book ids.
        ideal_hits = min(len(actual), k)
        ideal_dcg = sum(1 / np.log2(i + 2) for i in range(ideal_hits))
        actual_dcg = dcg(rec, actual)
        ndcg_scores.append(actual_dcg / ideal_dcg if ideal_dcg > 0 else 0)

    if not ndcg_scores:
        # np.mean([]) would emit a warning and return nan.
        return 0.0
    return np.mean(ndcg_scores)

# Coverage
def coverage(recommended_books, total_books):
    """
    Measures catalogue coverage: the fraction of all books that appear in at
    least one user's recommendations.

    recommended_books: recommendations per user; either a list of lists of
        book indices, or a dict mapping user -> list of book indices.
    total_books: total number of books in the dataset.

    Returns (number of distinct recommended books) / total_books as a fraction,
    not a percentage. Raises ZeroDivisionError if total_books is 0.
    """
    from collections.abc import Mapping

    # Iterating a dict yields user keys, so take its values to get the lists.
    if isinstance(recommended_books, Mapping):
        per_user_recs = recommended_books.values()
    else:
        per_user_recs = recommended_books

    unique_books = {book for rec in per_user_recs for book in rec}
    return len(unique_books) / total_books

# Novelty (measuring unexpectedness)
def novelty(recommended_books, book_popularity, k=5):
    """
    Computes novelty based on how rare the recommended books are.

    recommended_books: ordered recommendations per user; either a list of
        lists of book indices, or a dict mapping user -> list of book indices.
    book_popularity: Dictionary mapping book index to its popularity score.
        Books missing from the dictionary count as popularity 0.
    k: only the first k recommendations of each user are scored.

    Per user the score is 1 / (1 + mean popularity of the top-k books), so
    lower popularity gives higher novelty. Users with an empty recommendation
    list are skipped, because their mean popularity is undefined and would
    otherwise turn the overall result into nan.

    Returns the mean score over the scored users, or 0.0 when no user has any
    recommendations.
    """
    from collections.abc import Mapping

    # Iterating a dict yields user keys, so take its values to get the lists.
    if isinstance(recommended_books, Mapping):
        per_user_recs = recommended_books.values()
    else:
        per_user_recs = recommended_books

    novelty_scores = []
    for rec in per_user_recs:
        top_recs = rec[:k]
        if len(top_recs) == 0:
            continue
        avg_popularity = np.mean([book_popularity.get(book, 0) for book in top_recs])
        novelty_scores.append(1 / (1 + avg_popularity))  # Lower popularity → higher novelty

    if not novelty_scores:
        # np.mean([]) would emit a warning and return nan.
        return 0.0
    return np.mean(novelty_scores)

# Example Usage
# recommended_books = [[101, 203, 405], [312, 120, 305]]  # Example user recommendations
# actual_books = [{101, 405}, {120}]  # Example actual relevant books
# total_books = 1000  # Assume dataset has 1000 books
# book_popularity = {101: 500, 203: 100, 405: 50, 312: 300, 120: 20, 305: 80}  # Example popularity

# print("MRR:", mean_reciprocal_rank(recommended_books, actual_books))
# print("NDCG@5:", ndcg_at_k(recommended_books, actual_books, k=5))
# print("Coverage:", coverage(recommended_books, total_books))
# print("Novelty:", novelty(recommended_books, book_popularity, k=5))