In [8]:
# %pip install import_ipynb
import import_ipynb 
import cornac
from cornac.data import Dataset
import cornac.metrics as met
from cornac.eval_methods import BaseMethod
from data_loader import DataLoader # type: ignore

In [9]:
# Directory the loader reads from; the path is relative, so it resolves against the kernel's working directory.
data_path = "data/"
# DataLoader is the project-local class imported from `data_loader` in the first cell.
data_loader = DataLoader(data_path)

In [10]:
# Fetch the train and test tables from the project loader.
# NOTE(review): dataset_type='split' presumably selects a pre-made train/test split — confirm against DataLoader.
train_dataset, test_dataset = data_loader.load_for_cornac(dataset_type='split')
# Print the training table's dimensions, then leave head() as the last expression so it renders as the cell output.
print(train_dataset.shape)
train_dataset.head()

(513384, 4)


Unnamed: 0,ReviewId,RecipeId,AuthorId,Rating
0,826743,3745,345380,4
1,1247176,26217,406131,1
2,1250914,17123,355582,5
3,183560,123283,58104,4
4,1255493,110139,383795,5


In [11]:
from cornac.models import ItemKNN
from cornac.eval_methods import RatioSplit
import pandas as pd
import time

def train_item_knn(train_dataset, test_dataset, k_values=[5, 10, 20, 50], train_percentage=0.1, test_percentage=0.1, verbose=True):
    """
    Fit one cosine-similarity ItemKNN model per neighbourhood size and evaluate each.

    Args:
        train_dataset: table with 'AuthorId', 'RecipeId' and 'Rating' columns that
            supports ``.sample(frac=..., random_state=...)``.
        test_dataset: table with the same columns, used as the evaluation split.
        k_values: neighbourhood sizes to try; one model is trained per entry.
        train_percentage: fraction of ``train_dataset`` to sample (random_state=42).
        test_percentage: fraction of ``test_dataset`` to sample (random_state=42).
        verbose: when true, print progress here and pass ``verbose`` on to ItemKNN.

    Returns: (item_knn_models, item_knn_results, train_sample)
        item_knn_models: dict mapping each k to its fitted model.
        item_knn_results: list with one dict per k, holding the keys 'model',
            'k', 'results', 'total_time' and 'item_knn'.
        train_sample: the sampled training rows the models were fitted on.
    """
    uir_columns = ['AuthorId', 'RecipeId', 'Rating']

    # Work on reproducible sub-samples of both tables.
    train_sample = train_dataset.sample(frac=train_percentage, random_state=42)
    test_sample = test_dataset.sample(frac=test_percentage, random_state=42)

    if verbose:
        print(f"Train dataset size: {train_sample.shape}\nTest dataset size: {test_sample.shape}")

    from cornac.data import Dataset
    import cornac.metrics as met
    from cornac.eval_methods import BaseMethod

    # (user, item, rating) triples for both splits.
    train_uir = train_sample[uir_columns].values
    test_uir = test_sample[uir_columns].values

    cornac_train = Dataset.from_uir(train_uir.tolist(), seed=42)

    # Three rating-error metrics followed by three top-10 ranking metrics.
    metrics = [met.MSE(), met.RMSE(), met.MAE()]
    metrics += [met.Precision(k=10), met.Recall(k=10), met.NDCG(k=10)]

    eval_method = BaseMethod.from_splits(train_uir, test_uir)

    item_knn_models = {}
    item_knn_results = []

    if verbose:
        banner = "=" * 80
        print("\n" + banner)
        print("ITEM-BASED COLLABORATIVE FILTERING (ItemKNN) EVALUATION")
        print(banner)

    for n_neighbors in k_values:
        if verbose:
            print(f"\nTesting ItemKNN with k={n_neighbors}...")

        model = ItemKNN(k=n_neighbors, similarity='cosine', verbose=verbose)

        # Only the explicit fit() call is timed; the evaluate() call is not included.
        fit_started = time.time()
        model.fit(cornac_train)
        elapsed = time.time() - fit_started

        evaluation = eval_method.evaluate(model, metrics=metrics, user_based=False)

        item_knn_models[n_neighbors] = model
        item_knn_results.append({
            'model': f'ItemKNN(k={n_neighbors})',
            'k': n_neighbors,
            'results': evaluation,
            'total_time': elapsed,
            'item_knn': model,
        })

        if verbose:
            print(f"  Total time: {elapsed:.2f}s")

    return item_knn_models, item_knn_results, train_sample

# Diversified Recommendations: Maximal Marginal Relevance (MMR)

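In this section, we implement the Maximal Marginal Relevance (MMR) algorithm to diversify recommendations. MMR balances the utility (relevance) of recommended items against their diversity, using a parameter $\alpha \in [0, 1]$ to control the trade-off: each remaining candidate $i$ is scored as $(1-\alpha)\,\text{utility}(i) + \alpha \min_{j \in S}\bigl(1 - \text{sim}(i, j)\bigr)$, where $S$ is the set of items already selected, so $\alpha = 0$ ranks by utility alone and $\alpha = 1$ by diversity alone. We compare the original recommendations from user-based and item-based collaborative filtering with the diversified recommendations and evaluate them using standard metrics (Precision@k, NDCG@k, Intra-list Diversity).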


In [12]:
import numpy as np
import import_ipynb

def mmr_diversify(utility_scores, item_ids, similarity_matrix, k=10, alpha=0.5):
    """
    Greedy Maximal Marginal Relevance (MMR) re-ranking of a candidate list.

    The first candidate is always kept. Each following pick is the remaining
    candidate with the highest
    ``(1 - alpha) * utility + alpha * min_distance``, where ``min_distance`` is
    the smallest ``1 - similarity`` to any item already selected. The two terms
    are combined without rescaling, so utilities should be on a scale comparable
    to the similarities.

    Args:
        utility_scores: dict {item_id: utility}; needs an entry for every
            candidate after the first.
        item_ids: candidate item ids, sorted by utility (best first). Duplicates
            are ignored after their first occurrence.
        similarity_matrix: item-item similarity (cosine), looked up as
            ``similarity_matrix[i, j]`` with the ids from ``item_ids``.
        k: number of items to recommend.
        alpha: trade-off between utility and diversity
            (0=utility only, 1=diversity only).

    Returns:
        List of at most ``k`` selected item ids in pick order. Empty when
        ``item_ids`` is empty or ``k`` is not positive.

    Raises:
        KeyError: if a scored candidate is missing from ``utility_scores``.
    """
    # Nothing to pick: avoid indexing into an empty list or returning more than k items.
    if k <= 0 or len(item_ids) == 0:
        return []

    # Drop duplicates but keep the utility order, so that ties between MMR
    # scores are resolved in favour of the higher-ranked candidate instead of
    # depending on set iteration order.
    ranked = list(dict.fromkeys(item_ids))

    selected = [ranked[0]]  # Start with the most relevant item
    remaining = ranked[1:]

    # Smallest distance from each remaining candidate to anything selected so far.
    # Kept up to date incrementally instead of rescanning `selected` every round.
    min_distance = {i: 1 - similarity_matrix[i, selected[0]] for i in remaining}

    def mmr_score(i):
        return (1 - alpha) * utility_scores[i] + alpha * min_distance[i]

    while len(selected) < k and remaining:
        # max() returns the first maximal element, i.e. the best-ranked one on ties.
        next_item = max(remaining, key=mmr_score)
        selected.append(next_item)
        remaining.remove(next_item)
        del min_distance[next_item]

        # The new pick can only shrink a candidate's distance to the selected set.
        for i in remaining:
            distance = 1 - similarity_matrix[i, next_item]
            if distance < min_distance[i]:
                min_distance[i] = distance

    return selected

## How to Use MMR Diversification

1. Generate a list of candidate recommendations and their utility scores using your user-based or item-based recommender.
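2. Compute or load the item-item similarity matrix (cosine similarity is common). The matrix must be indexable by the same item identifiers used in the candidate list, because `mmr_diversify` looks up `similarity_matrix[i, j]` directly.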
3. Use the `mmr_diversify` function to select a diversified top-k list.
4. Evaluate the diversified list using metrics such as Precision@k, NDCG@k, and Intra-list Diversity.

In [None]:
# Demo: diversify one user's ItemKNN ranking with MMR.
# Train a single ItemKNN model (k=10) on 10% samples of the train/test tables, with progress output disabled.
item_knn_models, item_knn_results, train_sample = train_item_knn(
    train_dataset, test_dataset, k_values=[10], train_percentage=0.1, test_percentage=0.1, verbose=False)
item_knn = item_knn_models[10]  # Use k=10 for demonstration

# Use the first entry of the model's user_ids as the demo user.
# NOTE(review): presumably these are raw AuthorId values, and this line needs user_ids to
# support positional indexing — confirm both against the installed cornac version.
user_id = item_knn.user_ids[0]

# Ask for a ranking over the whole catalogue (k = total number of items).
# NOTE(review): an earlier comment called the result "internal item indices"; cornac's
# recommend() appears to return original item IDs instead — TODO confirm.
top_items = item_knn.recommend(user_id, k=item_knn.total_items)

# Score every ranked item for the demo user, keyed by the ids returned above.
# NOTE(review): cornac's score() appears to expect internal user/item indices; here it
# receives whatever id space user_ids/recommend() use — verify the two agree.
utility_scores = {item: item_knn.score(user_id, item) for item in top_items}

# Look for the item-item similarity matrix under two candidate attribute names and fail loudly otherwise.
# NOTE(review): cornac's KNN models appear to store it as `sim_mat`; if so, neither branch
# matches and the AttributeError below is raised — confirm the attribute name.
if hasattr(item_knn, 'sim'):
    similarity_matrix = item_knn.sim
elif hasattr(item_knn, '_sim'):
    similarity_matrix = item_knn._sim
else:
    raise AttributeError("ItemKNN model does not have a similarity matrix attribute ('sim' or '_sim').")

# Drop items the user already rated in the training sample.
# rated_items holds RecipeId values from train_sample, so this filter only has an effect
# if top_items is expressed in the same RecipeId space.
rated_items = set(train_sample[(train_sample['AuthorId'] == user_id)]['RecipeId'])
candidate_items = [item for item in top_items if item not in rated_items]

# Order candidates by descending utility, which is the order mmr_diversify expects for item_ids.
item_ids = sorted(candidate_items, key=lambda i: utility_scores[i], reverse=True)

# Length of the diversified list and the utility/diversity trade-off passed to mmr_diversify.
N = 10
alpha = 0.5
# mmr_diversify indexes similarity_matrix[i, j] with these ids directly, so the ids in
# item_ids must be valid row/column positions of similarity_matrix.
mmr_topN = mmr_diversify(utility_scores, item_ids, similarity_matrix, k=N, alpha=alpha)

print("MMR Diversified Recommendations:", mmr_topN)


