In [7]:
import pandas as pd
import json
from sklearn.metrics import ndcg_score
from collections import defaultdict

# Load your original test set
test_df = pd.read_csv('./subset_ratings.csv')  # or the same one used in both notebooks
test_df = test_df[test_df.movieId.notna()]

# Load prediction JSONs
with open("predictions/cbf_top10_subset.json") as f:
    preds_cbf = json.load(f)

with open("predictions/hybrid_top10_subset.json") as f:
    preds_hybrid = json.load(f)


In [8]:
def precision_recall_at_k(pred_dict, test_df, K=10):
    hit_count, rec_count, rel_count = 0, 0, 0

    for u, grp in test_df.groupby('userId'):
        u = str(u)  # keys in JSON are string-typed
        if u not in pred_dict:
            continue

        true_items = set(grp.movieId)
        pred_items = pred_dict[u][:K]

        hits = len(true_items.intersection(pred_items))
        hit_count += hits
        rec_count += K
        rel_count += len(true_items)

    precision = hit_count / rec_count if rec_count else 0
    recall = hit_count / rel_count if rel_count else 0
    return precision, recall


In [9]:
def ndcg_at_k(pred_dict, test_df, K=10):
    scores, truths = [], []
    for u, grp in test_df.groupby('userId'):
        u = str(u)
        if u not in pred_dict:
            continue
        true_items = set(grp.movieId)
        pred_items = pred_dict[u][:K]
        rel = [1 if m in true_items else 0 for m in pred_items]
        scores.append(rel)
        truths.append(sorted(rel, reverse=True))
    return ndcg_score(truths, scores)


In [10]:
K = 10

# Precision and Recall
prec_cbf, rec_cbf = precision_recall_at_k(preds_cbf, test_df, K)
prec_hyb, rec_hyb = precision_recall_at_k(preds_hybrid, test_df, K)

# NDCG
ndcg_cbf = ndcg_at_k(preds_cbf, test_df, K)
ndcg_hyb = ndcg_at_k(preds_hybrid, test_df, K)

# Print comparison
print(f"{'Model':<10} | Precision@{K} | Recall@{K} | NDCG@{K}")
print("-" * 45)
print(f"{'CBF':<10} | {prec_cbf:.4f}       | {rec_cbf:.4f}   | {ndcg_cbf:.4f}")
print(f"{'Hybrid':<10} | {prec_hyb:.4f}       | {rec_hyb:.4f}   | {ndcg_hyb:.4f}")


Model      | Precision@10 | Recall@10 | NDCG@10
---------------------------------------------
CBF        | 0.0305       | 0.0015   | 0.1195
Hybrid     | 0.1965       | 0.0096   | 0.4390
