# Book Recommendation Strategy Comparison

This notebook compares three recommendation pipelines built from the existing backend services:
1. LLM + title-to-ID mapping (no Item2Vec rerank)
2. Item2Vec only, seeded from a single catalog book title
3. LLM + Item2Vec (full production pipeline)

To run this notebook you must have valid OpenAI and GCS credentials configured so the backend services can talk to OpenAI and Google Cloud Storage.


In [9]:
# Locate the Backend directory and add it to sys.path so we can import services.
from pathlib import Path
import sys

# Search the current working directory first, then each ancestor (nearest first),
# for a sub-directory named 'Backend'. is_dir() is used instead of exists() so a
# stray regular file called 'Backend' is never mistaken for the package root.
root = Path.cwd()
backend_root = next(
    (
        base / 'Backend'
        for base in (root, *root.parents)
        if (base / 'Backend').is_dir()
    ),
    None,
)

if backend_root is None:
    raise RuntimeError('Could not locate Backend directory from current working directory.')

# Put the backend first on sys.path so `services.*` resolves to this checkout;
# the membership test keeps re-running the cell from adding duplicate entries.
if str(backend_root) not in sys.path:
    sys.path.insert(0, str(backend_root))

print(f'Using backend root: {backend_root}')

Using backend root: /Users/michael/Projects/2025Fall/BookBridge/Backend


In [10]:
# Import backend services and define helpers for the three comparison pipelines.
from functools import lru_cache

from services.openai_client import generate_book_candidates
from services.item2vec_client import (
    download_item2vec_assets,
    load_title_index,
    load_embeddings,
    filter_existing_titles,
    rerank_with_item2vec,
    get_final_book_ids,
    get_book_details,
)


@lru_cache(maxsize=1)
def _title_index():
    """Load and cache the normalized title -> asin mapping used for ID lookup."""
    assets = download_item2vec_assets(include_metadata=False)
    return load_title_index(assets.title_index_path)


@lru_cache(maxsize=1)
def _embeddings_model():
    """Load and cache the item2vec embeddings model."""
    assets = download_item2vec_assets(include_metadata=False)
    return load_embeddings(assets.embeddings_path)


def recommend_llm_and_id_mapping(prompt: str, k: int = 10):
    """Test 1: LLM + title-to-ID mapping (no Item2Vec rerank).

    Asks the LLM for candidates, maps them to catalog IDs through the cached
    title index, keeps the first ``k`` IDs and looks up their details.

    Returns a dict with keys ``candidates``, ``book_ids`` and ``details``.
    """
    llm_candidates = generate_book_candidates(prompt)
    matched_ids = filter_existing_titles(llm_candidates, _title_index())
    top_ids = matched_ids[:k]
    return dict(
        candidates=llm_candidates,
        book_ids=top_ids,
        details=get_book_details(top_ids),
    )


def recommend_item2vec_only(
    seed_title: str,
    k: int = 10,
    topn_per_seed: int = 50,
    include_seed: bool = True,
):
    """Test 2: Item2Vec only (start from a single seed title).

    Args:
        seed_title: Title to look up in the catalog title index.
        k: Maximum number of book IDs to return.
        topn_per_seed: Forwarded unchanged to ``rerank_with_item2vec``.
        include_seed: When True (default, the original behavior) the reranker's
            output is returned as-is, which may list the seed book itself.
            When False the seed ID is removed so the caller is not recommended
            the book they started from.

    Returns:
        Dict with ``seed_id`` (None if the title is not in the catalog),
        ``book_ids`` and ``details``.
    """
    index = _title_index()
    seed_ids = filter_existing_titles([{'title': seed_title}], index)
    if not seed_ids:
        # If the seed title does not exist in the catalog, return empty results.
        return {
            'seed_id': None,
            'book_ids': [],
            'details': [],
        }

    seed_id = seed_ids[0]
    model = _embeddings_model()

    # When the seed is going to be dropped, ask for one extra item so the
    # caller still receives up to k recommendations after filtering.
    fetch_k = k if include_seed else k + 1
    book_ids = rerank_with_item2vec(
        [seed_id], model, topn_per_seed=topn_per_seed, final_k=fetch_k
    )
    if not include_seed:
        book_ids = [book_id for book_id in book_ids if book_id != seed_id][:k]

    details = get_book_details(book_ids)
    return {
        'seed_id': seed_id,
        'book_ids': book_ids,
        'details': details,
    }


def recommend_llm_plus_item2vec(prompt: str, k: int = 10, topn_per_seed: int = 50):
    """Test 3: Full pipeline (LLM + Item2Vec).

    Generates LLM candidates for ``prompt``, hands them to
    ``get_final_book_ids`` (with ``final_k=k`` and ``topn_per_seed``), then
    looks up details for the resulting IDs.

    Returns a dict with keys ``candidates``, ``book_ids`` and ``details``.
    """
    outcome = {'candidates': generate_book_candidates(prompt)}
    outcome['book_ids'] = get_final_book_ids(
        outcome['candidates'],
        final_k=k,
        topn_per_seed=topn_per_seed,
    )
    outcome['details'] = get_book_details(outcome['book_ids'])
    return outcome


In [11]:
# # Test 1: LLM + title-to-ID mapping (no Item2Vec rerank).
# prompt = input('Test 1 - Enter a book name or preference description for LLM + ID mapping: ')

# result = recommend_llm_and_id_mapping(prompt)

# print('=== Test 1: LLM + ID mapping (no Item2Vec rerank) ===\n')
# print('--- OpenAI candidate titles ---')
# for i, rec in enumerate(result['candidates'], start=1):
#     title = rec.get('title')
#     print(f'{i}. {title}')

# print('\n--- Catalog IDs after title mapping ---')
# for i, asin in enumerate(result['book_ids'], start=1):
#     print(f'{i}. {asin}')

# print('\n--- Book details ---')
# for i, info in enumerate(result['details'], start=1):
#     asin = info.get('asin')
#     title = info.get('title')
#     author = info.get('author_name')
#     print(f'{i}. {asin} - {title} ({author})')


In [12]:
# # Test 2: Item2Vec only, seeded from a single catalog book title.
# seed_title = input('Test 2 - Enter a single seed book title for pure Item2Vec: ')

# result = recommend_item2vec_only(seed_title)

# print('=== Test 2: Item2Vec only (seeded by a single book title) ===\n')
# print(f'Seed title: {seed_title!r}')
# seed_id = result['seed_id']
# print(f'Seed catalog ID: {seed_id}')

# print('\n--- Recommended book IDs ---')
# for i, asin in enumerate(result['book_ids'], start=1):
#     print(f'{i}. {asin}')

# print('\n--- Book details ---')
# for i, info in enumerate(result['details'], start=1):
#     asin = info.get('asin')
#     title = info.get('title')
#     author = info.get('author_name')
#     print(f'{i}. {asin} - {title} ({author})')


In [13]:
# # Test 3: Full pipeline (LLM + Item2Vec).
# prompt = input('Test 3 - Enter a book name or preference description for LLM + Item2Vec: ')

# result = recommend_llm_plus_item2vec(prompt)

# print('=== Test 3: LLM + Item2Vec (full pipeline) ===\n')

# print('--- OpenAI candidate titles ---')
# for i, rec in enumerate(result['candidates'], start=1):
#     title = rec.get('title')
#     print(f'{i}. {title}')

# print('\n--- Final reranked book IDs ---')
# for i, asin in enumerate(result['book_ids'], start=1):
#     print(f'{i}. {asin}')

# print('\n--- Book details ---')
# for i, info in enumerate(result['details'], start=1):
#     asin = info.get('asin')
#     title = info.get('title')
#     author = info.get('author_name')
#     print(f'{i}. {asin} - {title} ({author})')


In [14]:
# Presentation helper to compare strategies for a book title + description.
from textwrap import shorten


def _short_title(value: str, width: int = 40) -> str:
    if not value:
        return ""
    return shorten(str(value), width=width, placeholder="...")


def _pretty_print_strategy_row(rank: int, s1: str, s2: str, s3: str) -> None:
    print(f"{rank:<4} | {s1:<40} | {s2:<40} | {s3:<40}")


def _print_strategy_table(heading: str, labels, detail_lists, k: int) -> None:
    """Print one rank-by-rank title table comparing three strategies.

    ``labels`` and ``detail_lists`` are parallel three-item sequences: the
    column headings and, for each column, the list of detail dicts whose
    ``"title"`` values fill that column. Columns shorter than ``k`` are padded
    with blank cells.
    """
    print(heading)
    header = f"{'Rank':<4} | {labels[0]:<40} | {labels[1]:<40} | {labels[2]:<40}"
    print(header)
    print("-" * len(header))

    columns = [[entry.get("title") for entry in details] for details in detail_lists]
    for i in range(k):
        cells = [_short_title(col[i] if i < len(col) else "") for col in columns]
        _pretty_print_strategy_row(i + 1, *cells)


def run_six_strategy_demo(book_name: str, description: str, k: int = 5):
    """Run six strategy combinations for book name and description inputs.

    Book name input:
      - LLM baseline (LLM + ID mapping)
      - Item2Vec only
      - LLM + Item2Vec

    Description input:
      - LLM baseline (LLM + ID mapping)
      - Item2Vec only (may be empty if description is not a catalog title)
      - LLM + Item2Vec

    Prints two side-by-side title tables (one per input type) followed by a
    per-strategy detail listing, and returns the raw results as a dict with
    keys ``book_name``, ``description``, ``book_name_input`` and
    ``description_input`` (each of the latter mapping ``llm`` / ``item2vec`` /
    ``llm_item2vec`` to the corresponding strategy result).
    """
    print("=" * 80)
    print(f"Book name: {book_name!r}")
    print(f"Description: {description!r}")
    print("=" * 80)
    print()

    # Run the three strategies for each input, book name first. Dict displays
    # evaluate left to right, so the call order is LLM+ID, Item2Vec, LLM+Item2Vec.
    runs = {}
    for group, text in (("book_name_input", book_name), ("description_input", description)):
        runs[group] = {
            "llm": recommend_llm_and_id_mapping(text, k=k),
            "item2vec": recommend_item2vec_only(text, k=k),
            "llm_item2vec": recommend_llm_plus_item2vec(text, k=k),
        }

    strategies = (("llm", "LLM+ID"), ("item2vec", "Item2Vec"), ("llm_item2vec", "LLM+Item2Vec"))
    sections = (
        ("Book-name input strategies (titles only)", "Book name", "book_name_input"),
        ("\nDescription input strategies (titles only)", "Description", "description_input"),
    )

    # One comparison table per input type; remember (label, details) pairs in
    # display order for the detail listing below.
    labelled_details = []
    for heading, prefix, group in sections:
        labels = [f"{prefix} – {suffix}" for _, suffix in strategies]
        detail_lists = [runs[group][key]["details"] for key, _ in strategies]
        _print_strategy_table(heading, labels, detail_lists, k)
        labelled_details.extend(zip(labels, detail_lists))

    print("\nDetails by strategy (first k items)")
    for label, details in labelled_details:
        print("\n" + label)
        print("-" * len(label))
        for idx, info in enumerate(details[:k], 1):
            asin = info.get("asin")
            title = info.get("title")
            author = info.get("author_name")
            print(f"{idx:>2}. {title} ({author}) [ASIN: {asin}]")

    return {
        "book_name": book_name,
        "description": description,
        **runs,
    }


# Presentation-deck run: one catalog title and one free-text preference, ten rows
# per strategy. The result is kept in `six_combo_results` for later cells.
demo_request = dict(
    book_name="Dune",
    description="I'm looking for some underrated science fiction novels.",
    k=10,
)
six_combo_results = run_six_strategy_demo(**demo_request)


Book name: 'Dune'
Description: "I'm looking for some underrated science fiction novels."

Book-name input strategies (titles only)
Rank | Book name – LLM+ID                       | Book name – Item2Vec                     | Book name – LLM+Item2Vec                
-------------------------------------------------------------------------------------------------------------------------------------
1    | Foundation                               | Dune                                     | Foundation                              
2    | Neuromancer                              | Shadow of the Hegemon (The Shadow...     | Neuromancer                             
3    | Snow Crash                               | The End of Eternity: A Novel             | Snow Crash                              
4    | The Forever War                          | The Anubis Gates (Ace Science Fiction)   | The Forever War                         
5    | The Moon Is a Harsh Mistress             | The Arabian Nig

In [16]:
# Pretty-print six_combo_results in one wide table (screenshot friendly).
def pretty_print_six_combo_table(results: dict, k: int = 5) -> None:
    """Print all six strategy columns side by side in one wide table.

    ``results`` is expected to have the shape returned by
    ``run_six_strategy_demo``; missing groups, strategies or ``details`` lists
    are treated as empty columns. Exactly ``k`` rows are printed, with blank
    cells where a strategy produced fewer than ``k`` titles.
    """
    # (column heading, input group, strategy key, column width)
    layout = (
        ("Book: LLM", "book_name_input", "llm", 28),
        ("Book: Item2Vec", "book_name_input", "item2vec", 28),
        ("Book: LLM+Item2Vec", "book_name_input", "llm_item2vec", 32),
        ("Desc: LLM", "description_input", "llm", 28),
        ("Desc: Item2Vec", "description_input", "item2vec", 28),
        ("Desc: LLM+Item2Vec", "description_input", "llm_item2vec", 32),
    )

    columns = []
    for heading, group, strategy, col_width in layout:
        entries = results.get(group, {}).get(strategy, {}).get("details", [])
        columns.append((heading, col_width, [entry.get("title") for entry in entries]))

    header = " | ".join(
        [f"{'Rank':<4}"] + [f"{heading:<{col_width}}" for heading, col_width, _ in columns]
    )
    print(header)
    print("-" * len(header))

    for row in range(k):
        cells = [f"{row + 1:<4}"]
        for _, col_width, titles in columns:
            raw = titles[row] if row < len(titles) else ""
            cells.append(f"{_short_title(raw, width=col_width):<{col_width}}")
        print(" | ".join(cells))


# Show the stored demo results as a single wide table; change k to print
# fewer or more rows.
pretty_print_six_combo_table(results=six_combo_results, k=10)


Rank | Book: LLM                    | Book: Item2Vec               | Book: LLM+Item2Vec               | Desc: LLM                    | Desc: Item2Vec               | Desc: LLM+Item2Vec              
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1    | Foundation                   | Dune                         | Foundation                       | The Stars My Destination     |                              | The Stars My Destination        
2    | Neuromancer                  | Shadow of the Hegemon...     | Neuromancer                      | A Canticle for Leibowitz     |                              | Rendezvous with Rama            
3    | Snow Crash                   | The End of Eternity: A Novel | Snow Crash                       | Dhalgren                     |                              | A Canticle for Leibowitz        
4    