# Book Recommendation System - Interactive Testing

This notebook demonstrates all 12 recommendation query types with the hybrid MySQL+MongoDB architecture.

## Setup and Authentication

First, we'll establish database connections with proper authentication.

In [14]:
# Import required libraries
import sys
from pathlib import Path
from dotenv import load_dotenv
import os
from sqlalchemy import create_engine, text
from pymongo import MongoClient
from pymongo.server_api import ServerApi
import pandas as pd
from collections import Counter, defaultdict
import math

# Add project root to path
PROJECT_ROOT = Path.cwd().parent
sys.path.insert(0, str(PROJECT_ROOT))

print("Project root set!")

Project root set!


In [15]:
# Load environment and establish connections
ENV_PATH = PROJECT_ROOT / ".env"
load_dotenv(dotenv_path=ENV_PATH, override=True)

# Get credentials
db_name = os.getenv("DB_NAME", "bookrec")
host = os.getenv("HOST", "localhost")
msql_user = os.getenv("MSQL_USER")
msql_password = os.getenv("MSQL_PASSWORD")
msql_port = os.getenv("MSQL_PORT", "3306")
mdb_user = os.getenv("MDB_USER")
mdb_password = os.getenv("MDB_PASSWORD")
mdb_cluster = os.getenv("MDB_CLUSTER")
mdb_appname = os.getenv("MDB_APPNAME", "Cluster0")
mdb_use_atlas = os.getenv("MDB_USE_ATLAS", "false").lower() == "true"

# Validate credentials
if not all([msql_user, msql_password, mdb_user, mdb_password]):
    raise ValueError("Database credentials missing in .env file!")

print("‚úì Credentials loaded")

‚úì Credentials loaded


In [16]:
# Connect to MySQL
mysql_engine = create_engine(
    f"mysql+mysqlconnector://{msql_user}:{msql_password}@{host}:{msql_port}/{db_name}"
)

# Test connection
with mysql_engine.connect() as conn:
    result = conn.execute(text("SELECT COUNT(*) FROM users"))
    user_count = result.scalar()
    result = conn.execute(text("SELECT COUNT(*) FROM books"))
    book_count = result.scalar()
    result = conn.execute(text("SELECT COUNT(*) FROM ratings"))
    rating_count = result.scalar()

print("‚úì MySQL connected successfully!")
print(f"  Users: {user_count:,}")
print(f"  Books: {book_count:,}")
print(f"  Ratings: {rating_count:,}")

‚úì MySQL connected successfully!
  Users: 278,858
  Books: 266,457
  Ratings: 1,016,676


In [17]:
# Connect to MongoDB
if mdb_use_atlas:
    mongodb_uri = f"mongodb+srv://{mdb_user}:{mdb_password}@{mdb_cluster}/?retryWrites=true&w=majority&appName={mdb_appname}"
    mongo_client = MongoClient(mongodb_uri, server_api=ServerApi('1'))
else:
    mongodb_uri = f"mongodb://{mdb_user}:{mdb_password}@{host}:27017/"
    mongo_client = MongoClient(mongodb_uri)

mongo_db = mongo_client[db_name]

# Test connection
books_meta_count = mongo_db.books_metadata.count_documents({})
users_profiles_count = mongo_db.users_profiles.count_documents({})

print("‚úì MongoDB connected successfully!")
print(f"  Books Metadata: {books_meta_count:,}")
print(f"  User Profiles: {users_profiles_count:,}")

‚úì MongoDB connected successfully!
  Books Metadata: 266,457
  User Profiles: 278,858


## Helper Functions

Define helper functions used across multiple recommendation types.

In [18]:
def get_sample_users(min_ratings=20, limit=5):
    """Get sample user IDs with sufficient rating history"""
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT user_id, COUNT(*) as rating_count
            FROM ratings
            GROUP BY user_id
            HAVING rating_count >= :min_ratings
            ORDER BY RAND()
            LIMIT :limit
        """), {"min_ratings": min_ratings, "limit": limit})
        
        users = [(row[0], row[1]) for row in result.fetchall()]
    
    return users

def display_book_info(isbn):
    """Display detailed book information"""
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT title, authors, publisher, publication_year
            FROM books WHERE isbn = :isbn
        """), {"isbn": isbn})
        row = result.fetchone()
        
        if not row:
            return None
        
        print(f"Title: {row[0]}")
        print(f"Authors: {row[1]}")
        print(f"Publisher: {row[2]}")
        print(f"Year: {row[3]}")
        
        # Get genres
        result = conn.execute(text("""
            SELECT rg.genre_name
            FROM book_subgenres bs
            JOIN subgenres sg ON bs.subgenre_id = sg.subgenre_id
            JOIN root_genres rg ON sg.root_genre_id = rg.root_genre_id
            WHERE bs.isbn = :isbn
        """), {"isbn": isbn})
        genres = [row[0] for row in result.fetchall()]
        print(f"Genres: {', '.join(genres)}")
        
        # Get metadata
        book_meta = mongo_db.books_metadata.find_one({"_id": isbn})
        if book_meta and "rating_metrics" in book_meta:
            rm = book_meta["rating_metrics"]
            print(f"Rating: {rm.get('r_avg', 'N/A')}/10 ({rm.get('r_count', 0)} ratings)")
            print(f"Quality Score: {rm.get('rating_score', 'N/A')}")

# Get sample users for testing
sample_users = get_sample_users(min_ratings=20, limit=5)
print("Sample users for testing:")
for user_id, count in sample_users:
    print(f"  User {user_id}: {count} ratings")

Sample users for testing:
  User 232106: 76 ratings
  User 269397: 44 ratings
  User 164706: 20 ratings
  User 243200: 23 ratings
  User 186312: 22 ratings


## 1. Content-Based Recommendations

Recommends books based on genre, author, and price preferences.

In [None]:
# Content-Based Recommendations
TEST_USER_ID = sample_users[0][0]  # Use first sample user

def get_content_based_recommendations(user_id, limit=10):
    """Get content-based recommendations"""
    # Get user preferences from MongoDB
    user_prof = mongo_db.users_profiles.find_one({"_id": user_id})
    prefs = user_prof.get("preferences", {}) if user_prof else {}
    
    # Get favorite genres from MySQL
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT rg.genre_name, COUNT(*) as count
            FROM ratings r
            JOIN books b ON r.isbn = b.isbn
            JOIN book_subgenres bs ON b.isbn = bs.isbn
            JOIN subgenres sg ON bs.subgenre_id = sg.subgenre_id
            JOIN root_genres rg ON sg.root_genre_id = rg.root_genre_id
            WHERE r.user_id = :user_id AND r.rating >= 7
            GROUP BY rg.genre_name
            ORDER BY count DESC
            LIMIT 5
        """), {"user_id": user_id})
        
        fav_genres = [row[0] for row in result.fetchall()]
    
    if not fav_genres:
        return []
    
    # Get rated books
    with mysql_engine.connect() as conn:
        result = conn.execute(text("SELECT isbn FROM ratings WHERE user_id = :user_id"),
                             {"user_id": user_id})
        rated_isbns = {row[0] for row in result.fetchall()}
    
    # Find candidate books in favorite genres
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT DISTINCT b.isbn, b.title, b.authors
            FROM books b
            JOIN book_subgenres bs ON b.isbn = bs.isbn
            JOIN subgenres sg ON bs.subgenre_id = sg.subgenre_id
            JOIN root_genres rg ON sg.root_id = rg.root_id
            WHERE rg.root_name IN :genres
            LIMIT 100
        """), {"genres": tuple(fav_genres)})
        
        candidates = [{"isbn": row[0], "title": row[1], "authors": row[2]}
                     for row in result.fetchall() if row[0] not in rated_isbns]
    
    # Score candidates
    scored = []
    for book in candidates:
        book_meta = mongo_db.books_metadata.find_one({"_id": book["isbn"]})
        if book_meta and "rating_metrics" in book_meta:
            score = book_meta["rating_metrics"].get("rating_score", 0)
            if score >= 5:
                scored.append({**book, "score": score})
    
    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored[:limit]

# Test content-based
print(f"üéØ Content-Based Recommendations for User {TEST_USER_ID}\n")
content_recs = get_content_based_recommendations(TEST_USER_ID, limit=5)

for i, rec in enumerate(content_recs, 1):
    print(f"{i}. {rec['title']}")
    print(f"   Score: {rec['score']:.2f}")
    print(f"   Authors: {rec['authors']}")
    print()

üéØ Content-Based Recommendations for User 232106



ProgrammingError: (mysql.connector.errors.ProgrammingError) 1054 (42S22): Unknown column 'rg.genre_name' in 'SELECT'
[SQL: 
            SELECT rg.genre_name, COUNT(*) as count
            FROM ratings r
            JOIN books b ON r.isbn = b.isbn
            JOIN book_subgenres bs ON b.isbn = bs.isbn
            JOIN subgenres sg ON bs.subgenre_id = sg.subgenre_id
            JOIN root_genres rg ON sg.root_genre_id = rg.root_genre_id
            WHERE r.user_id = %(user_id)s AND r.rating >= 7
            GROUP BY rg.genre_name
            ORDER BY count DESC
            LIMIT 5
        ]
[parameters: {'user_id': 232106}]
(Background on this error at: https://sqlalche.me/e/20/f405)

## 2. Collaborative Filtering

Finds similar users and recommends their favorite books.

In [22]:
# Collaborative Filtering
def get_collaborative_recommendations(user_id, limit=10, min_common=5):
    """Get collaborative filtering recommendations"""
    # Get user's ratings
    with mysql_engine.connect() as conn:
        result = conn.execute(text("SELECT isbn, rating FROM ratings WHERE user_id = :user_id"),
                             {"user_id": user_id})
        target_ratings = {row[0]: row[1] for row in result.fetchall()}
    
    if not target_ratings:
        return []
    
    target_books = set(target_ratings.keys())
    
    # Find users who rated similar books
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT DISTINCT user_id
            FROM ratings
            WHERE isbn IN :isbns AND user_id != :user_id
        """), {"isbns": tuple(list(target_books)[:100]), "user_id": user_id})
        
        candidate_users = [row[0] for row in result.fetchall()]
    
    # Calculate similarity (simplified Pearson correlation)
    similar_users = []
    for candidate_id in candidate_users[:100]:  # Limit for performance
        with mysql_engine.connect() as conn:
            result = conn.execute(text("SELECT isbn, rating FROM ratings WHERE user_id = :user_id"),
                                 {"user_id": candidate_id})
            candidate_ratings = {row[0]: row[1] for row in result.fetchall()}
        
        common_books = target_books.intersection(set(candidate_ratings.keys()))
        
        if len(common_books) >= min_common:
            # Simple correlation
            target_vals = [target_ratings[isbn] for isbn in common_books]
            candidate_vals = [candidate_ratings[isbn] for isbn in common_books]
            
            target_mean = sum(target_vals) / len(target_vals)
            candidate_mean = sum(candidate_vals) / len(candidate_vals)
            
            numerator = sum((t - target_mean) * (c - candidate_mean) 
                          for t, c in zip(target_vals, candidate_vals))
            
            if numerator > 0:
                similar_users.append({
                    "user_id": candidate_id,
                    "common_books": len(common_books)
                })
    
    if not similar_users:
        return []
    
    similar_users = similar_users[:10]
    
    # Get recommendations from similar users
    book_scores = defaultdict(lambda: {"score": 0, "count": 0})
    
    for similar_user in similar_users:
        with mysql_engine.connect() as conn:
            result = conn.execute(text("""
                SELECT isbn, rating FROM ratings
                WHERE user_id = :user_id AND rating >= 7
            """), {"user_id": similar_user["user_id"]})
            
            for isbn, rating in result.fetchall():
                if isbn not in target_ratings:
                    book_scores[isbn]["score"] += rating
                    book_scores[isbn]["count"] += 1
    
    # Get book details
    recommendations = []
    for isbn, data in book_scores.items():
        with mysql_engine.connect() as conn:
            result = conn.execute(text("SELECT title, authors FROM books WHERE isbn = :isbn"),
                                 {"isbn": isbn})
            row = result.fetchone()
            
            if row:
                recommendations.append({
                    "isbn": isbn,
                    "title": row[0],
                    "authors": row[1],
                    "score": data["score"] / data["count"],
                    "count": data["count"]
                })
    
    recommendations.sort(key=lambda x: (x["count"], x["score"]), reverse=True)
    return recommendations[:limit]

# Test collaborative filtering
print(f"üë• Collaborative Filtering for User {TEST_USER_ID}\n")
collab_recs = get_collaborative_recommendations(TEST_USER_ID, limit=5)

for i, rec in enumerate(collab_recs, 1):
    print(f"{i}. {rec['title']}")
    print(f"   Avg Rating from Similar Users: {rec['score']:.1f}/10")
    print(f"   Recommended by {rec['count']} similar users")
    print(f"   Authors: {rec['authors']}")
    print()

üë• Collaborative Filtering for User 232106



MySQLInterfaceError: Python type tuple cannot be converted

## 3. Trending Books

Identifies books with recent momentum and high velocity.

In [None]:
# Trending Books
def get_trending_books(limit=10, recent_window_pct=10):
    """Get trending books with recent activity"""
    # Get max r_seq_book
    with mysql_engine.connect() as conn:
        result = conn.execute(text("SELECT MAX(r_seq_book) FROM ratings"))
        max_seq = result.scalar()
    
    if not max_seq:
        return []
    
    recent_threshold = max_seq * (1 - recent_window_pct / 100)
    
    # Find books with high recent activity
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT 
                isbn,
                COUNT(*) as recent_count,
                AVG(rating) as recent_avg
            FROM ratings
            WHERE r_seq_book >= :threshold
            GROUP BY isbn
            HAVING recent_count >= 5
            ORDER BY recent_count DESC, recent_avg DESC
            LIMIT :limit
        """), {"threshold": recent_threshold, "limit": limit * 2})
        
        trending = []
        for row in result.fetchall():
            isbn, count, avg_rating = row
            
            # Get book details
            book_result = conn.execute(text("""
                SELECT title, authors FROM books WHERE isbn = :isbn
            """), {"isbn": isbn})
            book_row = book_result.fetchone()
            
            if book_row:
                trending.append({
                    "isbn": isbn,
                    "title": book_row[0],
                    "authors": book_row[1],
                    "recent_count": count,
                    "recent_avg": avg_rating,
                    "velocity_score": count * (avg_rating / 10)
                })
    
    trending.sort(key=lambda x: x["velocity_score"], reverse=True)
    return trending[:limit]

# Test trending
print("üî• Trending Books Right Now\n")
trending_books = get_trending_books(limit=5)

for i, book in enumerate(trending_books, 1):
    print(f"{i}. {book['title']}")
    print(f"   Velocity Score: {book['velocity_score']:.2f}")
    print(f"   Recent Activity: {book['recent_count']} ratings (avg: {book['recent_avg']:.1f}/10)")
    print(f"   Authors: {book['authors']}")
    print()

## 4. Similar Books

Finds books similar to a given book based on genres and authors.

In [None]:
# Get a sample book from user's highly rated books
with mysql_engine.connect() as conn:
    result = conn.execute(text("""
        SELECT b.isbn, b.title
        FROM ratings r
        JOIN books b ON r.isbn = b.isbn
        WHERE r.user_id = :user_id AND r.rating >= 8
        ORDER BY RAND()
        LIMIT 1
    """), {"user_id": TEST_USER_ID})
    row = result.fetchone()
    
    if row:
        SAMPLE_ISBN = row[0]
        SAMPLE_TITLE = row[1]
        print(f"Finding books similar to: {SAMPLE_TITLE}")
        print(f"ISBN: {SAMPLE_ISBN}\n")

In [None]:
# Similar Books
def get_similar_books(isbn, limit=10):
    """Find books similar to the given ISBN"""
    # Get target book genres
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT rg.genre_name
            FROM book_subgenres bs
            JOIN subgenres sg ON bs.subgenre_id = sg.subgenre_id
            JOIN root_genres rg ON sg.root_genre_id = rg.root_genre_id
            WHERE bs.isbn = :isbn
        """), {"isbn": isbn})
        
        target_genres = [row[0] for row in result.fetchall()]
    
    if not target_genres:
        return []
    
    # Find books with overlapping genres
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT DISTINCT bs.isbn, COUNT(DISTINCT rg.genre_name) as genre_matches
            FROM book_subgenres bs
            JOIN subgenres sg ON bs.subgenre_id = sg.subgenre_id
            JOIN root_genres rg ON sg.root_genre_id = rg.root_genre_id
            WHERE rg.genre_name IN :genres AND bs.isbn != :isbn
            GROUP BY bs.isbn
            ORDER BY genre_matches DESC
            LIMIT 50
        """), {"genres": tuple(target_genres), "isbn": isbn})
        
        candidates = [(row[0], row[1]) for row in result.fetchall()]
    
    # Get details and score
    similar = []
    for candidate_isbn, genre_matches in candidates:
        with mysql_engine.connect() as conn:
            result = conn.execute(text("""
                SELECT title, authors FROM books WHERE isbn = :isbn
            """), {"isbn": candidate_isbn})
            row = result.fetchone()
            
            if row:
                # Get quality score from MongoDB
                book_meta = mongo_db.books_metadata.find_one({"_id": candidate_isbn})
                quality = 0
                if book_meta and "rating_metrics" in book_meta:
                    quality = book_meta["rating_metrics"].get("rating_score", 0)
                
                score = genre_matches * 15 + quality * 2
                
                similar.append({
                    "isbn": candidate_isbn,
                    "title": row[0],
                    "authors": row[1],
                    "genre_matches": genre_matches,
                    "similarity_score": score
                })
    
    similar.sort(key=lambda x: x["similarity_score"], reverse=True)
    return similar[:limit]

# Test similar books
print(f"üîç Books Similar to '{SAMPLE_TITLE}'\n")
similar_books = get_similar_books(SAMPLE_ISBN, limit=5)

for i, book in enumerate(similar_books, 1):
    print(f"{i}. {book['title']}")
    print(f"   Similarity Score: {book['similarity_score']:.1f}")
    print(f"   Shared Genres: {book['genre_matches']}")
    print(f"   Authors: {book['authors']}")
    print()

## 5. Recommendation Dashboard

Comprehensive overview combining multiple strategies.

In [None]:
# Recommendation Dashboard
def get_recommendation_dashboard(user_id):
    """Generate comprehensive recommendation dashboard"""
    print("=" * 80)
    print("üéØ PERSONALIZED RECOMMENDATION DASHBOARD")
    print("=" * 80)
    
    # User summary
    with mysql_engine.connect() as conn:
        result = conn.execute(text("""
            SELECT reader_level, critic_profile, mean_rating
            FROM users WHERE user_id = :user_id
        """), {"user_id": user_id})
        row = result.fetchone()
        
        if row:
            print(f"\nUser {user_id} Profile:")
            print(f"  Reader Level: {row[0]}")
            print(f"  Critic Profile: {row[1]}")
            print(f"  Average Rating: {row[2]:.2f}/10" if row[2] else "")
    
    # Content-based picks
    print("\n" + "=" * 80)
    print("üìö PERFECT FOR YOUR TASTE (Content-Based)")
    print("=" * 80)
    content_picks = get_content_based_recommendations(user_id, limit=3)
    for i, book in enumerate(content_picks, 1):
        print(f"\n{i}. {book['title']}")
        print(f"   Quality Score: {book['score']:.1f}")
    
    # Collaborative picks
    print("\n" + "=" * 80)
    print("üë• LOVED BY SIMILAR READERS (Collaborative)")
    print("=" * 80)
    collab_picks = get_collaborative_recommendations(user_id, limit=3)
    for i, book in enumerate(collab_picks, 1):
        print(f"\n{i}. {book['title']}")
        print(f"   Avg from Similar Users: {book['score']:.1f}/10")
    
    # Trending
    print("\n" + "=" * 80)
    print("üî• TRENDING NOW")
    print("=" * 80)
    trending = get_trending_books(limit=3)
    for i, book in enumerate(trending, 1):
        print(f"\n{i}. {book['title']}")
        print(f"   Velocity Score: {book['velocity_score']:.2f}")
    
    print("\n" + "=" * 80)

# Generate dashboard
get_recommendation_dashboard(TEST_USER_ID)

## Test with Multiple Users

Let's test recommendations for all sample users to see variety.

In [None]:
# Test all recommendation types for each sample user
for user_id, rating_count in sample_users[:3]:  # Test first 3 users
    print("\n" + "=" * 80)
    print(f"USER {user_id} ({rating_count} ratings)")
    print("=" * 80)
    
    # Content-based
    content = get_content_based_recommendations(user_id, limit=2)
    if content:
        print("\nüìö Content-Based:")
        for book in content:
            print(f"  ‚Ä¢ {book['title']} (score: {book['score']:.1f})")
    
    # Collaborative
    collab = get_collaborative_recommendations(user_id, limit=2)
    if collab:
        print("\nüë• Collaborative:")
        for book in collab:
            print(f"  ‚Ä¢ {book['title']} ({book['count']} similar users)")
    
    print()

## Cleanup

Close database connections.

In [None]:
# Close connections
mongo_client.close()
mysql_engine.dispose()

print("‚úì Database connections closed successfully!")