# Book Recommender System - Training & Evaluation

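This notebook provides:
1. Data exploration and visualization
2. Model training and parameter tuning
3. Evaluation metrics
4. Side-by-side comparison of different weight configurations
5. Performance analysis
6. Recommendations for production use
7. Saving the selected model configuration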

In [None]:
# Imports
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import load_npz
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# Our modules
from hybrid_recommender import HybridRecommender
from knn_recommender_sparse import load_sparse_matrix, create_title_mapping, SparseKnnRecommender

# Styling
# Matplotlib style sheet used for every figure in this notebook.
# NOTE(review): the 'seaborn-v0_8-*' names exist in Matplotlib >= 3.6 only;
# older releases call this style 'seaborn-darkgrid' — confirm the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
# Default seaborn colour palette for subsequent plots.
sns.set_palette('husl')
# IPython magic: render Matplotlib figures inline in the cell output.
%matplotlib inline

print("✓ Imports successful!")

## 1. Data Exploration

Let's explore our dataset and understand its characteristics.

In [None]:
# Read the sparse book-by-user interaction matrix and report its basic shape statistics
interaction_matrix = load_npz('../data/book_user_matrix_sparse.npz')

n_matrix_rows, n_matrix_cols = interaction_matrix.shape  # rows = books, columns = users
n_stored = interaction_matrix.nnz                         # number of stored entries
n_cells = n_matrix_rows * n_matrix_cols
rule_60 = "=" * 60

print(rule_60)
print("Interaction Matrix Statistics")
print(rule_60)
print(f"Shape: {interaction_matrix.shape} (books x users)")
print(f"Non-zero entries: {n_stored:,}")
print(f"Sparsity: {100 * (1 - n_stored / n_cells):.2f}%")
print(f"Density: {100 * n_stored / n_cells:.4f}%")
print(f"Average interactions per book: {n_stored / n_matrix_rows:.2f}")
print(f"Average interactions per user: {n_stored / n_matrix_cols:.2f}")

In [None]:
# Histogram the interaction totals along each axis of the matrix.
# Row sums give one total per book, column sums one total per user.
# NOTE(review): these are sums of the stored values, so they equal interaction
# *counts* only if the matrix is binary — confirm how the matrix was built.
interactions_per_book = np.array(interaction_matrix.sum(axis=1)).flatten()
interactions_per_user = np.array(interaction_matrix.sum(axis=0)).flatten()

fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# One panel per entity: (totals, panel title, y-axis label, extra hist kwargs)
panels = [
    (interactions_per_book, 'Distribution of Interactions per Book', 'Number of Books', {}),
    (interactions_per_user, 'Distribution of Interactions per User', 'Number of Users', {'color': 'orange'}),
]

for panel_ax, (totals, panel_title, panel_ylabel, hist_kwargs) in zip(axes, panels):
    mean_total = totals.mean()
    panel_ax.hist(totals, bins=50, edgecolor='black', alpha=0.7, **hist_kwargs)
    panel_ax.set_title(panel_title, fontsize=14, fontweight='bold')
    panel_ax.set_xlabel('Number of Interactions')
    panel_ax.set_ylabel(panel_ylabel)
    # Dashed red line marks the mean of the distribution
    panel_ax.axvline(mean_total, color='red', linestyle='--', label=f'Mean: {mean_total:.0f}')
    panel_ax.legend()

plt.tight_layout()
plt.show()

for entity, totals in (("Book", interactions_per_book), ("User", interactions_per_user)):
    print(f"{entity} interaction stats: min={totals.min()}, max={totals.max()}, median={np.median(totals):.0f}")

In [None]:
# Read the sparse book-by-feature content matrix and summarise how full it is
content_features = load_npz('../data/content_features.npz')

n_feature_rows, n_feature_cols = content_features.shape  # rows = books, columns = features
n_feature_entries = content_features.nnz
separator = "=" * 60

print(separator)
print("Content Features Statistics")
print(separator)
print(f"Shape: {content_features.shape} (books x features)")
print(f"Non-zero entries: {n_feature_entries:,}")
print(f"Sparsity: {100 * (1 - n_feature_entries / (n_feature_rows * n_feature_cols)):.2f}%")
print(f"Average features per book: {n_feature_entries / n_feature_rows:.2f}")

In [None]:
# Read the per-book metadata table and print a few headline aggregates
metadata = pl.read_parquet('../data/book_metadata.parquet')

header_rule = "=" * 60
print(header_rule)
print("Book Metadata Statistics")
print(header_rule)
print(f"Total books with metadata: {len(metadata):,}")

# (label suffix, column) pairs whose column mean is reported
mean_columns = (
    ("rating", "average_rating"),
    ("shelves per book", "num_shelves"),
    ("authors per book", "num_authors"),
)
for label_suffix, column_name in mean_columns:
    print(f"Average {label_suffix}: {metadata[column_name].mean():.2f}")

publication_years = metadata['publication_year']
page_counts = metadata['num_pages']
print(f"\nYear range: {publication_years.min()} - {publication_years.max()}")
print(f"Page range: {page_counts.min()} - {page_counts.max()}")

## 2. Model Training & Parameter Tuning

Let's train models with different parameters and compare them.

In [None]:
# Build one HybridRecommender per weighting scheme and time each construction
import time

# (collaborative weight, content weight, display name)
weight_configs = [
    (1.0, 0.0, "Collaborative Only"),
    (0.0, 1.0, "Content Only"),
    (0.7, 0.3, "Collaborative Heavy"),
    (0.6, 0.4, "Balanced 60/40"),
    (0.5, 0.5, "Equal Weights"),
    (0.4, 0.6, "Content Heavy"),
]

recommenders = {}    # display name -> HybridRecommender instance
training_times = {}  # display name -> seconds spent constructing it

print("Training recommenders with different weight configurations...\n")

for collab_w, content_w, name in weight_configs:
    print(f"Training: {name} (collab={collab_w}, content={content_w})")
    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the measurement.
    start = time.perf_counter()

    # NOTE(review): only the constructor is timed — this assumes HybridRecommender
    # loads its data and fits its models in __init__; confirm against the class.
    recommender = HybridRecommender(
        collaborative_weight=collab_w,
        content_weight=content_w,
        n_neighbors=30
    )

    training_times[name] = time.perf_counter() - start
    recommenders[name] = recommender

    print(f"  Trained in {training_times[name]:.2f}s\n")

print("✓ All models trained!")

In [None]:
# Bar chart of construction time per configuration, shaded by duration
config_labels = list(training_times.keys())
elapsed_seconds = list(training_times.values())

fig = px.bar(
    x=config_labels,
    y=elapsed_seconds,
    title='Training Time Comparison',
    labels={'x': 'Configuration', 'y': 'Time (seconds)'},
    color=elapsed_seconds,  # same values drive the bar height and the colour scale
    color_continuous_scale='Blues',
)
fig.update_layout(showlegend=False)
fig.show()

## 3. Evaluation Metrics

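Let's evaluate each configuration's recommendation lists on four metrics: average hybrid score, genre diversity, catalog coverage, and novelty (inverse popularity).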

In [None]:
def evaluate_diversity(recommendations, metadata_dict, recommender, max_genres_per_book=5):
    """Calculate diversity based on unique genres in recommendations.

    Diversity is the number of distinct genre labels divided by the total
    number of genre labels collected across the recommended books, so 1.0
    means no label is repeated and values near 0 mean heavy overlap.

    Args:
        recommendations: Iterable of 4-tuples whose first element is a book
            index into ``recommender.book_ids``.
        metadata_dict: Mapping of book_id -> metadata mapping; the
            comma-separated ``'shelves'`` entry is read as the genre list.
        recommender: Object exposing ``book_ids`` (index -> book_id).
        max_genres_per_book: How many leading genre labels to take per book.

    Returns:
        The unique/total ratio as a float, or 0.0 when no genres were found.
    """
    unique_genres = set()
    total_genres = 0

    for idx, _, _, _ in recommendations:
        book_id = recommender.book_ids[idx]
        if book_id not in metadata_dict:
            continue

        # 'shelves' may be missing, empty, or present with a null value.
        shelves = metadata_dict[book_id].get('shelves') or ''

        # Strip whitespace and drop empty tokens so that "a, a" or a trailing
        # comma does not create spurious extra genres.
        cleaned = [token.strip() for token in shelves.split(',')]
        genres = [token for token in cleaned if token][:max_genres_per_book]

        unique_genres.update(genres)
        total_genres += len(genres)

    if total_genres == 0:
        return 0.0

    return len(unique_genres) / total_genres


def evaluate_coverage(recommendations, total_books):
    """Calculate what fraction of the catalog appears in recommendations.

    Args:
        recommendations: Iterable of 4-tuples whose first element is a book index.
        total_books: Size of the catalog the recommendations were drawn from.

    Returns:
        Number of distinct recommended indices divided by ``total_books``
        (a fraction in [0, 1], not a percentage), or 0.0 for an empty catalog.
    """
    # An empty catalog has nothing to cover; avoid ZeroDivisionError and
    # return the same "no data" value the other metrics use.
    if total_books <= 0:
        return 0.0

    distinct_indices = {idx for idx, _, _, _ in recommendations}
    return len(distinct_indices) / total_books


def evaluate_novelty(recommendations, interactions_per_book):
    """Calculate average novelty (inverse popularity) of recommendations.

    Each recommended book scores ``1 - popularity / max_popularity``, so the
    most popular book in the catalog scores 0 and a book with no interactions
    scores 1. The result is the mean over all recommended books.

    Args:
        recommendations: Iterable of 4-tuples whose first element is a book
            index into ``interactions_per_book``.
        interactions_per_book: 1-D array-like of per-book popularity totals.

    Returns:
        Mean novelty, or 0.0 when there are no recommendations or when no
        book has any interactions (novelty is undefined without a popularity
        signal; 0.0 avoids propagating NaN into the results table).
    """
    indices = [idx for idx, _, _, _ in recommendations]
    if not indices:
        return 0.0

    popularity = np.asarray(interactions_per_book)
    max_interactions = popularity.max()
    # Guard the 0/0 case: with an all-zero popularity vector every ratio is NaN.
    if max_interactions <= 0:
        return 0.0

    novelties = [1 - (popularity[idx] / max_interactions) for idx in indices]
    return np.mean(novelties)

In [None]:
# Score every weighting scheme's top-20 list for a few well-known query titles
test_books = ["1984", "The Great Gatsby", "Lord of the Rings"]

# book_id -> full metadata row (as a dict), for direct lookups by the metric helpers
metadata_dict = {
    row['book_id']: row
    for row in metadata.iter_rows(named=True)
}

results = []
catalog_size = interaction_matrix.shape[0]  # number of books (matrix rows)

for book_query in test_books:
    print(f"\nEvaluating recommendations for: {book_query}")
    print("-" * 60)

    for config_name, recommender in recommenders.items():
        # Resolve the free-text query to a catalog index
        matches = recommender.fuzzy_search(book_query, threshold=60)
        if not matches:
            print(f"  {config_name}: No match found")
            continue

        # The first match is used — presumably the best one; verify fuzzy_search's ordering
        best_title, book_idx, match_score = matches[0]

        recommendations = recommender.recommend_hybrid(book_idx, n_recommendations=20)
        combined_scores = [score for _, score, _, _ in recommendations]

        # NOTE(review): coverage is computed on a single 20-item list, so it is
        # bounded by 20 / catalog_size; it may be more informative when
        # aggregated across all query books.
        row_metrics = {
            'Book': book_query,
            'Configuration': config_name,
            'Avg Score': np.mean(combined_scores),
            'Diversity': evaluate_diversity(recommendations, metadata_dict, recommender),
            'Coverage': evaluate_coverage(recommendations, catalog_size),
            'Novelty': evaluate_novelty(recommendations, interactions_per_book),
        }
        results.append(row_metrics)

        print(
            f"  {config_name:20s} | Score: {row_metrics['Avg Score']:.3f}"
            f" | Div: {row_metrics['Diversity']:.3f} | Nov: {row_metrics['Novelty']:.3f}"
        )

# One row per (query book, configuration) pair
results_df = pd.DataFrame(results)
print("\n✓ Evaluation complete!")

In [None]:
# 2x2 grid of grouped bar charts: one panel per metric, one bar group per configuration
metrics = ['Avg Score', 'Diversity', 'Coverage', 'Novelty']
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# axes.flat walks the grid row by row, matching the order of `metrics`
for ax, metric in zip(axes.flat, metrics):
    # Rows = configurations, columns = query books, cells = metric value
    pivot_data = results_df.pivot(index='Configuration', columns='Book', values=metric)
    pivot_data.plot(kind='bar', ax=ax, width=0.8)

    ax.set_title(f'{metric} by Configuration', fontsize=12, fontweight='bold')
    ax.set_xlabel('Configuration')
    ax.set_ylabel(metric)
    ax.legend(title='Test Book')
    ax.grid(True, alpha=0.3)

    # Tilt the configuration names so they do not overlap
    for tick_label in ax.xaxis.get_majorticklabels():
        tick_label.set_rotation(45)
        tick_label.set_ha('right')

plt.tight_layout()
plt.show()

In [None]:
# Average each metric over the query books, one row per configuration
metric_columns = ['Avg Score', 'Diversity', 'Coverage', 'Novelty']
summary = results_df.groupby('Configuration')[metric_columns].mean().round(3)

wide_rule = "=" * 80
print("\n" + wide_rule)
print("Average Metrics Across All Test Books")
print(wide_rule)
print(summary.to_string())
print("\n")

# Report the top-scoring configuration for each metric independently
print("Best Configurations:")
for metric in metric_columns:
    metric_column = summary[metric]
    best_config = metric_column.idxmax()
    best_value = metric_column.max()
    print(f"  {metric:15s}: {best_config} ({best_value:.3f})")

## 4. Detailed Comparison

Let's do a side-by-side comparison of recommendations from different configurations.

In [None]:
# Print the top recommendations for one query book under three weighting schemes
query_book = "1984"
n_show = 5

print(f"\nComparing Top {n_show} Recommendations for: {query_book}")
print("=" * 100)

comparison_configs = [
    ("Collaborative Only", recommenders["Collaborative Only"]),
    ("Content Only", recommenders["Content Only"]),
    ("Balanced 60/40", recommenders["Balanced 60/40"]),
]

for config_name, recommender in comparison_configs:
    print(f"\n{config_name}:")
    print("-" * 100)

    matches = recommender.fuzzy_search(query_book, threshold=60)
    if not matches:
        print("  No match found")
        continue

    # Each match is indexed as (title, book index, score); only the index is needed here
    book_idx = matches[0][1]
    recommendations = recommender.recommend_hybrid(book_idx, n_recommendations=n_show)

    for i, (idx, combined, collab, content) in enumerate(recommendations, 1):
        title = recommender.idx_to_title[idx]
        book_id = recommender.book_ids[idx]

        print(f"  {i}. {title[:60]}")
        print(f"     Score: {combined:.3f} (collab: {collab:.3f}, content: {content:.3f})")

        if book_id in metadata_dict:
            meta = metadata_dict[book_id]
            # The key can exist with a null value, in which case
            # .get('shelves', '') returns None and .split would raise.
            shelves = meta.get('shelves') or ''
            # Strip whitespace and drop empty tokens before showing the first three
            genres = [g.strip() for g in shelves.split(',') if g.strip()][:3]
            if genres:
                print(f"     Genres: {', '.join(genres)}")

## 5. Performance Analysis

In [None]:
# Benchmark recommendation latency for the first three configurations
import time

# Candidate titles come from one recommender's title index
sample_books = list(recommenders["Balanced 60/40"].title_to_idx.keys())[:100]

timing_results = {}  # configuration name -> {'mean', 'median', 'std'} in milliseconds

for config_name, recommender in list(recommenders.items())[:3]:  # Test 3 configs
    times = []

    for book_title in sample_books[:20]:  # Test on 20 books
        if book_title in recommender.title_to_idx:
            book_idx = recommender.title_to_idx[book_title]

            # perf_counter() is monotonic and high-resolution, which matters
            # for millisecond-scale intervals; time.time() is neither.
            start = time.perf_counter()
            _ = recommender.recommend_hybrid(book_idx, n_recommendations=10)
            elapsed = time.perf_counter() - start

            times.append(elapsed * 1000)  # Convert to ms

    # Without samples np.mean/np.std would return NaN with a RuntimeWarning
    if not times:
        print(f"{config_name}: no sampled titles found, skipping")
        continue

    timing_results[config_name] = {
        'mean': np.mean(times),
        'median': np.median(times),
        'std': np.std(times)
    }

print("\nRecommendation Speed (milliseconds):")
print("=" * 60)
for config, stats in timing_results.items():
    print(f"{config:25s}: {stats['mean']:.2f}ms (±{stats['std']:.2f}ms)")

## 6. Recommendations

Based on the evaluation, here are the recommendations for production use.

In [None]:
# Print the configuration with the highest mean across the four summary metrics
section_rule = "=" * 80
print("\n" + section_rule)
print("RECOMMENDATIONS FOR PRODUCTION")
print(section_rule)

# NOTE(review): this is an unweighted mean of metrics on different scales
# (e.g. Coverage vs. Novelty) — confirm that is the intended ranking criterion.
best_overall = summary.mean(axis=1).idxmax()
best_row = summary.loc[best_overall]

print(f"\nBest Overall Configuration: {best_overall}")
print("\nMetrics:")
for metric in ['Avg Score', 'Diversity', 'Coverage', 'Novelty']:
    value = best_row[metric]
    print(f"  {metric:15s}: {value:.3f}")

print("\n" + "-" * 80)
print("\nUse Cases:")
# Fixed guidance text; it is not derived from the metrics computed above
use_case_lines = (
    "  - For accuracy-focused recommendations: Collaborative Heavy (0.7/0.3)",
    "  - For diversity and discovery: Content Heavy (0.4/0.6)",
    "  - For balanced approach: Balanced 60/40 or Equal Weights",
    "  - For cold-start books: Content Only (0.0/1.0)",
)
for use_case in use_case_lines:
    print(use_case)
print("\n" + section_rule)

## 7. Save Best Model Configuration

In [None]:
# Save the configuration selected for production, together with its measured metrics
import json

# The saved configuration is a fixed, manual choice: it is NOT picked
# automatically from the metrics. Its weights are looked up in `weight_configs`
# so the stored numbers cannot drift from the configuration that was evaluated.
selected_config_name = 'Balanced 60/40'
weights_by_name = {label: (cw, tw) for cw, tw, label in weight_configs}
selected_collab_w, selected_content_w = weights_by_name[selected_config_name]

# Raises KeyError if this configuration produced no evaluation rows
selected_metrics = summary.loc[selected_config_name]

best_config = {
    'collaborative_weight': selected_collab_w,
    'content_weight': selected_content_w,
    # NOTE(review): duplicates the n_neighbors value used when the recommenders were built
    'n_neighbors': 30,
    'metrics': {
        # float() converts the pandas/numpy scalars into JSON-serialisable values
        'avg_score': float(selected_metrics['Avg Score']),
        'diversity': float(selected_metrics['Diversity']),
        'coverage': float(selected_metrics['Coverage']),
        'novelty': float(selected_metrics['Novelty'])
    }
}

# Explicit encoding keeps the output independent of the platform default
with open('../data/best_model_config.json', 'w', encoding='utf-8') as f:
    json.dump(best_config, f, indent=2)

print("✓ Best model configuration saved to: data/best_model_config.json")
print("\nConfiguration:")
print(json.dumps(best_config, indent=2))