In [1]:
# 06 - Personalized Book Recommendations (FINAL WORKING VERSION)
#
# Notebook outline: load the processed ID mappings, book metadata, ratings and
# graph tensors, restore a LightGCN model, then print a popularity baseline
# followed by per-user top-k recommendations.

import torch
import pandas as pd
import pickle
import os
# `degree` counts edge endpoints per node; LightGCN.forward uses it to build the
# symmetric 1/sqrt(deg_i * deg_j) edge normalisation.
from torch_geometric.utils import degree

print("Loading data...")

The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.
Starting recommendation notebook...


In [7]:
# Load mappings and data
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load mapping files that were produced by this project's own
# preprocessing step, never files from an untrusted source.
with open('data/processed/user_mapping.pkl', 'rb') as f:
    user_map = pickle.load(f)
user_to_node = user_map['user_to_node']   # raw User-ID -> user node index
node_to_user = user_map['node_to_user']   # user node index -> raw User-ID

with open('data/processed/book_mapping.pkl', 'rb') as f:
    book_map = pickle.load(f)
book_to_node = book_map['book_to_node']   # ISBN -> book node index
node_to_book = book_map['node_to_book']   # book node index -> ISBN

# Metadata is indexed by ISBN so titles/authors can be joined onto ISBN lists.
book_metadata = pd.read_csv('data/processed/book_metadata.csv').set_index('ISBN')
ratings = pd.read_csv('data/processed/filtered_ratings.csv')

# Load graph
# The saved graph object contains PyG container classes; they must be
# allow-listed before torch.load will unpickle them in weights-only mode.
from torch_geometric.data.data import DataTensorAttr, DataEdgeAttr
from torch_geometric.data.storage import GlobalStorage
import torch.serialization
torch.serialization.add_safe_globals([DataTensorAttr, DataEdgeAttr, GlobalStorage])

# map_location='cpu': the model below is built on the CPU, and artifacts saved
# from a GPU training run would otherwise fail to load on a CPU-only machine
# (or land on CUDA and mismatch the CPU model).
data = torch.load('data/processed/graph_data.pt', map_location='cpu')
num_users = data.num_users
num_books = data.num_books
train_edge_index = torch.load('data/processed/train_edge_index.pt', map_location='cpu')

print(f"Loaded: {num_users} users, {num_books} books")

Loaded: 3404 users, 2178 books


In [8]:
# LightGCN model; keeps num_users/num_books so forward() can split the output.
class LightGCN(torch.nn.Module):
    """Embedding-propagation recommender over a joint user/book node set.

    Users and books each get a learnable embedding table. ``forward`` stacks
    the two tables into one node matrix (users first, then books), propagates
    it ``num_layers`` times through a degree-normalised sparse adjacency, and
    averages the input embeddings with every propagated layer.
    """

    def __init__(self, num_users, num_books, embedding_dim=64, num_layers=3):
        """Create both embedding tables and initialise them with N(0, 0.01^2).

        Args:
            num_users: number of user nodes (rows of the user table).
            num_books: number of book nodes (rows of the item table).
            embedding_dim: width of every embedding vector.
            num_layers: number of propagation steps applied in ``forward``.
        """
        super().__init__()
        # Sizes are stored because forward() needs them to split the result.
        self.num_users, self.num_books = num_users, num_books
        self.embedding_dim, self.num_layers = embedding_dim, num_layers

        # Creation and init order is kept (user, item) so RNG draws match.
        self.user_embedding = torch.nn.Embedding(num_users, embedding_dim)
        self.item_embedding = torch.nn.Embedding(num_books, embedding_dim)
        for table in (self.user_embedding, self.item_embedding):
            torch.nn.init.normal_(table.weight, std=0.01)

    def forward(self, edge_index):
        """Propagate embeddings over ``edge_index`` and return the layer mean.

        Args:
            edge_index: tensor unpacked into (source, target) node index rows;
                indices address the stacked user+book node matrix.
                # assumes edges are stored in both directions — TODO confirm

        Returns:
            ``(user_emb, item_emb)``: the averaged embeddings split back into
            the first ``num_users`` rows and the remaining ``num_books`` rows.
        """
        node_emb = torch.cat(
            [self.user_embedding.weight, self.item_embedding.weight], dim=0
        )
        total_nodes = node_emb.size(0)

        src, dst = edge_index
        # Degree is counted on the source row only.
        inv_sqrt_deg = degree(src, num_nodes=total_nodes).pow(-0.5)
        # Isolated nodes give 0 ** -0.5 == inf; zero them so they contribute nothing.
        inv_sqrt_deg[inv_sqrt_deg == float('inf')] = 0
        edge_weight = inv_sqrt_deg[src] * inv_sqrt_deg[dst]
        adjacency = torch.sparse_coo_tensor(
            edge_index, edge_weight, (total_nodes, total_nodes)
        )

        layer_outputs = [node_emb]
        hidden = node_emb
        for _ in range(self.num_layers):
            hidden = adjacency @ hidden
            layer_outputs.append(hidden)

        # Uniform average over the input embeddings and every propagated layer.
        pooled = sum(layer_outputs) / len(layer_outputs)
        users_part, books_part = torch.split(pooled, [self.num_users, self.num_books])
        return users_part, books_part

# Create and load model
model = LightGCN(num_users, num_books)

if os.path.exists('models/best_lightgcn.pt'):
    # map_location='cpu': the model lives on the CPU, and a checkpoint saved
    # from a GPU training run cannot be deserialised on a CPU-only machine
    # without remapping its tensors.
    model.load_state_dict(torch.load('models/best_lightgcn.pt', map_location='cpu'))
    print("Loaded trained model!")
else:
    # Fall back to the freshly initialised embeddings so the rest of the
    # notebook still runs end to end.
    print("No trained model — using random embeddings (still shows pipeline)")

Loaded trained model!


In [16]:
# Recommendation function
@torch.no_grad()
def get_recommendations(user_id, k=10):
    """Return the top-``k`` unrated books for ``user_id`` as a DataFrame.

    Scores every book by the dot product of the user's and the book's LightGCN
    embeddings, removes books the user already rated, and joins titles and
    authors from ``book_metadata``.

    Args:
        user_id: raw User-ID as it appears in ``ratings`` / ``user_to_node``.
        k: maximum number of recommendations to return. Fewer rows come back
            when the user has fewer than ``k`` unrated books.

    Returns:
        DataFrame with columns ``ISBN``, ``Title``, ``Author``, ordered from
        highest to lowest score. Empty (same columns) when the user is
        unknown, ``k <= 0``, or every book has already been rated.
    """
    empty_result = pd.DataFrame(columns=['ISBN', 'Title', 'Author'])

    if user_id not in user_to_node:
        print(f"User {user_id} not in filtered dataset")
        return empty_result
    # A non-positive k used to return a non-empty list or raise in torch.topk.
    if k <= 0:
        return empty_result

    model.eval()
    user_emb, item_emb = model(train_edge_index)

    u_node = user_to_node[user_id]
    scores = user_emb[u_node] @ item_emb.t()

    # Mask rated books
    user_rated = set(ratings.loc[ratings['User-ID'] == user_id, 'ISBN'].tolist())
    # `node_to_book` maps book node indices 0..num_books-1 (no user offset), so use i directly
    mask = torch.tensor(
        [node_to_book[i] in user_rated for i in range(num_books)],
        dtype=torch.bool,  # explicit dtype keeps an empty catalogue from yielding a float mask
        device=scores.device,
    )
    scores[mask] = -float('inf')

    # torch.topk raises if asked for more entries than exist. The old request of
    # k + len(user_rated) could exceed num_books; rated books are already
    # masked, so only min(k, #unrated) entries are ever needed.
    num_candidates = int((~mask).sum().item())
    top_k = min(k, num_candidates)
    if top_k == 0:
        return empty_result

    _, top_idx = torch.topk(scores, top_k)
    # idx refers to item index within item_emb (0..num_books-1).
    # The membership filter is a cheap safety net on top of the mask.
    top_isbns = [
        isbn
        for isbn in (node_to_book[i] for i in top_idx.tolist())
        if isbn not in user_rated
    ]

    if not top_isbns:
        return empty_result

    rec_df = pd.DataFrame({'ISBN': top_isbns})
    # Left join keeps the score order and keeps books missing from the metadata.
    rec_df = rec_df.merge(book_metadata[['Book-Title', 'Book-Author']].reset_index(), on='ISBN', how='left')
    rec_df['Book-Title'] = rec_df['Book-Title'].fillna('Unknown Title')
    rec_df['Book-Author'] = rec_df['Book-Author'].fillna('Unknown Author')
    rec_df = rec_df[['ISBN', 'Book-Title', 'Book-Author']]
    rec_df.columns = ['ISBN', 'Title', 'Author']
    return rec_df

In [15]:
# Popularity Baseline
# Non-personalised reference point: the ten ISBNs with the most rows in `ratings`.
print("\n=== Popularity Baseline (Top 10 Most Rated Books) ===\n")
popular_isbns = ratings['ISBN'].value_counts().head(10).index
pop_df = pd.DataFrame({'ISBN': popular_isbns})
# Left join keeps popularity order and keeps ISBNs that have no metadata row;
# those get placeholder text instead of NaN.
pop_recs = (
    pop_df
    .merge(book_metadata[['Book-Title', 'Book-Author']].reset_index(), on='ISBN', how='left')
    .fillna({'Book-Title': 'Unknown Title', 'Book-Author': 'Unknown Author'})
    [['ISBN', 'Book-Title', 'Book-Author']]
    .rename(columns={'Book-Title': 'Title', 'Book-Author': 'Author'})
)
print(pop_recs[['Title', 'Author']].to_string(index=False))


=== Popularity Baseline (Top 10 Most Rated Books) ===

                                                           Title         Author
                                       The Lovely Bones: A Novel   Alice Sebold
                                               The Da Vinci Code      Dan Brown
Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))  J. K. Rowling
                             The Red Tent (Bestselling Backlist)  Anita Diamant
              Harry Potter and the Order of the Phoenix (Book 5)  J. K. Rowling
                                                   Unknown Title Unknown Author
                                      Interview with the Vampire      Anne Rice
                                         The Secret Life of Bees  Sue Monk Kidd
                 Divine Secrets of the Ya-Ya Sisterhood: A Novel  Rebecca Wells
                                                     Wild Animus   Rich Shapero


In [17]:
# Personalized Recommendations
# Show LightGCN top-10 lists for the first five users in the mapping.
print("\n=== Personalized LightGCN Recommendations ===\n")

sample_users = list(user_to_node)[:5]

for uid in sample_users:
    print(f"User-ID: {uid}")
    recs = get_recommendations(uid, k=10)
    # An empty frame means the user was unknown or had nothing left to recommend.
    report = (
        "No recommendations (all books rated or error)"
        if recs.empty
        else recs[['Title', 'Author']].to_string(index=False)
    )
    print(report)
    print("-" * 60)


=== Personalized LightGCN Recommendations ===

User-ID: 242
                                                           Title        Author
Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback)) J. K. Rowling
              Harry Potter and the Order of the Phoenix (Book 5) J. K. Rowling
               Harry Potter and the Prisoner of Azkaban (Book 3) J. K. Rowling
                                       The Lovely Bones: A Novel  Alice Sebold
                    Harry Potter and the Goblet of Fire (Book 4) J. K. Rowling
                Harry Potter and the Chamber of Secrets (Book 2) J. K. Rowling
                                      Interview with the Vampire     Anne Rice
                  Harry Potter and the Sorcerer's Stone (Book 1) J. K. Rowling
                                           To Kill a Mockingbird    Harper Lee
                                               The Da Vinci Code     Dan Brown
------------------------------------------------------------
User-ID: 