In [None]:
import torch
import pandas as pd
import pickle

# Load mappings and metadata
with open('data/processed/user_mapping.pkl', 'rb') as f:
    user_map = pickle.load(f)
with open('data/processed/book_mapping.pkl', 'rb') as f:
    book_map = pickle.load(f)

book_metadata = pd.read_csv('data/processed/book_metadata.csv')

# Dummy recommendations: top 10 most popular books in filtered set
ratings = pd.read_csv('data/processed/filtered_ratings.csv')
popular_books = ratings['ISBN'].value_counts().head(10).index
top_books = book_metadata[book_metadata['ISBN'].isin(popular_books)]

print("Top 10 Popular Books (Popularity Baseline):")
print(top_books[['Book-Title', 'Book-Author']].to_string(index=False))

# For any user
def get_popularity_recommendations(n=10):
    return top_books.head(n)

print("\nExample recommendations for any user:")
print(get_popularity_recommendations(10)[['Book-Title', 'Book-Author']])

Top 10 Popular Books (Popularity Baseline):
                                                      Book-Title   Book-Author
                                                     Wild Animus  Rich Shapero
                                         The Secret Life of Bees Sue Monk Kidd
                                       The Lovely Bones: A Novel  Alice Sebold
                             The Red Tent (Bestselling Backlist) Anita Diamant
                                               The Da Vinci Code     Dan Brown
                 Divine Secrets of the Ya-Ya Sisterhood: A Novel Rebecca Wells
Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback)) J. K. Rowling
                                      Interview with the Vampire     Anne Rice
              Harry Potter and the Order of the Phoenix (Book 5) J. K. Rowling

Example recommendations for any user:
                                            Book-Title    Book-Author
2                                          Wild Animus   

In [10]:
# 06 - Personalized Book Recommendations (Final Fixed Version)

import torch
import pandas as pd
import pickle
import os

print("Loading data and mappings...")

Loading data and mappings...


In [11]:
# Load mappings and metadata
with open('data/processed/user_mapping.pkl', 'rb') as f:
    user_map = pickle.load(f)
node_to_user = user_map['node_to_user']
user_to_node = user_map['user_to_node']

with open('data/processed/book_mapping.pkl', 'rb') as f:
    book_map = pickle.load(f)
node_to_book = book_map['node_to_book']
book_to_node = book_map['book_to_node']

book_metadata = pd.read_csv('data/processed/book_metadata.csv').set_index('ISBN')
ratings = pd.read_csv('data/processed/filtered_ratings.csv')

# Load graph info
from torch_geometric.data.data import DataTensorAttr, DataEdgeAttr
from torch_geometric.data.storage import GlobalStorage
import torch.serialization
torch.serialization.add_safe_globals([DataTensorAttr, DataEdgeAttr, GlobalStorage])

data = torch.load('data/processed/graph_data.pt')
num_users = data.num_users
num_books = data.num_books

train_edge_index = torch.load('data/processed/train_edge_index.pt')

print(f"Loaded: {num_users} users, {num_books} books, {len(ratings)} ratings")

Loaded: 3404 users, 2178 books, 47610 ratings


In [12]:
# Re-define LightGCN model
class LightGCN(torch.nn.Module):
    def __init__(self, num_users, num_books, embedding_dim=64, num_layers=3):
        super().__init__()
        self.num_users = num_users
        self.num_books = num_books
        self.user_embedding = torch.nn.Embedding(num_users, embedding_dim)
        self.item_embedding = torch.nn.Embedding(num_books, embedding_dim)
        torch.nn.init.normal_(self.user_embedding.weight, std=0.01)
        torch.nn.init.normal_(self.item_embedding.weight, std=0.01)
    
    def forward(self, edge_index):
        x = torch.cat([self.user_embedding.weight, self.item_embedding.weight], dim=0)
        outs = [x]
        row, col = edge_index
        deg = torch_geometric.utils.degree(row, num_nodes=x.size(0))
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        sparse = torch.sparse_coo_tensor(edge_index, norm, (x.size(0), x.size(0)))
        for _ in range(3):  # num_layers = 3
            x = sparse @ x
            outs.append(x)
        final = sum(outs) / len(outs)
        return torch.split(final, [self.num_users, self.num_books])

model = LightGCN(num_users, num_books)

# Load trained model if exists
if os.path.exists('models/best_lightgcn.pt'):
    model.load_state_dict(torch.load('models/best_lightgcn.pt'))
    print("Loaded trained LightGCN model!")
else:
    print("No trained model found â€” using random embeddings for demonstration")

Loaded trained LightGCN model!


In [13]:
# Fixed recommendation function (robust to missing metadata)
@torch.no_grad()
def get_recommendations(user_id, k=10):
    if user_id not in user_to_node:
        print(f"User {user_id} not in filtered dataset")
        return pd.DataFrame()
    
    model.eval()
    user_emb, item_emb = model(train_edge_index)
    
    u_node = user_to_node[user_id]
    scores = user_emb[u_node] @ item_emb.t()
    
    # Mask rated books
    user_rated = set(ratings[ratings['User-ID'] == user_id]['ISBN'])
    mask = torch.tensor([node_to_book[node + num_users] in user_rated for node in range(num_books)])
    scores[mask] = -float('inf')
    
    _, top_idx = torch.topk(scores, k + len(user_rated))  # extra for masking
    top_isbns = []
    for idx in top_idx:
        isbn = node_to_book[idx.item() + num_users]
        if isbn not in user_rated:
            top_isbns.append(isbn)
        if len(top_isbns) == k:
            break
    
    # Safe merge
    rec_df = pd.DataFrame({'ISBN': top_isbns})
    rec_df = rec_df.merge(book_metadata[['Book-Title', 'Book-Author']].reset_index(), on='ISBN', how='left')
    rec_df['Book-Title'] = rec_df['Book-Title'].fillna('Unknown Title')
    rec_df['Book-Author'] = rec_df['Book-Author'].fillna('Unknown Author')
    rec_df = rec_df[['ISBN', 'Book-Title', 'Book-Author']]
    rec_df.columns = ['ISBN', 'Title', 'Author']
    
    return rec_df

In [14]:
# Popularity Baseline (Fixed)
popular_isbns = ratings['ISBN'].value_counts().head(10).index
pop_df = pd.DataFrame({'ISBN': popular_isbns})
pop_recs = pop_df.merge(book_metadata[['Book-Title', 'Book-Author']].reset_index(), on='ISBN', how='left')
pop_recs['Book-Title'] = pop_recs['Book-Title'].fillna('Unknown Title')
pop_recs['Book-Author'] = pop_recs['Book-Author'].fillna('Unknown Author')
pop_recs = pop_recs[['ISBN', 'Book-Title', 'Book-Author']]
pop_recs.columns = ['ISBN', 'Title', 'Author']

print("=== Popularity Baseline (Top 10 Most Rated Books) ===")
print(pop_recs[['Title', 'Author']].to_string(index=False))

=== Popularity Baseline (Top 10 Most Rated Books) ===
                                                           Title         Author
                                       The Lovely Bones: A Novel   Alice Sebold
                                               The Da Vinci Code      Dan Brown
Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))  J. K. Rowling
                             The Red Tent (Bestselling Backlist)  Anita Diamant
              Harry Potter and the Order of the Phoenix (Book 5)  J. K. Rowling
                                                   Unknown Title Unknown Author
                                      Interview with the Vampire      Anne Rice
                                         The Secret Life of Bees  Sue Monk Kidd
                 Divine Secrets of the Ya-Ya Sisterhood: A Novel  Rebecca Wells
                                                     Wild Animus   Rich Shapero


In [21]:
# ...existing code...
@torch.no_grad()
def get_recommendations(user_id, k=10):
    if user_id not in user_to_node:
        print(f"User {user_id} not in filtered dataset")
        return pd.DataFrame()
    
    model.eval()
    user_emb, item_emb = model(train_edge_index)
    
    u_node = user_to_node[user_id]
    scores = user_emb[u_node] @ item_emb.t()
    
    # Mask rated books
    user_rated = set(ratings[ratings['User-ID'] == user_id]['ISBN'])
    
    # If user rated all books, return empty
    if len(user_rated) >= num_books:
        return pd.DataFrame()
    
    mask_list = [node_to_book[node + num_users] in user_rated for node in range(num_books)]
    mask = torch.tensor(mask_list, dtype=torch.bool, device=scores.device)
    scores = scores.clone()
    scores[mask] = -float('inf')
    
    # Ensure we don't request more than available items
    needed_k = min(k + len(user_rated), num_books)
    _, top_idx = torch.topk(scores, needed_k)
    
    top_isbns = []
    for idx in top_idx:
        isbn = node_to_book[int(idx.item()) + num_users]
        if isbn not in user_rated:
            top_isbns.append(isbn)
        if len(top_isbns) == k:
            break
    
    rec_df = pd.DataFrame({'ISBN': top_isbns})
    rec_df = rec_df.merge(book_metadata[['Book-Title', 'Book-Author']].reset_index(), on='ISBN', how='left')
    rec_df['Book-Title'] = rec_df['Book-Title'].fillna('Unknown Title')
    rec_df['Book-Author'] = rec_df['Book-Author'].fillna('Unknown Author')
    rec_df = rec_df[['ISBN', 'Book-Title', 'Book-Author']]
    rec_df.columns = ['ISBN', 'Title', 'Author']
    
    return rec_df
# ...existing code...

In [None]:
# Define sample users and show recommendations
sample_users = list(user_to_node.keys())[:5]  # First 5 users in your filtered set

print("\n=== Personalized LightGCN Recommendations ===\n")
for uid in sample_users:
    print(f"User-ID: {uid}")
    recs = get_recommendations(uid, k=10)
    if not recs.empty:
        print(recs[['Title', 'Author']].to_string(index=False))
    else:
        print("No recommendations (user has no unrated books or error)")
    print("-" * 60)


=== Personalized LightGCN Recommendations ===

User-ID: 242
Error generating recommendations for user 242: name 'torch_geometric' is not defined
------------------------------------------------------------
User-ID: 254
Error generating recommendations for user 254: name 'torch_geometric' is not defined
------------------------------------------------------------
User-ID: 507
Error generating recommendations for user 507: name 'torch_geometric' is not defined
------------------------------------------------------------
User-ID: 638
Error generating recommendations for user 638: name 'torch_geometric' is not defined
------------------------------------------------------------
User-ID: 643
Error generating recommendations for user 643: name 'torch_geometric' is not defined
------------------------------------------------------------
