In [None]:
# ✅ Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# ✅ Step 2: Load Dataset
books = pd.read_csv("https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv")
ratings = pd.read_csv("https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv")

print("Books shape:", books.shape)
print("Ratings shape:", ratings.shape)

Books shape: (10000, 23)
Ratings shape: (5976479, 3)


In [None]:
# ✅ Step 3: Filter & Pivot Ratings Matrix
# Relaxed filtering to avoid empty matrix
user_counts = ratings['user_id'].value_counts()
ratings = ratings[ratings['user_id'].isin(user_counts[user_counts >= 50].index)]

book_counts = ratings['book_id'].value_counts()
ratings = ratings[ratings['book_id'].isin(book_counts[book_counts >= 50].index)]

# Merge to get book titles
ratings = ratings.merge(books[['book_id', 'title']], on='book_id')

# Create pivot table (books × users)
book_user_matrix = ratings.pivot_table(index='title', columns='user_id', values='rating')
book_user_matrix.fillna(0, inplace=True)
print("Final matrix shape:", book_user_matrix.shape)

Final matrix shape: (9893, 52525)


In [None]:
# ✅ Step 4: Fit Nearest Neighbors Model
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(book_user_matrix.values)
book_titles = book_user_matrix.index.tolist()

In [None]:
# ✅ Step 5: Recommendation Function
def get_recommends(book_title):
    if book_title not in book_titles:
        print(f"Book '{book_title}' not found in data.")
        return [book_title, []]

    index = book_titles.index(book_title)
    distances, indices = model.kneighbors([book_user_matrix.values[index]], n_neighbors=6)

    recommended = []
    for i in range(1, 6):  # Skip the first match (the book itself)
        title = book_titles[indices[0][i]]
        dist = distances[0][i]
        recommended.append([title, dist])

    return [book_title, recommended]

In [None]:
# ✅ Step 6: Test the Recommender
book_title = "The Hobbit (The Lord of the Rings, #0)"
result = get_recommends(book_title)

# Display results
print("\nRecommendations for:", result[0])
for i, (title, dist) in enumerate(result[1], 1):
    print(f"{i}. {title} (similarity: {round(1 - dist, 2)})")

Book 'The Hobbit (The Lord of the Rings, #0)' not found in data.

Recommendations for: The Hobbit (The Lord of the Rings, #0)


In [None]:
# ✅ Step 7 (Optional): Explore Titles
# Show first few book titles to help testing
print("\nAvailable titles for testing:")
print(book_user_matrix.index[:10].tolist())


Available titles for testing:
[' Angels (Walsh Family, #3)', '#GIRLBOSS', "'Salem's Lot", "'Tis (Frank McCourt, #2)", '1,000 Places to See Before You Die', '10% Happier: How I Tamed the Voice in My Head, Reduced Stress Without Losing My Edge, and Found Self-Help That Actually Works', '100 Bullets, Vol. 1: First Shot, Last Call', '100 Love Sonnets', '100 Selected Poems', "10th Anniversary (Women's Murder Club, #10)"]


In [None]:
get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))")


Book 'The Queen of the Damned (Vampire Chronicles (Paperback))' not found in data.


['The Queen of the Damned (Vampire Chronicles (Paperback))', []]