In [14]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
import difflib

In [15]:
# Download the goodbooks-10k ratings and book metadata CSVs from GitHub.
# The generator is consumed in order, so ratings is read first, then books.
ratings, books = (
    pd.read_csv(csv_url)
    for csv_url in (
        "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv",
        "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv",
    )
)

# Quick sanity check: (rows, columns) of each frame.
print("Ratings shape:", ratings.shape)
print("Books shape:", books.shape)

Ratings shape: (5976479, 3)
Books shape: (10000, 23)


In [28]:
# Build the (book title x user) rating matrix and fit the nearest-neighbour model.
#
# Produces the globals used by the recommendation cell: `pivot` (dense frame
# whose index holds the book titles), `book_sparse_matrix` (CSR copy of the
# pivot values) and `model` (the fitted NearestNeighbors instance).
#
# NOTE: `ratings` is rebound to the filtered + merged frame, as before.
# Re-running this cell therefore re-applies the filters to already-filtered
# data; re-run the loading cell first for an exact reproduction.

MIN_RATINGS_PER_USER = 50  # keep users with at least this many ratings
MIN_RATINGS_PER_BOOK = 10  # keep books with at least this many ratings (counted after the user filter)

# A previous run of this cell leaves a 'title' column on `ratings`; merging
# again would yield title_x/title_y and make the pivot below fail with a
# KeyError. Dropping it first keeps the cell re-runnable (no-op on a fresh load).
ratings = ratings.drop(columns=['title'], errors='ignore')

# Keep only ratings made by sufficiently active users.
user_counts = ratings['user_id'].value_counts()
active_users = user_counts[user_counts >= MIN_RATINGS_PER_USER].index
ratings = ratings[ratings['user_id'].isin(active_users)]
print("Shape after user filtering:", ratings.shape)

# Keep only books that still have enough ratings after the user filter.
book_counts = ratings['book_id'].value_counts()
popular_books = book_counts[book_counts >= MIN_RATINGS_PER_BOOK].index
ratings = ratings[ratings['book_id'].isin(popular_books)]
print("Shape after book filtering:", ratings.shape)

# Attach each book's title (default inner join on the shared key).
ratings = ratings.merge(books[['book_id', 'title']], on='book_id')
print("Shape after merge:", ratings.shape)

# Pivot to a title x user table. pivot_table aggregates with the mean by
# default, so several ratings for the same (title, user) pair are averaged;
# missing (title, user) pairs become 0.
pivot = ratings.pivot_table(index='title', columns='user_id', values='rating').fillna(0)
print("Shape of pivot table:", pivot.shape)

# Sparse copy of the pivot values; row order matches pivot.index.
book_sparse_matrix = csr_matrix(pivot.values)

# Brute-force nearest neighbours under cosine distance, one row per title.
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(book_sparse_matrix)

print("Model trained successfully!")

Ratings shape before filtering: (5976479, 3)
Books shape: (10000, 23)
Shape after user filtering: (5942836, 3)
Shape after book filtering: (5942793, 3)
Shape after merge: (5942793, 4)
Shape of pivot table: (9958, 52525)
Model trained successfully!


In [29]:
import difflib

def get_recommends(book_title, n_recommendations=5):
    """Recommend the books whose rating vectors are nearest to ``book_title``.

    The title is fuzzy-matched against ``pivot.index`` with
    ``difflib.get_close_matches`` (best single match, default cutoff), so an
    approximate title is accepted. Relies on the notebook globals ``pivot``,
    ``book_sparse_matrix`` and ``model``.

    Args:
        book_title: Title (or approximate title) of the book to look up.
        n_recommendations: Maximum number of neighbours to return. Defaults
            to 5, the number the function has always returned.

    Returns:
        ``[matched_title, [[neighbour_title, distance], ...]]`` with the
        neighbours in the order reported by ``model.kneighbors``. If no title
        is close enough, returns ``[book_title, []]`` with the input unchanged.
    """
    closest_match = difflib.get_close_matches(book_title, pivot.index, n=1)
    if not closest_match:
        return [book_title, []]

    matched_title = closest_match[0]
    if n_recommendations <= 0:
        return [matched_title, []]

    idx = pivot.index.get_loc(matched_title)

    # Ask for one extra neighbour because the query row is itself part of the
    # fitted data, but never for more rows than the matrix holds (kneighbors
    # raises if n_neighbors exceeds the number of fitted samples).
    n_neighbors = min(n_recommendations + 1, book_sparse_matrix.shape[0])
    distances, indices = model.kneighbors(book_sparse_matrix[idx], n_neighbors=n_neighbors)

    # Exclude the query by row index instead of assuming it is the first hit:
    # a book with an identical rating vector ties at distance 0 and may be
    # listed before the query itself.
    recommended_books = [
        [pivot.index[neighbor_idx], distance]
        for neighbor_idx, distance in zip(indices[0], distances[0])
        if neighbor_idx != idx
    ]

    return [matched_title, recommended_books[:n_recommendations]]


In [30]:
# Demo: the query title is deliberately not an exact catalogue title, so this
# exercises the fuzzy title matching inside get_recommends.
result = get_recommends(
    book_title="Harry Potter and the Chamber of Secrets (Book 2)"
)
print(result)

['Harry Potter and the Chamber of Secrets (Harry Potter, #2)', [['Harry Potter and the Prisoner of Azkaban (Harry Potter, #3)', np.float64(0.09357128007834048)], ['Harry Potter and the Goblet of Fire (Harry Potter, #4)', np.float64(0.11517681198172391)], ['Harry Potter and the Half-Blood Prince (Harry Potter, #6)', np.float64(0.13613532775521875)], ['Harry Potter and the Order of the Phoenix (Harry Potter, #5)', np.float64(0.13758823688501398)], ['Harry Potter and the Deathly Hallows (Harry Potter, #7)', np.float64(0.16585632795378547)]]]
