In [2]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import pickle
import os

# Load data (Update paths if files are in a different folder)
books = pd.read_csv('Data/BX-Books.csv', sep=';', on_bad_lines='skip', encoding='latin-1')
books = books[['ISBN','Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher','Image-URL-L']]
books.rename(columns={"Book-Title":'title',
                      'Book-Author':'author',
                      "Year-Of-Publication":'year',
                      "Publisher":"publisher",
                      "Image-URL-L":"image_url"}, inplace=True)

users = pd.read_csv('Data/BX-Users.csv', sep=';', on_bad_lines='skip', encoding='latin-1')
users.rename(columns={"User-ID":'user_id', 'Location':'location', "Age":'age'}, inplace=True)

ratings = pd.read_csv('Data/BX-Book-Ratings.csv', sep=';', on_bad_lines='skip', encoding='latin-1')
ratings.rename(columns={"User-ID":'user_id', 'Book-Rating':'rating'}, inplace=True)

# Filter users with more than 200 ratings
active_users = ratings['user_id'].value_counts()
active_users = active_users[active_users > 200].index
ratings = ratings[ratings['user_id'].isin(active_users)]

# Merge ratings with book info
ratings_with_books = ratings.merge(books, on='ISBN')

# Count number of ratings per book
number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()
number_rating.rename(columns={'rating': 'num_of_rating'}, inplace=True)

# Merge to filter only popular books
final_rating = ratings_with_books.merge(number_rating, on='title')
final_rating = final_rating[final_rating['num_of_rating'] >= 50]
final_rating.drop_duplicates(['user_id', 'title'], inplace=True)

# Create pivot table
book_pivot = final_rating.pivot_table(columns='user_id', index='title', values='rating')
book_pivot.fillna(0, inplace=True)

# Create sparse matrix
book_sparse = csr_matrix(book_pivot)

# Train KNN model
model = NearestNeighbors(algorithm='brute')
model.fit(book_sparse)

# Save model artifacts
os.makedirs('artifacts', exist_ok=True)
pickle.dump(model, open('artifacts/model.pkl', 'wb'))
pickle.dump(list(book_pivot.index), open('artifacts/book_names.pkl', 'wb'))
pickle.dump(final_rating, open('artifacts/final_rating.pkl', 'wb'))
pickle.dump(book_pivot, open('artifacts/book_pivot.pkl', 'wb'))

# Show top 50 most rated books
top_50_books = number_rating.sort_values('num_of_rating', ascending=False).head(50)
print("\nTop 50 Most Rated Books:\n")
print(top_50_books[['title', 'num_of_rating']].reset_index(drop=True))

# Recommendation function
def recommend_book(book_name):
    if book_name not in book_pivot.index:
        print(f"\n Book '{book_name}' not found in the dataset.")
        return
    
    book_id = np.where(book_pivot.index == book_name)[0][0]
    distances, suggestions = model.kneighbors(book_pivot.iloc[book_id, :].values.reshape(1, -1), n_neighbors=6)

    print(f"\nYou searched for: '{book_name}'\n")
    print("📚 Recommended books:\n")
    
    for i in suggestions[0]:
        suggested_title = book_pivot.index[i]
        if suggested_title != book_name:
            print(f"➡ {suggested_title}")


  books = pd.read_csv('Data/BX-Books.csv', sep=';', on_bad_lines='skip', encoding='latin-1')



Top 50 Most Rated Books:

                                                title  num_of_rating
0                                         Wild Animus            363
1                               Bridget Jones's Diary            277
2                           The Lovely Bones: A Novel            270
3                                        The Notebook            241
4                                   The Pelican Brief            236
5                          The Nanny Diaries: A Novel            230
6                                     A Painted House            228
7     Divine Secrets of the Ya-Ya Sisterhood: A Novel            228
8                                            The Firm            227
9                                   The Da Vinci Code            224
10                                The Horse Whisperer            213
11                                Message in a Bottle            212
12                                     A Time to Kill            210
13     

In [3]:
# Testing
book_name = "Table For Two"
recommend_book(book_name)


You searched for: 'Table For Two'

📚 Recommended books:

➡ No Safe Place
➡ Truly, Madly Manhattan
➡ Exclusive
➡ Summer Pleasures
➡ Homeport
