In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the books table (semicolon-separated, latin-1); malformed rows are
# reported as warnings and skipped rather than aborting the load.
# NOTE(review): this cell reads from "../data" while the users and ratings
# cells read from "./data" — confirm which directory is the intended one.
books = pd.read_csv(
    "../data/BX-Books.csv",
    sep=";",
    encoding="latin-1",
    on_bad_lines='warn',
)

In [None]:
books.head(2)

In [None]:
# Narrow `books` to the six columns listed below; every other column is discarded.
selected_columns = [
    "ISBN",
    "Book-Title",
    "Book-Author",
    "Year-Of-Publication",
    "Publisher",
    "Image-URL-L",
]
books = books[selected_columns]

In [None]:
books.head(2)

In [None]:
# Give the book columns short snake_case names ("ISBN" is left untouched).
# The renamed frame is assigned back instead of using inplace=True: `books`
# was produced by a column selection, and rebinding the name avoids mutating
# that derived frame in place while keeping the cell safe to re-run.
books = books.rename(columns={
    "Book-Title": "title",
    "Book-Author": "author",
    "Year-Of-Publication": "publication_year",
    "Publisher": "publisher",
    "Image-URL-L": "image_url",
})

In [None]:
books.head(2)

In [None]:
# Load the users table (semicolon-separated, latin-1); malformed rows are
# reported as warnings and skipped rather than aborting the load.
users = pd.read_csv(
    "./data/BX-Users.csv",
    sep=";",
    encoding="latin-1",
    on_bad_lines='warn',
)

In [None]:
users.head()

In [None]:
# Load the book-ratings table (semicolon-separated, latin-1); malformed rows
# are reported as warnings and skipped rather than aborting the load.
rating = pd.read_csv(
    "./data/BX-Book-Ratings.csv",
    sep=";",
    encoding='latin-1',
    on_bad_lines='warn',
)

In [None]:
rating.head()
# rating.shape

In [None]:
# Rename the rating columns to snake_case. "ISBN" keeps its name because it is
# the key used to merge ratings with books further down.
# Assigning the result (rather than inplace=True) keeps the cell free of
# in-place mutation, so earlier displays of `rating` are not silently changed.
rating = rating.rename(columns={
    "User-ID": "user_id",
    "Book-Rating": "rating",
})

In [None]:
# Print the (rows, columns) shape of each loaded frame, one per line,
# in the order books, users, rating.
for frame in (books, users, rating):
    print(frame.shape)

In [None]:
rating['user_id'].value_counts()

## Show how many users have given more than 200 ratings

In [None]:
# Boolean Series indexed by user_id: True where that user has more than 200 ratings.
ratings_per_user = rating['user_id'].value_counts()
x = ratings_per_user > 200

# Number of distinct users before the threshold is applied.
print("before: ", x.shape)
# Number of users that pass it (x.loc[x] keeps only the True entries).
# NOTE(review): the author recorded 899 users here on their run — confirm
# against the current data.
print("after: ", x.loc[x].shape)

In [None]:
# Collect the ids of the users who rated more than 200 times.
# x.loc[x] keeps only the True entries of the mask; their index labels are the user ids.
y = x.loc[x].index

In [None]:
# Keep only the ratings written by the heavy raters collected in `y`.
is_heavy_rater = rating['user_id'].isin(y)
ratings = rating.loc[is_heavy_rater]

In [None]:
ratings.head()

In [None]:
# Attach book metadata to each retained rating by joining on the shared ISBN column.
rating_with_books = ratings.merge(
    books,
    on="ISBN",
)

In [None]:
rating_with_books.head(2)
# rating_with_books.shape

In [None]:
# Count the ratings each title received, then turn the title index back into
# a regular column so the result can be merged on "title".
rating_count_by_title = rating_with_books.groupby('title')['rating'].count()
books_num_rating = rating_count_by_title.reset_index()

In [None]:
books_num_rating.head()

In [None]:
# The grouped "rating" column holds a count, not a score, so give it a name
# that says so. Assigning the result (rather than inplace=True) keeps the cell
# free of in-place mutation.
books_num_rating = books_num_rating.rename(columns={
    "rating": "number_of_ratings",
})

In [None]:
# A notebook cell renders only its last expression, so the shape is printed
# explicitly and the preview is left as the final expression; previously the
# head() result was computed and silently discarded.
print(books_num_rating.shape)
books_num_rating.head()

In [None]:
# Add each title's rating count to every rating row by joining on "title".
final_rating = rating_with_books.merge(
    books_num_rating,
    on="title",
)

In [None]:
final_rating.head(1)

# print(final_rating.shape)

In [None]:
# Keep only rows whose title collected at least 50 ratings.
has_enough_ratings = final_rating['number_of_ratings'] >= 50
final_rating = final_rating.loc[has_enough_ratings]

In [None]:
final_rating.shape

In [None]:
# Keep a single row per (user, title) pair.
# `final_rating` is the result of a boolean filter, so dropping duplicates with
# inplace=True mutates a derived frame and can raise SettingWithCopyWarning on
# pandas versions without copy-on-write. Rebinding the name to the
# de-duplicated frame gives the same rows without the in-place mutation.
final_rating = final_rating.drop_duplicates(subset=["user_id", "title"])

## Final ratings after filtering and removing all duplicates ##

In [None]:
final_rating.shape

## Creating the pivot table ##

In [None]:
# Build the title x user matrix: one row per title, one column per user_id,
# each cell holding that user's rating of that title. Pairs with no rating
# come out as NaN.
book_pivot_table = final_rating.pivot_table(
    index="title",
    columns='user_id',
    values="rating",
)

In [None]:
book_pivot_table

In [None]:
book_pivot_table.shape

In [None]:
# Treat "user never rated this title" as 0 so the matrix has no NaN.
# Assigning the result (rather than inplace=True) avoids mutating the pivot
# table that the previous cells displayed.
book_pivot_table = book_pivot_table.fillna(0)

In [None]:
book_pivot_table

In [None]:
from scipy.sparse import csr_matrix  ## import this to only use those fields who have value and nullify the 0 values

In [None]:
# Convert the pivot table to SciPy's compressed-sparse-row format, which
# stores only the non-zero entries.
book_sparse = csr_matrix(book_pivot_table)

In [None]:
book_sparse

In [None]:
from sklearn.neighbors import NearestNeighbors

In [None]:
# Nearest-neighbour model using brute-force search; every other option is
# left at its scikit-learn default.
book_model = NearestNeighbors(algorithm='brute')

In [None]:
# Fit on the sparse title x user matrix, so each row (one title) becomes a
# point that later queries can return as a neighbour.
book_model.fit(book_sparse)

In [None]:
book_pivot_table.index[38] ## this is how we see each book index

In [None]:
# Try the fitted model on the title at row position 5: reshape its ratings
# into a single-row 2-D array and ask for the 5 nearest rows.
sample_row = book_pivot_table.iloc[5, :].values.reshape(1, -1)
distance, suggesion = book_model.kneighbors(sample_row, n_neighbors=5)

In [None]:
distance

In [None]:
suggesion

In [None]:
# `suggesion` has one row of neighbour positions per query row; look each row
# up in the pivot table's index to turn the positions into titles.
for neighbour_positions in suggesion:
    print(book_pivot_table.index[neighbour_positions])

In [None]:
# book_pivot_table.index[741]
book_pivot_table.index

In [None]:
# All titles in pivot-table row order (this is the table's index object itself).
all_book_name = book_pivot_table.index

In [None]:
all_book_name

In [None]:
import pickle
from pathlib import Path

# Persist the fitted model and the tables it depends on.
# Every file is opened in 'wb' (write-binary) mode inside a `with` block so the
# handle is flushed and closed even if pickling raises; the earlier bare
# open(...) calls left three handles open.
models_dir = Path("Models")
# open() does not create missing directories, so make sure the target exists.
models_dir.mkdir(parents=True, exist_ok=True)

artifacts = {
    "Book_Recc_Model.pkl": book_model,
    "all_book_name.pkl": all_book_name,
    "final_ratings.pkl": final_rating,
    "book_pivot.pkl": book_pivot_table,
}
for file_name, artifact in artifacts.items():
    with open(models_dir / file_name, 'wb') as f:
        pickle.dump(artifact, f)

In [None]:
def recommendedBook():
    """Interactively print books similar to a title typed by the user.

    Prompts for a book title and for the number of suggestions wanted. The
    title is matched case-insensitively against the index of
    ``book_pivot_table``; the matching row is sent to ``book_model.kneighbors``
    and the titles of the returned neighbours are printed.

    The queried book itself is left out of the printed list, and the request
    is capped at the number of rows in ``book_pivot_table``. Invalid input
    (unknown title, non-numeric or non-positive count) is reported with a
    message rather than an exception.

    Returns:
        None. All results are printed.
    """
    book_name = input("Enter your book name: ").strip()
    raw_count = input("Number of suggestions: ").strip()

    # Validate the count up front so bad input gives a message, not a traceback.
    try:
        num_of_sugg = int(raw_count)
    except ValueError:
        print("Number of suggestions must be a whole number.")
        return
    if num_of_sugg < 1:
        print("Number of suggestions must be at least 1.")
        return

    # Case-insensitive title lookup against the pivot table's index.
    lower_index = book_pivot_table.index.str.lower()
    matches = np.where(lower_index == book_name.lower())[0]
    if len(matches) == 0:
        print("Book not found. Please try another book name.")
        return
    book_id = matches[0]

    # The query row is part of the data being searched, so it comes back as one
    # of its own neighbours. Ask for one extra result to make up for dropping
    # it, but never for more rows than the table holds.
    n_neighbors = min(num_of_sugg + 1, len(book_pivot_table))
    query = book_pivot_table.iloc[book_id, :].values.reshape(1, -1)
    _, suggestion = book_model.kneighbors(query, n_neighbors=n_neighbors)

    # Filter by position rather than assuming the query is listed first:
    # rows with identical ratings tie at distance 0.
    neighbour_ids = [idx for idx in suggestion[0] if idx != book_id][:num_of_sugg]

    print("Recommended books:")
    for idx in neighbour_ids:
        print(book_pivot_table.index[idx])

# Example of how to use the function
# recommendedBook()


 ## Demo of the recommendedBook function

In [None]:
# recommendedBook() prints its own output and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" to the output.
recommendedBook()