In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

books = pd.read_csv("BX-Books.csv", sep=';', encoding="latin-1", on_bad_lines='skip')
users = pd.read_csv("BX-Users.csv", sep=';', encoding="latin-1", on_bad_lines='skip')
ratings = pd.read_csv("BX-Book-Ratings.csv", sep=';', encoding="latin-1", on_bad_lines='skip')

print(books.head(3))

         ISBN            Book-Title           Book-Author Year-Of-Publication  \
0  0195153448   Classical Mythology    Mark P. O. Morford                2002   
1  0002005018          Clara Callan  Richard Bruce Wright                2001   
2  0060973129  Decision in Normandy          Carlo D'Este                1991   

                 Publisher                                        Image-URL-S  \
0  Oxford University Press  http://images.amazon.com/images/P/0195153448.0...   
1    HarperFlamingo Canada  http://images.amazon.com/images/P/0002005018.0...   
2          HarperPerennial  http://images.amazon.com/images/P/0060973129.0...   

                                         Image-URL-M  \
0  http://images.amazon.com/images/P/0195153448.0...   
1  http://images.amazon.com/images/P/0002005018.0...   
2  http://images.amazon.com/images/P/0060973129.0...   

                                         Image-URL-L  
0  http://images.amazon.com/images/P/0195153448.0...  
1  http://image

In [16]:
books = books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher']]
books.rename(columns = {'Book-Title':'title', 'Book-Author':'author', 'Year-Of-Publication':'year', 'Publisher':'publisher'}, inplace=True)
users.rename(columns = {'User-ID':'user_id', 'Location':'location', 'Age':'age'}, inplace=True)
ratings.rename(columns = {'User-ID':'user_id', 'Book-Rating':'rating'}, inplace=True)

In [17]:
books.head

<bound method NDFrame.head of               ISBN                                              title  \
0       0195153448                                Classical Mythology   
1       0002005018                                       Clara Callan   
2       0060973129                               Decision in Normandy   
3       0374157065  Flu: The Story of the Great Influenza Pandemic...   
4       0393045218                             The Mummies of Urumchi   
...            ...                                                ...   
271355  0440400988                         There's a Bat in Bunk Five   
271356  0525447644                            From One to One Hundred   
271357  006008667X  Lily Dale : The True Story of the Town that Ta...   
271358  0192126040                        Republic (World's Classics)   
271359  0767409752  A Guided Tour of Rene Descartes' Meditations o...   

                      author  year  \
0         Mark P. O. Morford  2002   
1       Richard B

In [18]:
x = ratings['user_id'].value_counts() > 200
y = x[x].index  #user_ids
print(y.shape)
ratings = ratings[ratings['user_id'].isin(y)]

(899,)


In [19]:
rating_with_books = ratings.merge(books, on='ISBN')
rating_with_books.head()

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning
3,277427,0030615321,0,"Ask for May, Settle for June (A Doonesbury book)",G. B. Trudeau,1982,Henry Holt &amp; Co
4,277427,0060002050,0,On a Wicked Dawn (Cynster Novels),Stephanie Laurens,2002,Avon Books


In [20]:
number_rating = rating_with_books.groupby('title')['rating'].count().reset_index()
number_rating.rename(columns= {'rating':'number_of_ratings'}, inplace=True)
final_rating = rating_with_books.merge(number_rating, on='title')
final_rating.shape
final_rating = final_rating[final_rating['number_of_ratings'] >= 50]
final_rating.drop_duplicates(['user_id','title'], inplace=True)

In [21]:
book_pivot = final_rating.pivot_table(columns='user_id', index='title', values="rating")
book_pivot.fillna(0, inplace=True)
print(book_pivot.head())

user_id                254     2276    2766    2977    3363    3757    4017    \
title                                                                           
1984                      9.0     0.0     0.0     0.0     0.0     0.0     0.0   
1st to Die: A Novel       0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2nd Chance                0.0    10.0     0.0     0.0     0.0     0.0     0.0   
4 Blondes                 0.0     0.0     0.0     0.0     0.0     0.0     0.0   
84 Charing Cross Road     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

user_id                4385    6242    6251    ...  274004  274061  274301  \
title                                          ...                           
1984                      0.0     0.0     0.0  ...     0.0     0.0     0.0   
1st to Die: A Novel       0.0     0.0     0.0  ...     0.0     0.0     0.0   
2nd Chance                0.0     0.0     0.0  ...     0.0     0.0     0.0   
4 Blondes                 0.0     0.0     

In [22]:
from scipy.sparse import csr_matrix
book_sparse = csr_matrix(book_pivot)

In [23]:
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(algorithm='brute')
model.fit(book_sparse)

In [32]:
book_index = 740  # Example: Choosing the 741st book
print(f"Book input: {book_pivot.index[book_index]}") 

Book input: Zoya


In [34]:
distances, suggestions = model.kneighbors(book_pivot.iloc[book_index, :].values.reshape(1, -1))

In [35]:
for i in range(len(suggestions)):
  print(book_pivot.index[suggestions[i]])

Index(['Zoya', 'Fine Things', 'Exclusive', 'Secrets', 'The Cradle Will Fall'], dtype='object', name='title')
