In [64]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/book-recommender-system/BX-Books.csv
/kaggle/input/book-recommender-system/BX-Book-Ratings.csv
/kaggle/input/book-recommender-system/BX-Users.csv


# Collaborative Filtering-based Recommendation System
A Collaborative Filtering-based Recommendation System makes predictions about a user’s interests by collecting preferences from many users. It assumes that users who agreed in the past will agree in the future about item preferences.

### Pros
- Learns from actual user behavior.
- No need for detailed metadata.

### Cons
- Cold start issue for new users/items.
- Needs a lot of user-item interaction data.
- May suffer from popularity bias.

# Data Preprocessing 

In [65]:
import pandas as pd 
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [66]:
# Loading the first dataframe
books = pd.read_csv('/kaggle/input/book-recommender-system/BX-Books.csv', sep=';', on_bad_lines='skip', encoding='latin-1')
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [67]:
books['Image-URL-L'][0]

'http://images.amazon.com/images/P/0195153448.01.LZZZZZZZ.jpg'

In [68]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [69]:
books.iloc[786]['Book-Title']

"Suzanne's Diary for Nicholas"

In [70]:
books.shape

(271360, 8)

In [71]:
books = books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher', 'Image-URL-L']]

In [72]:
# rename weried column names

books.rename(columns={
    'ISBN':'ISBN',
    'Book-Title':'title',
    'Book-Author': 'author',
    'Year-Of-Publication':'year',
    'Publisher':'publisher',
    'Image-URL-L':'url'
}, inplace = True)

In [73]:
books.head()

Unnamed: 0,ISBN,title,author,year,publisher,url
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...


In [74]:
# Loading the second dataframe
users = pd.read_csv('/kaggle/input/book-recommender-system/BX-Users.csv', sep=';', on_bad_lines = 'skip', encoding='latin-1')
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [75]:
users.shape

(278858, 3)

In [76]:
users.columns

Index(['User-ID', 'Location', 'Age'], dtype='object')

In [77]:
# rename weried column names

users.rename(columns={
    'User-ID':'user_id',
    'Location':'location',
    'Age':'age'
}, inplace = True)

In [78]:
users

Unnamed: 0,user_id,location,age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",
...,...,...,...
278853,278854,"portland, oregon, usa",
278854,278855,"tacoma, washington, united kingdom",50.0
278855,278856,"brampton, ontario, canada",
278856,278857,"knoxville, tennessee, usa",


In [79]:
# loading the third dataframe

ratings = pd.read_csv('/kaggle/input/book-recommender-system/BX-Book-Ratings.csv', sep=';', on_bad_lines='skip', encoding='latin-1')
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [80]:
ratings.shape

(1149780, 3)

In [81]:
ratings.columns

Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')

In [82]:
# rename the weird column names

ratings.rename(columns={
    'User-ID':'user_id',
    'Book-Rating':'rating'
}, inplace = True)

In [83]:
ratings.head()

Unnamed: 0,user_id,ISBN,rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


Now we have 3 dataframes:
1. books
2. users
3. ratings

In [84]:
books.shape, users.shape, ratings.shape

((271360, 6), (278858, 3), (1149780, 3))

In [85]:
ratings['user_id'].value_counts()

user_id
11676     13602
198711     7550
153662     6109
98391      5891
35859      5850
          ...  
116180        1
116166        1
116154        1
116137        1
276723        1
Name: count, Length: 105283, dtype: int64

In [86]:
ratings['user_id'].value_counts().shape

(105283,)

In [87]:
# storing the information of those who have read/rate at least 200 books

x = ratings['user_id'].value_counts() > 200
x

user_id
11676      True
198711     True
153662     True
98391      True
35859      True
          ...  
116180    False
116166    False
116154    False
116137    False
276723    False
Name: count, Length: 105283, dtype: bool

In [88]:
x[x].shape # no of true will return

(899,)

In [89]:
y = x[x].index # storing the indexes of those who have read/rate atleast 200 books

In [90]:
y

Index([ 11676, 198711, 153662,  98391,  35859, 212898, 278418,  76352, 110973,
       235105,
       ...
       260183,  73681,  44296, 155916,   9856, 274808,  28634,  59727, 268622,
       188951],
      dtype='int64', name='user_id', length=899)

In [91]:
ratings.shape

(1149780, 3)

In [92]:
# filters the ratings dataframe to include only the rows where the user_id column has values that are present in 'y'
ratings = ratings[ratings ['user_id'].isin(y)]

In [93]:
ratings.head()

Unnamed: 0,user_id,ISBN,rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8
1459,277427,0030615321,0
1460,277427,0060002050,0


In [94]:
ratings.shape

(526356, 3)

In [95]:
# Now join the ratings with books
# merges the ratings and books dataframes based on the ISBN column

rating_with_books = ratings.merge(books, on='ISBN')

In [96]:
rating_with_books.head()

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,url
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,http://images.amazon.com/images/P/0026217457.0...
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning,http://images.amazon.com/images/P/003008685X.0...
3,277427,0030615321,0,"Ask for May, Settle for June (A Doonesbury book)",G. B. Trudeau,1982,Henry Holt &amp; Co,http://images.amazon.com/images/P/0030615321.0...
4,277427,0060002050,0,On a Wicked Dawn (Cynster Novels),Stephanie Laurens,2002,Avon Books,http://images.amazon.com/images/P/0060002050.0...


In [100]:
# group the dataframe by title column and count the number of ratings for each book

number_of_ratings = rating_with_books.groupby('title')['rating'].count()

In [103]:
number_of_ratings.head()

title
A Light in the Storm: The Civil War Diary of Amelia Martin, Fenwick Island, Delaware, 1861 (Dear America)    2
Always Have Popsicles                                                                                        1
Apple Magic (The Collector's series)                                                                         1
Beyond IBM: Leadership Marketing and Finance for the 1990s                                                   1
Clifford Visita El Hospital (Clifford El Gran Perro Colorado)                                                1
Name: rating, dtype: int64

In [104]:
number_of_ratings = rating_with_books.groupby('title')['rating'].count().reset_index()

In [105]:
number_of_ratings

Unnamed: 0,title,rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1
...,...,...
160264,Ã?Â?ber die Pflicht zum Ungehorsam gegen den S...,3
160265,Ã?Â?lpiraten.,1
160266,Ã?Â?rger mit Produkt X. Roman.,1
160267,Ã?Â?stlich der Berge.,1


In [106]:
number_of_ratings.rename(columns={
    'rating': 'number_of_ratings'
}, inplace = True)

In [107]:
number_of_ratings

Unnamed: 0,title,number_of_ratings
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1
...,...,...
160264,Ã?Â?ber die Pflicht zum Ungehorsam gegen den S...,3
160265,Ã?Â?lpiraten.,1
160266,Ã?Â?rger mit Produkt X. Roman.,1
160267,Ã?Â?stlich der Berge.,1


In [108]:
final_ratings = rating_with_books.merge(number_of_ratings, on='title')

In [109]:
final_ratings.head()

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,url,number_of_ratings
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,http://images.amazon.com/images/P/0026217457.0...,7
2,277427,003008685X,8,Pioneers,James Fenimore Cooper,1974,Thomson Learning,http://images.amazon.com/images/P/003008685X.0...,1
3,277427,0030615321,0,"Ask for May, Settle for June (A Doonesbury book)",G. B. Trudeau,1982,Henry Holt &amp; Co,http://images.amazon.com/images/P/0030615321.0...,1
4,277427,0060002050,0,On a Wicked Dawn (Cynster Novels),Stephanie Laurens,2002,Avon Books,http://images.amazon.com/images/P/0060002050.0...,13


In [111]:
final_ratings.shape

(487671, 9)

In [112]:
# lets take only those books which got ratings from atleast 50 users

final_ratings = final_ratings[final_ratings['number_of_ratings'] >= 50]

In [113]:
final_ratings.head()

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,url,number_of_ratings
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
13,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,http://images.amazon.com/images/P/0060930535.0...,133
15,277427,0060934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,http://images.amazon.com/images/P/0060934417.0...,108
18,277427,0061009059,9,One for the Money (Stephanie Plum Novels (Pape...,Janet Evanovich,1995,HarperTorch,http://images.amazon.com/images/P/0061009059.0...,108
24,277427,006440188X,0,The Secret Garden,Frances Hodgson Burnett,1998,HarperTrophy,http://images.amazon.com/images/P/006440188X.0...,79


In [114]:
final_ratings.shape

(61853, 9)

In [115]:
final_ratings.reset_index()

Unnamed: 0,index,user_id,ISBN,rating,title,author,year,publisher,url,number_of_ratings
0,0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
1,13,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,http://images.amazon.com/images/P/0060930535.0...,133
2,15,277427,0060934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,http://images.amazon.com/images/P/0060934417.0...,108
3,18,277427,0061009059,9,One for the Money (Stephanie Plum Novels (Pape...,Janet Evanovich,1995,HarperTorch,http://images.amazon.com/images/P/0061009059.0...,108
4,24,277427,006440188X,0,The Secret Garden,Frances Hodgson Burnett,1998,HarperTrophy,http://images.amazon.com/images/P/006440188X.0...,79
...,...,...,...,...,...,...,...,...,...,...
61848,487505,275970,1400031354,0,Tears of the Giraffe (No.1 Ladies Detective Ag...,Alexander McCall Smith,2002,Anchor,http://images.amazon.com/images/P/1400031354.0...,84
61849,487506,275970,1400031362,0,Morality for Beautiful Girls (No.1 Ladies Dete...,Alexander McCall Smith,2002,Anchor,http://images.amazon.com/images/P/1400031362.0...,60
61850,487579,275970,1573229725,0,Fingersmith,Sarah Waters,2002,Riverhead Books,http://images.amazon.com/images/P/1573229725.0...,59
61851,487618,275970,1586210661,9,Me Talk Pretty One Day,David Sedaris,2001,Time Warner Audio Major,http://images.amazon.com/images/P/1586210661.0...,146


In [117]:
final_ratings.columns

Index(['user_id', 'ISBN', 'rating', 'title', 'author', 'year', 'publisher',
       'url', 'number_of_ratings'],
      dtype='object')

In [118]:
# lets drop the duplicates

final_ratings.drop_duplicates(['user_id', 'title'], inplace=True)


In [119]:
final_ratings.shape

(59850, 9)

In [120]:
# create pivot table

book_pivot = final_ratings.pivot_table(columns='user_id', index='title', values='rating')

In [121]:
book_pivot

user_id,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,,,,,0.0,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,,...,,,,0.0,,,,,0.0,
4 Blondes,,,,,,,,,,0.0,...,,,,,,,,,,
84 Charing Cross Road,,,,,,,,,,,...,,,,,,10.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,7.0,,,,,7.0,,...,,,,,,0.0,,,,
You Belong To Me,,,,,,,,,,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,0.0,,,,,0.0,...,,,,,,0.0,,,,
Zoya,,,,,,,,,,,...,,,,,,,,,,


In [122]:
book_pivot.shape

(742, 888)

In [123]:
book_pivot.fillna(0, inplace=True)

In [124]:
book_pivot

user_id,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
84 Charing Cross Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Training the Model

### csr_matrix
The `csr_matrix` stands for Compressed Sparse Row matrix, a highly efficient data structure from SciPy used to store sparse matrices — matrices with a lot of zero values.

Use it when:

- You have a large matrix (e.g., user-item interactions) with many zeros.
- You want to perform fast matrix operations like dot product or slicing.

In [125]:
from scipy.sparse import csr_matrix

In [126]:
book_sparse = csr_matrix(book_pivot)

In [127]:
book_sparse

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 14961 stored elements and shape (742, 888)>

In [128]:
type(book_sparse)

scipy.sparse._csr.csr_matrix

### NearestNeighbors Algorithm 
The NearestNeighbors algorithm is unsupervised machine learning algorithm.
- It doesn’t require labeled data (no need for class labels or target variables).
- It just finds the closest data points in the feature space based on a distance metric (e.g., cosine, Euclidean).
- It’s used for similarity search, not classification or regression.

**Parameters**

| Parameter     | Description                                                    |
| ------------- | -------------------------------------------------------------- |
| `n_neighbors` | Number of neighbors to return                                  |
| `metric`      | Distance metric (e.g., `'cosine'`, `'euclidean'`)              |
| `algorithm`   | Search method: `'auto'`, `'brute'`, `'kd_tree'`, `'ball_tree'` |


In [131]:
from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors()

model.fit(book_sparse)

In [145]:
book_pivot.iloc[237,:].shape

(888,)

In [146]:
distance, suggestions = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=5)

In [147]:
distance

array([[ 0.        , 67.75691847, 68.05145112, 72.277244  , 75.81556568]])

In [148]:
suggestions

array([[237, 238, 240, 241, 184]])

In [150]:
for i in range(len(suggestions)):
    print(book_pivot.index[suggestions[i]])

Index(['Harry Potter and the Chamber of Secrets (Book 2)',
       'Harry Potter and the Goblet of Fire (Book 4)',
       'Harry Potter and the Prisoner of Azkaban (Book 3)',
       'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive'],
      dtype='object', name='title')


# Testing the Model

In [168]:
def recommend_book(book_name):
    book_id = np.where(book_pivot.index == book_name)[0][0]
    distance, suggestions = model.kneighbors(book_pivot.iloc[book_id,:].values.reshape(1,-1), n_neighbors=6)


    for i in range(len(suggestions)):
        books = book_pivot.index[suggestions[i]]
        for j in books:
            if j == book_name:
                print(f"You searched {book_name} \n")
                print(f"The suggested books are: \n")
            else:
                print(j)

In [169]:
book_name = 'The Long Road Home'
recommend_book(book_name)

You searched The Long Road Home 

The suggested books are: 

Message from Nam
Exclusive
The Cradle Will Fall
Fine Things
Jacob Have I Loved


In [170]:
# import pickle


# # Save NearestNeighbors model
# with open('book_recommender_model.pkl', 'wb') as model_file:
#     pickle.dump(model, model_file)

# # Save book_pivot (optional, if needed in production)
# with open('book_pivot.pkl', 'wb') as pivot_file:
#     pickle.dump(book_pivot, pivot_file)


In [173]:
import pickle

# Save model
with open('/kaggle/working/book_recommender_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Save pivot table (if needed)
with open('//kaggle/working/book_pivot.pkl', 'wb') as pivot_file:
    pickle.dump(book_pivot, pivot_file)


In [174]:
import os

print(os.listdir('/kaggle/working'))


['.virtual_documents', 'book_pivot.pkl', 'book_recommender_model.pkl']
