# This is NovelHunt, your personal book recommender

In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import csv
from pandas.errors import ParserError

In [32]:
def read_csv_skip_error_lines(file_path, delimiter=';', encoding='latin-1'):
    """Load a delimited text file into a DataFrame, dropping malformed rows.

    The first row is used as the header. Every later row whose number of
    fields differs from the header's is discarded. Values are kept exactly as
    the ``csv`` module parsed them, i.e. every cell is a string.

    Parameters
    ----------
    file_path : str or path-like
        Location of the file to read.
    delimiter : str, default ';'
        Field separator handed to ``csv.reader``.
    encoding : str, default 'latin-1'
        Text encoding used to open the file.

    Returns
    -------
    pandas.DataFrame
        One column per header field and one row per well-formed line. An
        empty DataFrame is returned (after printing a message) when the file
        is empty or any error occurs while reading it.
    """
    try:
        # newline='' hands line-ending handling to the csv module, so line
        # breaks embedded in quoted fields reach the parser unchanged.
        with open(file_path, 'r', encoding=encoding, newline='') as file:
            reader = csv.reader(file, delimiter=delimiter)

            headers = next(reader, None)
            if headers is None:
                # No header row at all: report it explicitly instead of
                # surfacing a message-less StopIteration.
                print(f"Error reading file '{file_path}': file is empty")
                return pd.DataFrame()

            # Keep only the rows that line up with the header.
            rows = [row for row in reader if len(row) == len(headers)]

        return pd.DataFrame(rows, columns=headers)
    except Exception as e:
        # Deliberate best-effort: the caller always gets a DataFrame back.
        print(f"Error reading file '{file_path}': {e}")
        return pd.DataFrame()

In [33]:
# Load the books table; the helper drops rows whose field count differs from the header.
books = read_csv_skip_error_lines('data/BX-Books.csv')

In [34]:
# Peek at the first two rows to check that the columns parsed as expected.
books.head(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...


In [35]:
# (rows, columns) of the books table after malformed lines were skipped.
books.shape

(271357, 8)

In [36]:
# List the column names before choosing which ones to keep.
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [37]:
# Keep the identifying columns plus the small cover image; the medium and
# large image URL columns are dropped.
# NOTE: this rebinds `books`, so the cell cannot be re-run once the columns
# have been renamed further down.
books = books[['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S']]

In [38]:
# Confirm that only the selected columns remain.
books.head(2)

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...


In [39]:
# Give the book columns shorter names. The result is reassigned instead of
# using inplace=True, so the cell never mutates a frame that was produced by
# slicing another one.
books = books.rename(columns={
    "Book-Title": "Title",
    "Book-Author": "Author",
    "Year-Of-Publication": "Year",
    "Publisher": "publisher",
    "Image-URL-S": "Img_url",
})

In [40]:
# Check the renamed columns.
books.head()

Unnamed: 0,ISBN,Title,Author,Year,publisher,Img_url
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...


In [41]:
# Load the users table with the same row-skipping helper.
users = read_csv_skip_error_lines('data/BX-Users.csv')

In [42]:
# Preview the users table.
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [43]:
# Load the ratings table; every value, including the rating, arrives as a
# string because the helper builds the frame from csv.reader rows.
ratings = read_csv_skip_error_lines('data/BX-Book-Ratings.csv')

In [44]:
# Preview the ratings table.
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [45]:
# (rows, columns) of the ratings table.
ratings.shape

(1149780, 3)

In [46]:
# Sizes of the three loaded tables, printed one per line.
print(books.shape, users.shape, ratings.shape, sep='\n')

(271357, 6)
(278858, 3)
(1149780, 3)


In [47]:
# Rename the rating columns; the result is reassigned rather than mutating
# the frame with inplace=True.
ratings = ratings.rename(columns={
    "User-ID": "user_id",
    "Book-Rating": "Rating",
})

In [48]:
# Check the renamed rating columns.
ratings.head()

Unnamed: 0,user_id,ISBN,Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [49]:
# Number of rating rows per user.
ratings["user_id"].value_counts()

user_id
11676     13602
198711     7550
153662     6109
98391      5891
35859      5850
          ...  
116180        1
116166        1
116154        1
116137        1
276723        1
Name: count, Length: 105283, dtype: int64

In [50]:
# Number of distinct users that appear in the ratings table.
ratings["user_id"].unique().shape

(105283,)

In [51]:
# Boolean Series indexed by user_id: True where the user has more than 200
# rating rows. The 200 cut-off is a hand-picked activity threshold.
x = ratings['user_id'].value_counts() > 200

In [52]:
# How many users pass the "more than 200 ratings" threshold.
x[x].shape

(899,)

In [53]:
# user_id labels of the users flagged True in `x`.
y = x[x].index

In [54]:
# Show the selected user ids (they are strings, as loaded from the CSV).
y

Index(['11676', '198711', '153662', '98391', '35859', '212898', '278418',
       '76352', '110973', '235105',
       ...
       '260183', '73681', '44296', '155916', '9856', '274808', '28634',
       '59727', '268622', '188951'],
      dtype='object', name='user_id', length=899)

In [55]:
# Keep only the rating rows written by the users listed in `y`.
# NOTE: this overwrites `ratings`; the unfiltered table is only recoverable by
# re-running the loading and renaming cells.
ratings = ratings[ratings['user_id'].isin(y)]

In [56]:
# Preview the filtered ratings.
ratings.head()

Unnamed: 0,user_id,ISBN,Rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8
1459,277427,0030615321,0
1460,277427,0060002050,0


In [57]:
# Attach book metadata to each rating by ISBN. No `how` is given, so pandas'
# default join type applies.
ratings_with_books = ratings.merge(books, on ="ISBN")

In [58]:
# Preview the merged ratings + books table.
ratings_with_books.head()

Unnamed: 0,user_id,ISBN,Rating,Title,Author,Year,publisher,Img_url
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...


In [59]:
# (rows, columns) of the merged table.
ratings_with_books.shape

(487668, 8)

In [60]:
# Count the rating rows per title and return the result as a two-column frame
# (Title, Rating), where `Rating` temporarily holds the count.
num_rating = ratings_with_books.groupby('Title')['Rating'].count().reset_index()

In [61]:
# Preview the per-title counts.
num_rating.head()

Unnamed: 0,Title,Rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [62]:
# The counted column is a number of ratings, not a rating: name it accordingly.
# Reassigned instead of inplace=True so the cell does not mutate hidden state.
num_rating = num_rating.rename(columns={"Rating": "Num_of_rating"})

In [63]:
# Check the renamed count column.
num_rating.head()

Unnamed: 0,Title,Num_of_rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [64]:
# Add each title's rating count to every row of the merged table.
final_rating = ratings_with_books.merge(num_rating, on= 'Title')

In [65]:
# Preview the table with the added Num_of_rating column.
final_rating.head()

Unnamed: 0,user_id,ISBN,Rating,Title,Author,Year,publisher,Img_url,Num_of_rating
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82


In [66]:
# Spot-check one random row (no seed is set, so the row differs on every run).
final_rating.sample()

Unnamed: 0,user_id,ISBN,Rating,Title,Author,Year,publisher,Img_url,Num_of_rating
268655,30276,843950641,6,The Very Virile Viking: (Cartoon Cover),Sandra Hill,2003,Leisure Books,http://images.amazon.com/images/P/0843950641.0...,6


In [67]:
# Keep only titles with at least 50 rating rows. `.copy()` makes the result an
# independent frame, because later cells drop duplicates and overwrite the
# `Rating` column on it.
final_rating = final_rating[final_rating['Num_of_rating'] >= 50].copy()

In [68]:
# Spot-check ten random rows of the filtered table (unseeded, so the rows
# differ on every run).
final_rating.sample(10)

Unnamed: 0,user_id,ISBN,Rating,Title,Author,Year,publisher,Img_url,Num_of_rating
99891,16795,385721420,0,Three Junes,JULIA GLASS,2003,Anchor,http://images.amazon.com/images/P/0385721420.0...,84
4957,262399,399501487,0,Lord of the Flies,William Gerald Golding,1959,Perigee Trade,http://images.amazon.com/images/P/0399501487.0...,120
115278,17950,451202341,0,Back Roads,Tawni O'Dell,2001,Signet Book,http://images.amazon.com/images/P/0451202341.0...,118
186404,182085,671034057,10,Now You See Her,Linda Howard,1999,Pocket Books,http://images.amazon.com/images/P/0671034057.0...,52
9915,180957,553268880,0,The Prince of Tides,Pat Conroy,1987,Bantam Books,http://images.amazon.com/images/P/0553268880.0...,148
49204,78973,743467523,7,Dreamcatcher,Stephen King,2003,Pocket,http://images.amazon.com/images/P/0743467523.0...,161
1429,25981,312966091,10,Three To Get Deadly : A Stephanie Plum Novel (...,Janet Evanovich,1998,St. Martin's Paperbacks,http://images.amazon.com/images/P/0312966091.0...,105
6380,170229,439420105,0,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2002,Scholastic,http://images.amazon.com/images/P/0439420105.0...,183
132623,81045,312995423,10,Digital Fortress : A Thriller,Dan Brown,2003,St. Martin's Press,http://images.amazon.com/images/P/0312995423.0...,61
450,141710,60934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,http://images.amazon.com/images/P/0060934417.0...,108


In [69]:
# Size before removing duplicate (user, title) pairs.
final_rating.shape

(61853, 9)

In [70]:
# One row per (user, title): the same title can appear under several ISBNs, so
# only the first rating a user gave to a title is kept. Reassigned rather than
# mutated with inplace=True.
final_rating = final_rating.drop_duplicates(subset=['user_id', 'Title'])

In [71]:
# Size after removing duplicate (user, title) pairs.
final_rating.shape

(59850, 9)

In [72]:
# Spot-check one random row after de-duplication (unseeded).
final_rating.sample()

Unnamed: 0,user_id,ISBN,Rating,Title,Author,Year,publisher,Img_url,Num_of_rating
67907,55187,60175400,8,The Poisonwood Bible,Barbara Kingsolver,1999,HarperCollins,http://images.amazon.com/images/P/0060175400.0...,68


In [73]:
# Display the de-duplicated table (the notebook truncates the rendered rows).
final_rating

Unnamed: 0,user_id,ISBN,Rating,Title,Author,Year,publisher,Img_url,Num_of_rating
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
1,3363,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
2,11676,002542730X,6,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
3,12538,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
4,13552,002542730X,0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,82
...,...,...,...,...,...,...,...,...,...
236701,255489,0553579983,7,And Then You Die,Iris Johansen,1998,Bantam,http://images.amazon.com/images/P/0553579983.0...,50
236702,256407,0553579983,0,And Then You Die,Iris Johansen,1998,Bantam,http://images.amazon.com/images/P/0553579983.0...,50
236703,257204,0553579983,0,And Then You Die,Iris Johansen,1998,Bantam,http://images.amazon.com/images/P/0553579983.0...,50
236704,261829,0553579983,0,And Then You Die,Iris Johansen,1998,Bantam,http://images.amazon.com/images/P/0553579983.0...,50


In [74]:
# Ratings were loaded as text; convert them to numbers so they can be averaged.
# Values that cannot be parsed become NaN (errors='coerce').
numeric_rating = pd.to_numeric(final_rating['Rating'], errors='coerce')
final_rating = final_rating.assign(Rating=numeric_rating)

In [75]:
# Build the title x user rating matrix: one row per Title, one column per
# user_id, cells holding the mean Rating for that pair. Pairs with no rating
# are left as NaN at this point.
book_pivot = final_rating.pivot_table(columns='user_id', index='Title', values='Rating', aggfunc='mean')

In [76]:
# Inspect the pivot before missing values are filled.
book_pivot

user_id,100459,100644,100846,100906,101209,101305,101851,101876,102275,102359,...,95932,95991,96054,96448,97874,98297,98391,9856,98741,98758
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,,,,,,,,,,,...,,,,,,,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,0.0,,,,,,,9.0,,...,,,,,,,9.0,,,
4 Blondes,,,,,,,,,,,...,,,,,,,,,,
84 Charing Cross Road,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,,,,9.0,,8.0,,,,,...,,,,,,,,,,
You Belong To Me,,,,,,,,,0.0,,...,,,,,,,,,,
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,,,,,0.0,,,,,,...,,,,,,,,,,
Zoya,,,,,,,,,,,...,,,,,,,,,,


In [77]:
# (titles, users) dimensions of the pivot.
book_pivot.shape

(742, 888)

In [78]:
# Treat "user never rated this title" as 0 so the matrix has no NaN left.
# Reassigned instead of inplace=True so the cell does not mutate hidden state.
book_pivot = book_pivot.fillna(0)

In [79]:
# Inspect the pivot after missing values were replaced by 0.
book_pivot

user_id,100459,100644,100846,100906,101209,101305,101851,101876,102275,102359,...,95932,95991,96054,96448,97874,98297,98391,9856,98741,98758
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
84 Charing Cross Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,9.0,0.0,8.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [80]:
from scipy.sparse import csr_matrix

In [81]:
# Convert the mostly-zero pivot into a compressed sparse row matrix.
book_sparse = csr_matrix(book_pivot)

In [82]:
# Show the sparse matrix summary (shape and number of stored elements).
book_sparse

<742x888 sparse matrix of type '<class 'numpy.float64'>'
	with 14942 stored elements in Compressed Sparse Row format>

In [83]:
from sklearn.neighbors import NearestNeighbors
# Nearest-neighbour searcher using the brute-force algorithm; no metric is
# passed, so the library's default distance is used.
model = NearestNeighbors(algorithm='brute')

In [84]:
# Fit on the sparse title x user matrix: each row (title) is one sample.
model.fit(book_sparse)

In [85]:
# Query the 6 nearest titles to the pivot row at position 237. The row is
# reshaped to (1, n_users) because kneighbors is given a single query sample.
distance, suggestion = model.kneighbors(book_pivot.iloc[237,:].values.reshape(1,-1), n_neighbors=6)

In [86]:
# Distances from the query row to each returned neighbour.
distance

array([[ 0.        , 68.78953409, 69.5413546 , 72.64296249, 76.83098333,
        77.28518616]])

In [87]:
# Positional row indices (into book_pivot) of the returned neighbours.
suggestion

array([[237, 240, 238, 241, 184, 536]], dtype=int64)

In [88]:
# Translate each row of neighbour positions into the matching titles.
for neighbour_positions in suggestion:
    print(book_pivot.index[neighbour_positions])

Index(['Harry Potter and the Chamber of Secrets (Book 2)',
       'Harry Potter and the Prisoner of Azkaban (Book 3)',
       'Harry Potter and the Goblet of Fire (Book 4)',
       'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',
       'The Cradle Will Fall'],
      dtype='object', name='Title')


In [89]:
# Positional lookup: the title stored at row 3 of the pivot.
book_pivot.index[3]

'4 Blondes'

In [90]:
# Title index of the pivot, kept under its own name so it can be saved separately.
books_name = book_pivot.index

In [91]:
import os
import pickle

# Make sure the output folder exists so the cell also works on a fresh checkout.
os.makedirs('artifacts', exist_ok=True)

# Persist the fitted model and the tables it depends on. Each file is opened
# in a `with` block so its handle is flushed and closed right after the dump.
with open('artifacts/model.pkl', 'wb') as fh:
    pickle.dump(model, fh)
with open('artifacts/books_name.pkl', 'wb') as fh:
    pickle.dump(books_name, fh)
with open('artifacts/final_rating.pkl', 'wb') as fh:
    pickle.dump(final_rating, fh)
with open('artifacts/book_pivot.pkl', 'wb') as fh:
    pickle.dump(book_pivot, fh)

In [92]:
def recommend_book(book_name, n_neighbors=6):
    """Print the titles closest to ``book_name`` in the rating matrix.

    Relies on two notebook-level objects: ``book_pivot`` (titles as index,
    one column per user) and ``model`` (the fitted nearest-neighbour
    searcher).

    Parameters
    ----------
    book_name : str
        Title to look up; it must match an entry of ``book_pivot.index``
        exactly.
    n_neighbors : int, default 6
        Number of titles to print. The query's own row is part of the fitted
        data, so it normally appears among the results.

    Returns
    -------
    None
        Titles are printed one per line, in the order the model returns them.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``book_pivot.index``.
    """
    matches = np.where(book_pivot.index == book_name)[0]
    if matches.size == 0:
        # Same exception type as the bare [0][0] lookup used to raise, but
        # with a message that names the missing title.
        raise IndexError(f"Book title not found in book_pivot: {book_name!r}")
    book_id = matches[0]

    # kneighbors expects a 2-D array: one query row of per-user ratings.
    query = book_pivot.iloc[book_id, :].values.reshape(1, -1)
    _, suggestion = model.kneighbors(query, n_neighbors=n_neighbors)

    for neighbour_positions in suggestion:
        for title in book_pivot.index[neighbour_positions]:
            print(title)
        
    

In [100]:
# Demo: print the nearest titles for one known book. The title must match an
# entry of book_pivot.index exactly.
book_name = 'You Belong To Me'
recommend_book(book_name)

You Belong To Me
The Cradle Will Fall
Exclusive
Loves Music, Loves to Dance
While My Pretty One Sleeps
Before I Say Good-Bye
