In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the preprocessed ratings table.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0
# (passing it now raises TypeError). `on_bad_lines='skip'` is its replacement:
# malformed rows are dropped instead of aborting the read.
dataset = pd.read_csv('preprocessed_ratings.csv', sep=',', on_bad_lines='skip', encoding='latin-1')

In [3]:
# Display the loaded ratings frame to eyeball its columns and a few rows.
dataset

Unnamed: 0,User_ID,ISBN,Book_Rating,Book_Title,Book-Author,Year_Of_Publication,Publisher,Location,Age
0,53,451,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,"strafford, missouri, usa",34.0
1,53,280,0,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown","strafford, missouri, usa",34.0
2,53,647,0,The Da Vinci Code,Dan Brown,2003,Doubleday,"strafford, missouri, usa",34.0
3,53,2028,0,Wild Animus,Rich Shapero,2004,Too Far,"strafford, missouri, usa",34.0
4,53,241,0,Four To Score (A Stephanie Plum Novel),Janet Evanovich,1999,St. Martin's Paperbacks,"strafford, missouri, usa",34.0
...,...,...,...,...,...,...,...,...,...
137568,482,1739,8,Seven Habits Of Highly Effective People,Stephen R. Covey,1990,Free Press,"n/a, channel islands, guernsey",35.0
137569,482,892,0,Illusions: The Adventures of a Reluctant Messiah,Richard Bach,1994,Dell Publishing Company,"n/a, channel islands, guernsey",35.0
137570,1666,16,9,The Professor and the Madman,Simon Winchester,1998,HarperCollins Publishers,"kansas city, missouri, usa",44.0
137571,1666,1705,5,"ROAD LESS TRAVELED : A New Psychology of Love,...",M. Scott Peck,1980,Touchstone,"kansas city, missouri, usa",44.0


In [4]:
# Summarise column dtypes and non-null counts of the ratings frame.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137573 entries, 0 to 137572
Data columns (total 9 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   User_ID              137573 non-null  int64  
 1   ISBN                 137573 non-null  int64  
 2   Book_Rating          137573 non-null  int64  
 3   Book_Title           137573 non-null  object 
 4   Book-Author          137573 non-null  object 
 5   Year_Of_Publication  137573 non-null  int64  
 6   Publisher            137573 non-null  object 
 7   Location             137573 non-null  object 
 8   Age                  105405 non-null  float64
dtypes: float64(1), int64(4), object(4)
memory usage: 7.3+ MB


In [6]:
# One row per distinct book title: keep only the bibliographic columns and,
# for titles that occur more than once, the first occurrence.
books = (
    dataset
    .filter(items=['Book_Title', 'Book-Author', 'Year_Of_Publication', 'Publisher'])
    .drop_duplicates(subset='Book_Title', keep='first')
)

# One row per distinct user, built the same way from the user columns.
users = (
    dataset
    .filter(items=['User_ID', 'Location', 'Age'])
    .drop_duplicates(subset='User_ID', keep='first')
)

In [7]:
# Render each book row as a single line of text (title, author, year and
# publisher); each line is the TF-IDF document for that book.
book_str = books.to_string(header=False, index=False).split('\n')

# Row i of the similarity matrix is later read as book i, so the corpus must
# contain exactly one document per book.
assert len(book_str) == len(books), "book_str is not aligned with books"

# Preview a few documents instead of printing the entire corpus.
book_str[:5]



In [8]:
# Fit a TF-IDF model (English stop words removed) on the book documents and
# encode every document as one sparse row of term weights.
tfidf = TfidfVectorizer(stop_words="english")
books_matrix = tfidf.fit_transform(raw_documents=book_str)

In [9]:
# (number of book documents, number of distinct TF-IDF terms)
books_matrix.shape

(1913, 3502)

In [10]:
# Pairwise dot products between all TF-IDF rows. With a single argument,
# linear_kernel compares the matrix against itself.
similarity_matrix = linear_kernel(books_matrix)

In [11]:
# Map each book title to its row *position* in `books`, which is also its row
# in `book_str`, `books_matrix` and `similarity_matrix`.
# `books.index` must not be used for this: after drop_duplicates it still
# carries the original, non-contiguous `dataset` row labels, which are not
# valid positions in the much smaller similarity matrix.
mapping = pd.Series(np.arange(len(books)), index=books['Book_Title'])

In [12]:
def recommend_books(book, top_n=14):
    """Return the titles of the books most similar to ``book``.

    Similarity is read from the precomputed ``similarity_matrix``; ``mapping``
    must translate a title into that book's row position in the matrix (and
    therefore in ``books``).

    Parameters
    ----------
    book : str
        Exact title of the query book, as stored in ``books['Book_Title']``.
    top_n : int, default 14
        Maximum number of recommendations to return. The default matches the
        14 results this function returned before the parameter existed.

    Returns
    -------
    pandas.Series
        Titles of the recommended books, most similar first. The query book
        itself is never included.

    Raises
    ------
    KeyError
        If ``book`` is not a title present in ``mapping``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    book_index = mapping[book]

    # Pair every book position with its similarity to the query book and rank
    # them from most to least similar (the sort is stable, so ties keep their
    # original order).
    ranked = sorted(
        enumerate(similarity_matrix[book_index]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Exclude the query book by position instead of assuming it is ranked
    # first: another book with an identical document ties with it and may
    # sort ahead of it.
    book_indices = [pos for pos, _ in ranked if pos != book_index][:top_n]

    return books['Book_Title'].iloc[book_indices]

In [13]:
# Example: recommendations for one title from the catalogue.
recommend_books('Flesh Tones: A Novel')

3836                                      Flesh and Blood
1980                                          Rose Madder
2139    Back When We Were Grownups : A Novel (Ballanti...
697                                  Name of the Rose-Nla
4904    What We Keep : A Novel (Ballantine Reader's Ci...
1638                                         What We Keep
336                                        P Is for Peril
71                                        The Murder Book
62      Big Stone Gap: A Novel (Ballantine Reader's Ci...
1104     The Diary of Ellen Rimbauer: My Life at Rose Red
77      Big Cherry Holler: A Big Stone Gap Novel (Ball...
69                                            The Surgeon
1330                             Billy Straight : A Novel
337                                         Summer Island
Name: Book_Title, dtype: object