In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
data = pd.read_csv("Preprocessed_data.csv")

In [3]:
data.head()

Unnamed: 0.1,Unnamed: 0,user_id,location,age,isbn,rating,book_title,book_author,year_of_publication,publisher,img_s,img_m,img_l,Summary,Language,Category,city,state,country
0,0,2,"stockton, california, usa",18.0,195153448,0,Classical Mythology,Mark P. O. Morford,2002.0,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,Provides an introduction to classical myths pl...,en,['Social Science'],stockton,california,usa
1,1,8,"timmins, ontario, canada",34.7439,2005018,5,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],timmins,ontario,canada
2,2,11400,"ottawa, ontario, canada",49.0,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],ottawa,ontario,canada
3,3,11676,"n/a, n/a, n/a",34.7439,2005018,8,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],,,
4,4,41385,"sudbury, ontario, canada",34.7439,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,"In a small town in Canada, Clara Callan reluct...",en,['Actresses'],sudbury,ontario,canada


In [4]:
data.drop(['Unnamed: 0',  'age', 
           'publisher','img_s', 'img_m', 'img_l', 'Summary', 'Language'], axis = 1, inplace = True)

In [5]:
data.head()

Unnamed: 0,user_id,location,isbn,rating,book_title,book_author,year_of_publication,Category,city,state,country
0,2,"stockton, california, usa",195153448,0,Classical Mythology,Mark P. O. Morford,2002.0,['Social Science'],stockton,california,usa
1,8,"timmins, ontario, canada",2005018,5,Clara Callan,Richard Bruce Wright,2001.0,['Actresses'],timmins,ontario,canada
2,11400,"ottawa, ontario, canada",2005018,0,Clara Callan,Richard Bruce Wright,2001.0,['Actresses'],ottawa,ontario,canada
3,11676,"n/a, n/a, n/a",2005018,8,Clara Callan,Richard Bruce Wright,2001.0,['Actresses'],,,
4,41385,"sudbury, ontario, canada",2005018,0,Clara Callan,Richard Bruce Wright,2001.0,['Actresses'],sudbury,ontario,canada


In [6]:
data.drop(['location',  'city', 
           'state','state', 'country'], axis = 1, inplace = True)

In [7]:
data.head()

Unnamed: 0,user_id,isbn,rating,book_title,book_author,year_of_publication,Category
0,2,195153448,0,Classical Mythology,Mark P. O. Morford,2002.0,['Social Science']
1,8,2005018,5,Clara Callan,Richard Bruce Wright,2001.0,['Actresses']
2,11400,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,['Actresses']
3,11676,2005018,8,Clara Callan,Richard Bruce Wright,2001.0,['Actresses']
4,41385,2005018,0,Clara Callan,Richard Bruce Wright,2001.0,['Actresses']


In [8]:
# Collaborative Filtering Model (SVD)
reader = Reader(rating_scale=(0, 10))
data_surprise = Dataset.load_from_df(data[['user_id', 'isbn', 'rating']], reader)
trainset = data_surprise.build_full_trainset()
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1b963ce58d0>

In [9]:
# TF-IDF Vectorization for book titles
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['book_title'])

In [19]:
import random

def get_similar_books(book_title, n=5):
    # Find similar books based on collaborative filtering
    book_id = data[data['book_title'] == book_title]['isbn'].iloc[0]
    # Select a random user ID from the trainset
    user_id = random.choice(trainset.all_users())
    test_data = [(user_id, book_id, 0) for _ in range(algo.trainset.n_items)]  # Use the selected user ID and zero rating
    predictions = algo.test(test_data)  # Predict ratings for all items
    sim_books_collab = [pred.iid for pred in predictions]  # Extract item IDs
    
    # Find similar books based on title similarity
    book_idx = data[data['book_title'] == book_title].index[0]
    title_sim = cosine_similarity(tfidf_matrix[book_idx], tfidf_matrix)
    sim_books_title = [trainset.to_raw_iid(idx) for idx in title_sim.argsort()[0][-n-1:-1][::-1]]  # Convert indices to raw item IDs
    
    # Merge and deduplicate recommendations
    recommendations = list(set(sim_books_collab + sim_books_title))  # Convert set to list
    
    # Return book titles
    return data.loc[data['isbn'].isin(recommendations), 'book_title']


In [20]:
# Example usage
book_title_input = "Clara Callan"
recommended_books = get_similar_books(book_title_input)
print("Recommended books based on", book_title_input)
print(recommended_books)

Recommended books based on Clara Callan
1                                          Clara Callan
2                                          Clara Callan
3                                          Clara Callan
4                                          Clara Callan
5                                          Clara Callan
6                                          Clara Callan
7                                          Clara Callan
8                                          Clara Callan
9                                          Clara Callan
10                                         Clara Callan
11                                         Clara Callan
12                                         Clara Callan
13                                         Clara Callan
14                                         Clara Callan
68                                      PLEADING GUILTY
69                                      PLEADING GUILTY
70                                      PLEADING GUILTY
73      