In [1]:
import pandas as pd
import numpy as np
import random
from surprise import dump
from surprise import SVD
from surprise import Dataset, Reader
from surprise import accuracy
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df_books = pd.read_csv('/.spyder-py3/files/input/books.csv')
df_books = df_books.drop_duplicates(subset = ['original_title'])
df_books = df_books.dropna(subset = ['original_title'])

In [3]:
df_ratings = pd.read_csv('/.spyder-py3/files/input/ratings.csv')
df_ratings = df_ratings.drop_duplicates()
df_ratings = df_ratings.dropna()

In [4]:
df_books = df_books[df_books.book_id.isin(df_ratings.book_id)]
df_ratings = df_ratings[df_ratings.book_id.isin(df_books.book_id)]

In [5]:
df = df_ratings.copy()

In [6]:
df = df.sample(frac = 1, random_state = 42)

threshold = int(.9 * len(df))
train_ratings = df[:threshold]
test_ratings = df[threshold:]                  

reader = Reader(rating_scale = (1, 5))
data = Dataset.load_from_df(train_ratings[['user_id','book_id','rating']], reader)

In [7]:
train_set = data.build_full_trainset()
#algo = SVD(n_epochs=10, verbose = True, random_state=42)
#algo.fit(train_set)


from surprise import dump
#dump.dump('/.spyder-py3/files/input/' + 'svd',algo = algo)
_, algo  = dump.load('/.spyder-py3/files/input/'  + 'svd')

In [8]:
test_data = Dataset.load_from_df(test_ratings[['user_id','book_id','rating']], reader).build_full_trainset()
test_set = test_data.build_testset()
predictions = algo.test(test_set)
print (accuracy.rmse(predictions))

RMSE: 0.8418
0.8418446275472864


In [9]:
id = 913
print('SVD Similar books to :', df_books[df_books.book_id == id]['original_title'].values[0])

iid = train_set.to_inner_iid(id)

dtypes = np.dtype([
          ('original_title', str),
          ('authors', str),
          ('sim', float),
          ])

simbooks = pd.DataFrame(np.empty(0, dtype=dtypes))

book_embedding = algo.qi
current_book = book_embedding[iid, :].reshape(1, -1)
total_books = book_embedding.shape[0]
for i in range(total_books):
  if i!= iid:
    sim = cosine_similarity(current_book, book_embedding[i,:].reshape(1, -1))
    rawid = train_set.to_raw_iid(i)
    book_title = df_books[df_books.book_id == rawid]['original_title'].values[0]
    authors = df_books[df_books.book_id == rawid]['authors'].values[0]

    simbooks = simbooks.append({'original_title' : book_title, 'authors' : authors, 
                      'sim' : sim }, ignore_index = True)
    
simbooks = simbooks.sort_values('sim', ascending = False)

simbooks.head(20)

SVD Similar books to : The Joy of Cooking


Unnamed: 0,original_title,authors,sim
8361,Castle Roogna,Piers Anthony,[[0.22868941007657373]]
6677,Just Like Heaven,Julia Quinn,[[0.22495189137348642]]
7492,A Theory of Justice,John Rawls,[[0.19892125680483588]]
4648,A Fistful of Charms,Kim Harrison,[[0.19661862783835354]]
5533,A Long Way From Chicago,Richard Peck,[[0.19628196306328455]]
8684,Chomp!,Carl Hiaasen,[[0.1947688888149388]]
5622,Calvin and Hobbes,"Bill Watterson, G.B. Trudeau",[[0.18854213347502094]]
6936,Miracles: A Preliminary Study,C.S. Lewis,[[0.18633307904752136]]
1891,"Alex Cross, Run",James Patterson,[[0.1808204864152476]]
1669,Same Kind of Different as Me,"Ron Hall, Denver Moore, Lynn Vincent",[[0.18075409799237563]]
