In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import warnings
from surprise import SVD, model_selection, accuracy

In [3]:
from surprise import Reader, Dataset

In [4]:
reduced_books_users_ratings = pd.read_csv("data/clean/reduced_books_users_ratings.csv")

In [6]:
books_users_ratings = pd.read_csv("data/clean/books_users_ratings.csv")

In [7]:
user_item_rating = reduced_books_users_ratings[['user_id', 'unique_isbn', 'book_rating']]
user_item_rating.head()

Unnamed: 0,user_id,unique_isbn,book_rating
0,11676,038550120X,10
1,11676,0671537458,8
2,11676,0679776818,8
3,11676,0684867621,3
4,11676,8437606322,8


In [8]:
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(user_item_rating, reader)

In [9]:
#splitting into train and test
train_data, test_data = model_selection.train_test_split(data, test_size=0.40)

In [10]:
#initiating the SVD model 
model = SVD()

In [11]:
# Train the algorithm on the training set, and predict ratings for the test set
model.fit(train_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11929a1d0>

In [14]:
predictions = model.test(test_data)

In [15]:
# Then compute RMSE
accuracy.rmse(predictions)

RMSE: 1.6011


1.6011169503342184

In [20]:
# to get the top predictions for each test user

In [16]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
        
    return top_n

In [17]:
def get_reading_list(userid):
    """
    Retrieve full book titles from full 'books_users_ratings' dataframe
    """
    reading_list = defaultdict(list)
    top_n = get_top_n(predictions, n=10)
    for n in top_n[userid]:
        book, rating = n
        title = books_users_ratings.loc[books_users_ratings.unique_isbn==book].book_title.unique()[0]
        reading_list[title] = rating
    return reading_list

In [18]:
test_data

[(190708, '0440219248', 10.0),
 (49061, '0385260075', 8.0),
 (231210, '0140351310', 10.0),
 (222492, '0449702766', 5.0),
 (143175, '060980619X', 10.0),
 (16795, '0380703882', 7.0),
 (187256, '038550120X', 9.0),
 (222296, '0440901588', 10.0),
 (161252, '0385504209', 7.0),
 (14521, '1566190932', 10.0),
 (123160, '0671708635', 6.0),
 (248718, '0688156126', 8.0),
 (229313, '1573225517', 10.0),
 (140974, '1400031362', 8.0),
 (216435, '0553209671', 10.0),
 (44985, '0553382195', 7.0),
 (112001, '0440235154', 5.0),
 (89014, '0060192704', 9.0),
 (268032, '0449221504', 8.0),
 (4149, '0451180410', 9.0),
 (142062, '0061097314', 10.0),
 (148378, '0061097314', 8.0),
 (7346, '0836210263', 7.0),
 (182085, '0380782340', 9.0),
 (17667, '0451409973', 9.0),
 (33958, '0394179692', 10.0),
 (116090, '0060959037', 8.0),
 (136252, '0679454489', 8.0),
 (154070, '0671021001', 8.0),
 (78448, '0425140032', 8.0),
 (258534, '0446606189', 8.0),
 (46907, '0553279378', 7.0),
 (129716, '0062502174', 10.0),
 (110771, '06

In [19]:
get_reading_list(111637)

defaultdict(list,
            {'Morality for Beautiful Girls (No.1 Ladies Detective Agency)': 7.254020961321732,
             'Under the Banner of Heaven : A Story of Violent Faith': 7.166350978970218,
             'The Amazing Adventures of Kavalier &amp; Clay': 7.134942279308458,
             'Four To Score (A Stephanie Plum Novel)': 7.085008481009208,
             "The Kalahari Typing School for Men (No. 1 Ladies' Detective Agency)": 6.996325259316454,
             "The No. 1 Ladies' Detective Agency (Today Show Book Club #8)": 6.9785704659661825,
             "A Cup of Tea (Ballantine Reader's Circle)": 6.732335999957187,
             'The Book of Illusions: A Novel': 6.5298317954404315,
             'The Stone Diaries': 5.952582131444568,
             'Prague : A Novel': 5.4545981642905295})