In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import warnings
from surprise import SVD, model_selection, accuracy

In [8]:
from surprise import Reader, Dataset

In [6]:
reduced_books_users_ratings = pd.read_csv("C:/University of Chicago/Machine Learning/Project/Machine-Learning-Book-Ratings/data/clean/reduced_books_users_ratings.csv")

In [28]:
books_users_ratings = pd.read_csv("C:/University of Chicago/Machine Learning/Project/Machine-Learning-Book-Ratings/data/clean/books_users_ratings.csv")

In [7]:
user_item_rating = reduced_books_users_ratings[['user_id', 'unique_isbn', 'book_rating']]
user_item_rating.head()

Unnamed: 0,user_id,unique_isbn,book_rating
0,11676,038550120X,10
1,11676,0671537458,8
2,11676,0679776818,8
3,11676,0684867621,3
4,11676,8437606322,8


In [10]:
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(user_item_rating, reader)

In [13]:
#splitting into train and test
train_data, test_data = model_selection.train_test_split(data, test_size=0.40)

In [14]:
#initiating the SVD model 
model = SVD()

In [16]:
# Train the algorithm on the training set, and predict ratings for the test set
model.fit(train_data)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2c79ae587f0>

In [18]:
predictions = model.test(test_data)

In [19]:
# Then compute RMSE
accuracy.rmse(predictions)

RMSE: 1.5894


1.5894414751943375

In [20]:
# to get the top predictions for each test user

In [21]:
from collections import defaultdict

def get_top_n(predictions, n=10):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
        
    return top_n

In [24]:
def get_reading_list(userid):
    """
    Retrieve full book titles from full 'books_users_ratings' dataframe
    """
    reading_list = defaultdict(list)
    top_n = get_top_n(predictions, n=10)
    for n in top_n[userid]:
        book, rating = n
        title = books_users_ratings.loc[books_users_ratings.unique_isbn==book].book_title.unique()[0]
        reading_list[title] = rating
    return reading_list

In [26]:
test_data

[(104211, '0446358576', 9.0),
 (203213, '0743431014', 8.0),
 (111637, '1400034779', 7.0),
 (239740, '0061097101', 8.0),
 (262974, '0061097853', 8.0),
 (66953, '0380717018', 8.0),
 (204457, '0316789089', 10.0),
 (16795, '0425077837', 5.0),
 (206567, '0061095540', 8.0),
 (229703, '0061093092', 8.0),
 (207971, '1573226521', 9.0),
 (58382, '0671793489', 8.0),
 (183995, '0449000265', 8.0),
 (223145, '0684874350', 5.0),
 (178199, '0156007754', 10.0),
 (70302, '0385324170', 8.0),
 (26620, '0312966970', 7.0),
 (16795, '0671025333', 9.0),
 (224923, '0399133143', 7.0),
 (215986, '0375703764', 9.0),
 (193966, '0345446860', 5.0),
 (225449, '0671027581', 8.0),
 (200508, '0140281649', 9.0),
 (14422, '0020198906', 9.0),
 (225199, '0553262645', 6.0),
 (12569, '0316969680', 7.0),
 (244420, '055311073X', 10.0),
 (266457, '0804108749', 8.0),
 (1409, '042517736X', 9.0),
 (136205, '0515102652', 3.0),
 (93426, '067101420X', 5.0),
 (6251, '0446675504', 10.0),
 (123544, '0345351525', 4.0),
 (209272, '07434203

In [29]:
get_reading_list(111637)

defaultdict(list,
            {'Under the Banner of Heaven : A Story of Violent Faith': 8.078747897952624,
             'Patron Saint of Liars : A Novel': 7.224003081635493,
             "CORELLI'S MANDOLIN : A Novel": 6.955188711845039,
             "The No. 1 Ladies' Detective Agency (Today Show Book Club #8)": 6.921303758306289,
             'Talk Before Sleep': 6.901411779505289,
             'Waiting (Vintage International)': 6.84476629556454,
             'Falling Angels': 6.806761975141931,
             'The Stone Diaries': 6.704460978282395})