In [3]:
import pandas as pd
from surprise import Reader, SVD, KNNBasic

# Load the Goodreads ratings export and preview the first rows.
df = pd.read_csv('goodreads_ratings.csv')
print(df.head())

#1. Print dataset size and examine column data types
print(df.shape)  # (number of rows, number of columns)
print(df.dtypes)

#2. Distribution of ratings
print(df['rating'].value_counts())

#3. Filter ratings that are out of range
# Keep only ratings on the 1-5 scale that the Surprise Reader is configured
# with further down. This drops the 0 entries (presumably "not rated" --
# confirm against the dataset description) as well as any missing or
# otherwise off-scale values.
filtered_ratings = df[df['rating'].between(1, 5)]
# Count per rating value; value_counts() on the whole frame would count
# unique rows instead of showing the rating distribution.
print(filtered_ratings['rating'].value_counts())

#4. Prepare data for Surprise: build a Reader describing the 1-5 rating scale
from surprise import Dataset
from surprise.model_selection import train_test_split

reader = Reader(rating_scale=(1, 5))

#5. Load the filtered ratings into a Surprise Dataset
# Columns are passed in user, item, rating order.
data = Dataset.load_from_df(
    filtered_ratings[['user_id', 'book_id', 'rating']],
    reader,
)

#6. Create an 80:20 train-test split; random_state=7 keeps it reproducible
trainset, testset = train_test_split(data, test_size=0.2, random_state=7)

#7. Use KNNBasic from Surprise to train a collaborative filter
from surprise import accuracy

knn = KNNBasic()
knn.fit(trainset)
test = knn.test(testset)  # predictions for the held-out ratings

#8. Evaluate the recommender system
# verbose=False: accuracy.rmse() prints its own "RMSE: ..." line by default,
# which would duplicate the score reported in the message below.
knn_rmse = accuracy.rmse(test, verbose=False)
print(f'The accuracy of KNNBasic prediction model using RMSE is - {knn_rmse}')
print('=============================================================')

# Train SVD on the same split and see if it does better than KNNBasic
svd = SVD()
svd.fit(trainset)
test_svd = svd.test(testset)
svd_rmse = accuracy.rmse(test_svd, verbose=False)
print(f'The accuracy of SVD prediction model using RMSE is - {svd_rmse}')
print('=============================================================')

#9. Predict the rating for a user who gave "The Three-Body Problem" a 5
uid = '8842281e1d1347389f2ab93d60773d4d'  # raw user id
bid = 18245960                            # raw book id

# r_ui is the known true rating; verbose=True prints each prediction.
user_pred = knn.predict(uid=uid, iid=bid, r_ui=5, verbose=True)
print('=============================================================')

# Same user/book pair with the SVD model, for comparison.
user_pred_svd = svd.predict(uid=uid, iid=bid, r_ui=5, verbose=True)

ModuleNotFoundError: No module named 'codecademylib3'