In [22]:
import pandas as pd 

from surprise import Reader 
book_ratings = pd.read_csv('goodreads_ratings.csv') 
print(book_ratings.head()) 

#1. Print dataset size and examine column data types 
print(book_ratings.describe()) 
print(book_ratings.info()) 

#2. Distribution of ratings 
print(book_ratings.rating.value_counts()) 

#3. Filter ratings that are out of range 
book_ratings = book_ratings[book_ratings['rating']!=0] 
print(book_ratings.rating.value_counts()) 

                            user_id   book_id  \
0  d089c9b670c0b0b339353aebbace46a1   7686667   
1  6dcb2c16e12a41ae0c6c38e9d46f3292  18073066   
2  244e0ce681148a7586d7746676093ce9  13610986   
3  73fcc25ff29f8b73b3a7578aec846394  27274343   
4  f8880e158a163388a990b64fec7df300  11614718   

                          review_id  rating  \
0  3337e0e75701f7f682de11638ccdc60c       3   
1  7201aa3c1161f2bad81258b6d4686c16       5   
2  07a203f87bfe1b65ff58774667f6f80d       5   
3  8be2d87b07098c16f9742020ec459383       1   
4  a29c4ba03e33ad073a414ac775266c5f       4   

                                         review_text  \
0  Like Matched, this book felt like it was echoi...   
1  WOW again! 4,5 Stars \r\n So i wont forget to ...   
2  The second novel was hot & heavy. Not only in ...   
3  What a maddening waste of time. And I unfortun...   
4  4.5 stars! \r\n This was an awesome read! \r\n...   

                       date_added                    date_updated  \
0  Fri Apr 29 14

In [23]:
#4. Prepare data for surprise: build a Suprise reader object 
from surprise import Reader 
reader = Reader(rating_scale=(1, 5)) 

#5. Load book_ratings into a Surprise Dataset 
from surprise import Dataset 
Surprise = Dataset.load_from_df(book_ratings[['user_id', 'book_id', 'rating']], reader) 

#6. Create a 80:20 train-test split and set the random state to 7 
from surprise.model_selection import train_test_split 
trainset,testset = train_test_split(Surprise,test_size=0.2,random_state=7) 

#7. Use KNNBasice from Surprise to train a collaborative filter 
from surprise import KNNBasic 
recommender = KNNBasic() 
recommender.fit(trainset) 

#8. Evaluate the recommender system 
from surprise import accuracy 
predictions = recommender.test(testset) 
accuracy.rmse(predictions) 

#9. Prediction on a user who gave the "The Three-Body Problem" a rating of 5 
print(recommender.predict('8842281e1d1347389f2ab93d60773d4d', '18007564').est)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.1105
3.8250739644970415


In [37]:
#10. Tuning hyperparameters
k_params = [10,20,40]
rmse_results = {}
predictions_results = {}

for i in k_params:
    recommender_2 = KNNBasic(k=i)
    recommender_2.fit(trainset)
    predictions_2 = recommender_2.test(testset) 
    rmse_results[i] = accuracy.rmse(predictions_2)
    predictions_results[i] = recommender_2.predict('8842281e1d1347389f2ab93d60773d4d', '18007564').est
    print(f"For k={i}, RMSE: {rmse_results[i]}, Prediction: {predictions_results[i]}")

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.1105
For k=10, RMSE: 1.110471008157185, Prediction: 3.8250739644970415
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.1105
For k=20, RMSE: 1.110471008157185, Prediction: 3.8250739644970415
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 1.1105
For k=40, RMSE: 1.110471008157185, Prediction: 3.8250739644970415
