## Collaborative Filtering
#### Model Based Approach

In [3]:
import pandas as pd
# import SVD from surprise
from surprise import SVD
# # import dataset from surprise
from surprise import Dataset
from surprise import Reader
# import accuracy from surprise
from surprise import accuracy

# import train_test_split from surprise.model_selection
from surprise.model_selection import train_test_split
# import GridSearchCV from surprise.model_selection
from surprise.model_selection import GridSearchCV
# import cross_validate from surprise.model_selection
from surprise.model_selection import cross_validate

We will be working with the [same data](https://drive.google.com/file/d/1WvTmAfO09TCX7xp7uu06__ziic7JnrL5/view?usp=sharing) we used in the previous exercise.

In [4]:
# Load the ratings CSV; the file is semicolon-separated and read with the
# "latin" codec.
book_ratings = pd.read_csv(
    '~/Downloads/BX-CSV-Dump/BX-Book-Ratings.csv',
    sep=";",
    encoding="latin",
)

In [5]:
# Show the loaded ratings frame; the rendering below is truncated to its first and last rows.
book_ratings

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


* create surprise dataset from book_ratings

In [6]:
# Tell Surprise that ratings lie on a 0-10 scale.
reader = Reader(rating_scale=(0, 10))
# Wrap the pandas frame (User-ID, ISBN, Book-Rating columns) as a Surprise dataset.
data = Dataset.load_from_df(df=book_ratings, reader=reader)

* split data to train and test set, use test size 15%

In [7]:
# Hold out 15% of the ratings as a test set; the fixed seed makes the split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.15,
    random_state=42,
)

* Use SVD (with default settings) to create recommendations for each user
    - print default model's rmse that was computed on the test set (using object accuracy we imported in the beginning)

In [8]:
# SVD model built with no arguments, i.e. the library's default settings.
model = SVD()

In [None]:
# Fit the default SVD model on the training split and score the held-out ratings.
model.fit(train_data)
predictions = model.test(test_data)
rmse = accuracy.rmse(predictions, verbose=False)
print('RMSE:', rmse)

# Scoring every user against every ISBN is a full cartesian product of
# model.predict calls, which does not finish in any practical amount of time.
# The work is therefore bounded: only the TOP_N best-scoring books are kept per
# user, and only the first MAX_USERS users are processed.
TOP_N = 10
MAX_USERS = 5  # set to None to build recommendations for every user (very slow)

# Computed once up front instead of once per user inside the loop.
all_user_ids = book_ratings['User-ID'].unique()
all_isbns = book_ratings['ISBN'].unique()


def recommend_for_user(uid, n=TOP_N):
    """Return the n ISBNs with the highest predicted rating for one user.

    Parameters
    ----------
    uid : raw user id as it appears in the 'User-ID' column.
    n : number of recommendations to keep.

    Returns
    -------
    dict mapping ISBN -> estimated rating, ordered from best to worst.
    """
    estimates = {isbn: model.predict(uid, isbn).est for isbn in all_isbns}
    best_isbns = sorted(estimates, key=estimates.get, reverse=True)[:n]
    return {isbn: estimates[isbn] for isbn in best_isbns}


# Same shape as before ({user id: {ISBN: estimated rating}}), but bounded in size.
user_recommendations = {
    uid: recommend_for_user(uid) for uid in all_user_ids[:MAX_USERS]
}
user_recommendations

RMSE: 3.5005846148104247


* create a parameter grid using these values:
* 'n_factors': [110, 120, 140, 160]
* 'reg_all': [0.08, 0.1, 0.15]

In [None]:
# Hyperparameter values to try: number of latent factors and the shared
# regularisation term.
param_grid = {
    'n_factors': [110, 120, 140, 160],
    'reg_all': [0.08, 0.1, 0.15],
}

* instantiate GridSearchCV with SVD as the model, our pre-defined parameter grid, and RMSE and MAE as evaluation metrics

In [None]:
# The class imported from surprise.model_selection is GridSearchCV; the name
# `GridSearch` is never imported and raises NameError.
# The SVD class itself (not an instance) is passed, together with the grid and
# the two accuracy measures to report.
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'])

* fit GridSearch

In [None]:
# GridSearchCV.fit builds its own cross-validation folds and therefore needs a
# Dataset, whereas train_data is a Trainset. Rebuild a Dataset that contains
# only the training ratings so the held-out test split stays unseen during the
# search. Trainset stores inner ids, so they are mapped back to the raw ones.
train_ratings = pd.DataFrame(
    [
        (train_data.to_raw_uid(inner_uid), train_data.to_raw_iid(inner_iid), rating)
        for inner_uid, inner_iid, rating in train_data.all_ratings()
    ],
    columns=['User-ID', 'ISBN', 'Book-Rating'],
)
train_dataset = Dataset.load_from_df(train_ratings, reader)
gs.fit(train_dataset)

* print best RMSE score from training

In [None]:
# Print the grid search's best score for the RMSE measure (best_score is looked up by measure name).
print(gs.best_score['rmse'])

* predict test set with optimal model based on `RMSE`

In [None]:
# best_estimator holds an SVD instance configured with the best parameters for
# the chosen measure, but it has not been trained (GridSearchCV's refit option
# is off by default). Fit it on the training split before using it to predict.
best_model = gs.best_estimator['rmse']
best_model.fit(train_data)

In [None]:
# Predict the held-out test ratings with the model selected by the grid search.
new_predictions = best_model.test(test_data)

* print optimal model's RMSE that was computed on test set
    - is it better than the RMSE obtained with the default parameters?

In [None]:
# Evaluate the tuned model's predictions. `predictions` belongs to the default
# model, so using it here would just repeat the earlier RMSE.
rmse = accuracy.rmse(new_predictions)