Here we perform **User-User** collaborative filtering with the Surprise library.

In [1]:
# used for user-user collaborative filter based on n neighbours
from surprise import KNNWithMeans, Dataset, accuracy

from surprise.model_selection import train_test_split

In [2]:
# Load the built-in MovieLens 100k ratings dataset ('ml-100k') through Surprise's Dataset helper
data = Dataset.load_builtin('ml-100k')

In [3]:
# Split the ratings into train (85%) and test (15%) sets.
# A fixed random_state makes the split -- and therefore every prediction and
# the RMSE below -- reproducible under Restart Kernel -> Run All.
# NOTE: the outputs recorded in this notebook came from an unseeded run, so
# re-running with this seed will give slightly different numbers.
trainset, testset = train_test_split(data, test_size=.15, random_state=42)

In [4]:
# sim_options['user_based'] switches between user-based (True) and item-based (False)
# collaborative filtering; here we build a user-user filter.
# Similarity measure: 'pearson_baseline' (not plain 'pearson').
# k=50 is the maximum number of neighbours considered per prediction -- the number
# actually used is reported as 'actual_k' in each prediction's details and can be lower.
algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': True})
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x28978d800a0>

In [5]:
# Pick one (user, item) pair to query; raw ids are passed to the model as strings
uid = '196'  # raw user id
iid = '302'  # raw item id

In [6]:
# Estimate the rating this user would give this item.
# verbose=True prints the prediction; no true rating is supplied, so r_ui shows as None.
pred = algo.predict(uid=uid, iid=iid, verbose=True)

user: 196        item: 302        r_ui = None   est = 4.07   {'actual_k': 50, 'was_impossible': False}


In [7]:
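# user 196 is predicted to rate item 302 at about 4.07 (see the output above),
# with its estimate computed from 50 neighbours (actual_k = 50).
# The exact value can vary between runs because the train/test split is random.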

In [8]:
# Run the trained model against the held-out test set; the result is a list of
# Prediction objects, one per (user, item, rating) entry in testset
test_pred = algo.test(testset)

In [9]:
# Preview only the first few predictions instead of dumping the whole list,
# which floods the notebook with thousands of entries.
# Each Prediction holds: uid (user id), iid (item id), r_ui (actual rating),
# est (predicted rating) and details ('actual_k' = neighbours actually used).
test_pred[:10]

[Prediction(uid='851', iid='687', r_ui=2.0, est=2.262368782804778, details={'actual_k': 28, 'was_impossible': False}),
 Prediction(uid='7', iid='643', r_ui=4.0, est=4.97583081570997, details={'actual_k': 1, 'was_impossible': False}),
 Prediction(uid='888', iid='269', r_ui=5.0, est=4.717656835989856, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='178', iid='679', r_ui=4.0, est=3.120294096828221, details={'actual_k': 47, 'was_impossible': False}),
 Prediction(uid='42', iid='387', r_ui=3.0, est=3.429200690837354, details={'actual_k': 22, 'was_impossible': False}),
 Prediction(uid='666', iid='427', r_ui=4.0, est=4.347288168969287, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='64', iid='451', r_ui=2.0, est=3.3587989046652926, details={'actual_k': 50, 'was_impossible': False}),
 Prediction(uid='561', iid='607', r_ui=5.0, est=3.3158822260978202, details={'actual_k': 37, 'was_impossible': False}),
 Prediction(uid='289', iid='7', r_ui=4.0, est=3.1856

In [10]:
# Compute the RMSE (root-mean-square error) of the test-set predictions.
# With verbose=True the call prints "RMSE: ..." and, as the last expression in the
# cell, its returned value is also displayed -- hence the number appears twice below.
print("User-based Model : Test Set")
accuracy.rmse(test_pred, verbose=True)

User-based Model : Test Set
RMSE: 0.9327


0.9327082979078056