In [68]:
from surprise import Reader, Dataset, KNNBasic, accuracy, SVD
from surprise.model_selection import KFold
from surprise.model_selection.validation import cross_validate
import pandas as pd
import os

In [69]:
# read in data
ratings_df = pd.read_csv('./archive/ratings_small.csv')
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

In [70]:
# perform 5-fold cross-validation using KNNBasic
kf = KFold(n_splits=5)
algo = KNNBasic()
for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    accuracy.rmse(predictions, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9792
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9679
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9639
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9540
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9774


In [80]:
# unbiased SVD leads to PMF
algo = SVD(biased=False)

# cross_validate() uses n-fold cross-validation based on what's passed
# to cv
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

RMSE Mean Accuracy: 1.0084536797587038
MAE Mean Accuracy: 0.7790472175239379


In [91]:
# user-based collaborative filtering
# using MSD similarity
sim_options = {
    "name": "MSD",
    "user_based": True
}
algo = KNNBasic(sim_optons=sim_options)
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE Mean Accuracy: 0.9672628841460229
MAE Mean Accuracy: 0.74393910466787


In [92]:
# user-based collaborative filtering
# using cosine similarity
sim_options = {
    "name": "cosine",
    "user_based": True
}
algo = KNNBasic(sim_optons=sim_options)
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE Mean Accuracy: 0.9678243379727766
MAE Mean Accuracy: 0.7440532085961062


In [93]:
# user based collaborative filtering
# using pearson similarity
sim_options = {
    "name": "pearson",
    "user_based": True
}
algo = KNNBasic(sim_optons=sim_options)
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE Mean Accuracy: 0.9670517665931964
MAE Mean Accuracy: 0.7438789048639068


In [94]:
# item-based collaborative filtering
# using MSD
sim_options = {
    "name": "MSD",
    "user_based": False
}
algo = KNNBasic(sim_optons=sim_options)
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE Mean Accuracy: 0.9667711278504632
MAE Mean Accuracy: 0.7433348437771021


In [95]:
# Item-based collaborative filteirng
# using cosine similarity
sim_options = {
    "name": "cosine",
    "user_based": False
}
algo = KNNBasic(sim_optons=sim_options)
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE Mean Accuracy: 0.9683757085875271
MAE Mean Accuracy: 0.7443034956169591


In [97]:
# Item-base collaborative filtering
# using pearson similarity
sim_options = {
    "name": "pearson",
    "user_based": False
}
algo = KNNBasic(sim_optons=sim_options)
cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
rmse_mean = cv["test_rmse"].mean()
mae_mean = cv["test_mae"].mean()
print(f'RMSE Mean Accuracy: {rmse_mean}')
print(f'MAE Mean Accuracy: {mae_mean}')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE Mean Accuracy: 0.9676482405729999
MAE Mean Accuracy: 0.7441223069203552


In [99]:
# Plotting Mean RMSE Accuracy and MAE Mean Accuracy
# using different values of k for user-based
# collaborative filtering.
sim_options = {
    "user_based": True
}
k_vals = [k for k in range(5, 45, 5)]
rmse_means = []
mae_means = []
for k in k_vals:
    algo = KNNBasic(k=k, sim_options=sim_options)
    cv = cross_validate(algo, data, measures=['rmse', 'mae'], cv=5)
    rmse_mean = cv[""]

IndentationError: expected an indented block (3048962533.py, line 8)