In [1]:
# import libraries
import pandas as pd
import numpy as np
from surprise import Dataset,Reader,KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

In [2]:
# Load the raw ratings file; it is tab-separated and the column names are
# supplied here rather than read from the file.
ratings = pd.read_csv(
    'ml-100k/u.data',
    sep='\t',
    names=['user_id', 'item_id', 'ratings', 'timestamp'],
)

In [3]:
# Keep only the three columns used below; the timestamp column is dropped.
ratings = ratings.loc[:, ['user_id', 'item_id', 'ratings']]

In [4]:
# Tell surprise the lowest and highest rating values in the data (1 to 5).
lowest_rating, highest_rating = 1, 5
reader = Reader(rating_scale=(lowest_rating, highest_rating))

In [5]:
# Wrap the ratings frame in a surprise Dataset. The columns are passed in
# (user, item, rating) order, which is the order load_from_df expects.
rating_columns = ['user_id', 'item_id', 'ratings']
data = Dataset.load_from_df(ratings[rating_columns], reader)

In [6]:
# Hold out 20% of the ratings for testing; the fixed random_state makes the
# split repeatable from run to run.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Similarity configurations for user-user collaborative filtering, keyed by
# metric label. Both entries differ only in the similarity measure used.
similarity_options = {
    metric: {
        'name': metric,       # similarity measure: 'cosine' or 'pearson'
        'user_based': True,   # True = user-user CF, False = item-item CF
    }
    for metric in ('cosine', 'pearson')
}

In [8]:
# Train and score one KNNBasic model per similarity configuration, recording
# each test-set RMSE under its metric label.
results = {}
separator = '-' * 50
for metric_name, metrics in similarity_options.items():
    algorithms = KNNBasic(sim_options=metrics)
    algorithms.fit(trainset)
    predictions = algorithms.test(testset)

    # accuracy.rmse prints the score itself and also returns it.
    results[metric_name] = rmse = accuracy.rmse(predictions)
    print(separator)

# Side-by-side summary of the unrounded scores.
for name, rmse in results.items():
    print(f'{name} - {rmse}')

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.0194
--------------------------------------------------
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.0150
--------------------------------------------------
cosine - 1.0193536815834319
pearson - 1.0150350905205965
