<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#1.-load-a-dataset-from-a-file" data-toc-modified-id="1.-load-a-dataset-from-a-file-1">1. load a dataset from a file</a></span></li><li><span><a href="#2.-KNNBasic" data-toc-modified-id="2.-KNNBasic-2">2. KNNBasic</a></span></li><li><span><a href="#3.-KNNWithMeans" data-toc-modified-id="3.-KNNWithMeans-3">3. KNNWithMeans</a></span></li><li><span><a href="#4.-KNNWithZScore" data-toc-modified-id="4.-KNNWithZScore-4">4. KNNWithZScore</a></span></li><li><span><a href="#5.-KNNBaseline" data-toc-modified-id="5.-KNNBaseline-5">5. KNNBaseline</a></span></li></ul></div>

In [1]:
import pandas as pd
from surprise import KNNBasic,KNNWithMeans,KNNWithZScore,KNNBaseline
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import KFold

## 1. load a dataset from a file

In [2]:
# Describe the layout of ./ratings.csv: comma-separated rows holding
# "user item rating timestamp", with the first line of the file skipped.
# NOTE(review): no rating_scale is passed, so Reader uses its default scale --
# confirm that it matches the range of ratings stored in ratings.csv.
reader = Reader(
    sep=',',
    skip_lines=1,
    line_format='user item rating timestamp',
)
data = Dataset.load_from_file('./ratings.csv', reader=reader)

# Trainset built from the whole of `data`.
# NOTE(review): the cross-validation cells in this notebook split `data`
# directly via kf.split(data); train_set does not appear to be used by them.
train_set = data.build_full_trainset()

## 2. KNNBasic

In [3]:
# Similarity settings reused by every KNN variant in this notebook:
# cosine similarity computed between items rather than between users.
sim_options = {'name': 'cosine',
               'user_based': False  # compute similarities between items
               }
algo = KNNBasic(sim_options=sim_options)

# Seed the fold generator. Without random_state, each kf.split(data) call
# reshuffles using numpy's global RNG, so every algorithm below would be scored
# on a different set of folds and a re-run would not reproduce the numbers.
# With an integer seed, repeated split() calls return the same 3 folds.
kf = KFold(n_splits=3, random_state=42)

In [4]:
%%time
for trainset, testset in kf.split(data):
    
    # train and test algorithm
    algo.fit(trainset)
    predictions = algo.test(testset)
    
    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9635
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9600
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.9625
Wall time: 19min 37s


## 3. KNNWithMeans

In [5]:
# Rebind `algo` to a KNNWithMeans model, reusing the sim_options dict defined
# in the KNNBasic section (cosine similarity, user_based=False).
algo = KNNWithMeans(sim_options=sim_options)

In [6]:
%%time
for trainset, testset in kf.split(data):
    
    # train and test algorithm
    algo.fit(trainset)
    predictions = algo.test(testset)
    
    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8698
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8671
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8704
Wall time: 18min 52s


## 4. KNNWithZScore

In [7]:
# Rebind `algo` to a KNNWithZScore model, reusing the sim_options dict defined
# in the KNNBasic section (cosine similarity, user_based=False).
algo = KNNWithZScore(sim_options=sim_options)

In [8]:
%%time
for trainset, testset in kf.split(data):
    
    # train and test algorithm
    algo.fit(trainset)
    predictions = algo.test(testset)
    
    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8699
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8702
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8714
Wall time: 25min 37s


## 5. KNNBaseline

In [9]:
# Rebind `algo` to a KNNBaseline model, reusing the sim_options dict defined
# in the KNNBasic section (cosine similarity, user_based=False).
# The recorded run logs "Estimating biases using als..." before each
# similarity-matrix computation for this algorithm.
algo = KNNBaseline(sim_options=sim_options)

In [10]:
%%time
for trainset, testset in kf.split(data):
    
    # train and test algorithm
    algo.fit(trainset)
    predictions = algo.test(testset)
    
    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8673
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8686
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.8694
Wall time: 19min 35s
