In [1]:
import os
from surprise import SVD

from surprise import Reader
from surprise import Dataset

from surprise.model_selection import *
from surprise.prediction_algorithms import *

In [2]:
# Parser for comma-separated "user,item,rating" rows. The first line of the
# file is skipped (presumably a header row -- confirm against the CSV) and the
# ratings are declared to lie on a 1-to-5 scale.
reader = Reader(
    line_format='user item rating',
    sep=',',
    rating_scale=[1, 5],
    skip_lines=1,
)

# expanduser() only rewrites a leading "~"; this relative path passes through
# unchanged.
file_path = os.path.expanduser('./dataset/ratings_1.csv')

# Full ratings dataset shared by every evaluation cell below.
data = Dataset.load_from_file(file_path, reader)

In [9]:
# Random baseline: NormalPredictor, evaluated with 5-fold cross-validation.
algo = NormalPredictor()

# Seeded 5-fold split: passing a bare `cv=5` reshuffles the ratings on every
# call, so the folds (and the reported RMSE) would differ between runs.
# NOTE(review): NormalPredictor itself draws its predictions at random, so the
# RMSE may still vary slightly between runs even with fixed folds.
cross_validate(algo, data, measures=['RMSE'],
               cv=KFold(n_splits=5, shuffle=True, random_state=42),
               verbose=True)

Evaluating RMSE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.5051  1.5088  1.5032  1.5061  1.5002  1.5047  0.0029  
Fit time          0.12    0.16    0.16    0.16    0.16    0.15    0.02    
Test time         0.18    0.19    0.18    0.19    0.18    0.19    0.00    


{'test_rmse': array([1.50505725, 1.50877884, 1.5031507 , 1.50612138, 1.50015873]),
 'fit_time': (0.1190023422241211,
  0.1557154655456543,
  0.16294384002685547,
  0.1559309959411621,
  0.15606188774108887),
 'test_time': (0.18471527099609375,
  0.18528294563293457,
  0.17889070510864258,
  0.1932811737060547,
  0.18289685249328613)}

In [8]:
# Bias-only baseline model; the biases are fitted with SGD using the
# hyper-parameters below (step size, number of passes, regularisation).
bsl = {
    'method': "sgd",
    'learning_rate': 0.005,
    'n_epochs': 50,
    'reg': 0.02,
}
algo = BaselineOnly(bsl_options=bsl, verbose=True)

# Seeded 5-fold split: passing a bare `cv=5` reshuffles the ratings on every
# call, so the folds (and the reported RMSE) would differ between runs.
cross_validate(algo, data, measures=['RMSE'],
               cv=KFold(n_splits=5, shuffle=True, random_state=42),
               verbose=True)

Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Evaluating RMSE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9173  0.9158  0.9127  0.9162  0.9197  0.9163  0.0023  
Fit time          1.14    1.25    1.22    1.19    1.14    1.19    0.05    
Test time         0.13    0.14    0.14    0.13    0.13    0.13    0.00    


{'test_rmse': array([0.91727437, 0.91579655, 0.91270248, 0.91622557, 0.91967138]),
 'fit_time': (1.1370832920074463,
  1.2504956722259521,
  1.2215907573699951,
  1.1935179233551025,
  1.1350059509277344),
 'test_time': (0.13445305824279785,
  0.13799762725830078,
  0.13516998291015625,
  0.1336827278137207,
  0.1323986053466797)}

In [11]:
# Neighbourhood settings reused by the KNN cells that follow.
max_neighbors = 40   # passed as `k`: upper bound on neighbours per prediction
min_neighbors = 1    # passed as `min_k`: lower bound on neighbours per prediction
similarity_options = {
    'user_based': False,   # similarities are computed between items, not users
    'name': "cosine",
    'min_support': 3,
}

# Plain k-NN collaborative filtering on item-item cosine similarities.
algo = KNNBasic(k=max_neighbors, min_k=min_neighbors,
                sim_options=similarity_options, verbose=True)

# Seeded 5-fold split: passing a bare `cv=5` reshuffles the ratings on every
# call, so the folds (and the reported RMSE) would differ between runs.
cross_validate(algo, data, measures=['RMSE'],
               cv=KFold(n_splits=5, shuffle=True, random_state=42),
               verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0065  1.0064  1.0117  1.0053  1.0050  1.0070  0.0024  
Fit time          3.69    3.57    3.84    3.74    3.73    3.71    0.09    
Test time         6.23    6.50    6.58    6.61    6.61    6.50    0.14    


{'test_rmse': array([1.00654113, 1.0064069 , 1.01171412, 1.00533748, 1.00501664]),
 'fit_time': (3.693671226501465,
  3.5678300857543945,
  3.8372340202331543,
  3.7411367893218994,
  3.734321117401123),
 'test_time': (6.226246356964111,
  6.495122671127319,
  6.579142808914185,
  6.606206893920898,
  6.608976364135742)}

In [12]:
# k-NN variant KNNWithMeans, configured with the neighbourhood settings
# (max_neighbors, min_neighbors, similarity_options) defined in an earlier cell.
algo = KNNWithMeans(k=max_neighbors, min_k=min_neighbors,
                    sim_options=similarity_options, verbose=True)

# Seeded 5-fold split: passing a bare `cv=5` reshuffles the ratings on every
# call, so the folds (and the reported RMSE) would differ between runs.
cross_validate(algo, data, measures=['RMSE'],
               cv=KFold(n_splits=5, shuffle=True, random_state=42),
               verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9203  0.9172  0.9109  0.9163  0.9179  0.9165  0.0031  
Fit time          4.02    4.29    4.15    3.95    4.22    4.13    0.13    
Test time         7.53    7.34    7.01    7.51    7.39    7.36    0.19    


{'test_rmse': array([0.92026258, 0.91718128, 0.91086821, 0.9162915 , 0.91787824]),
 'fit_time': (4.019234657287598,
  4.287554502487183,
  4.151025772094727,
  3.9452571868896484,
  4.222806215286255),
 'test_time': (7.528066873550415,
  7.344459533691406,
  7.009069204330444,
  7.513416528701782,
  7.394527196884155)}

In [13]:
# k-NN variant KNNBaseline, configured with the neighbourhood settings
# (max_neighbors, min_neighbors, similarity_options) defined in an earlier cell.
# No bsl_options are passed, so the library's default baseline settings apply.
algo = KNNBaseline(k=max_neighbors, min_k=min_neighbors,
                   sim_options=similarity_options, verbose=True)

# Seeded 5-fold split: passing a bare `cv=5` reshuffles the ratings on every
# call, so the folds (and the reported RMSE) would differ between runs.
cross_validate(algo, data, measures=['RMSE'],
               cv=KFold(n_splits=5, shuffle=True, random_state=42),
               verbose=True)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9107  0.9102  0.9159  0.9185  0.9153  0.9141  0.0032  
Fit time          3.99    4.07    4.32    4.25    4.37    4.20    0.15    
Test time         7.06    6.91    7.26    7.76    7.73    7.34    0.34    


{'test_rmse': array([0.91074609, 0.9102309 , 0.91592942, 0.91852436, 0.91531341]),
 'fit_time': (3.990248203277588,
  4.073612689971924,
  4.322655200958252,
  4.253809452056885,
  4.367433547973633),
 'test_time': (7.062510967254639,
  6.913889408111572,
  7.259570837020874,
  7.757489442825317,
  7.729433059692383)}

In [14]:
# SVD matrix-factorisation hyper-parameters.
number_of_factors = 100
use_together_with_baseline_estimator = True   # forwarded as SVD's `biased` flag
number_of_epochs = 20
learning_rate = .005       # forwarded as `lr_all`
lambda_parameter = .02     # forwarded as `reg_all`

# random_state seeds the model's random initialisation so that repeated runs
# of this cell train the same model on the same data.
algo = SVD(n_factors=number_of_factors,
           biased=use_together_with_baseline_estimator,
           n_epochs=number_of_epochs,
           lr_all=learning_rate,
           reg_all=lambda_parameter,
           random_state=42,
           verbose=True)

# Seeded 5-fold split: passing a bare `cv=5` reshuffles the ratings on every
# call, so the folds (and the reported RMSE) would differ between runs.
cross_validate(algo, data, measures=['RMSE'],
               cv=KFold(n_splits=5, shuffle=True, random_state=42),
               verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

{'test_rmse': array([0.90570575, 0.90225014, 0.91202103, 0.91051584, 0.91307763]),
 'fit_time': (6.812334775924683,
  6.714956521987915,
  6.8070290088653564,
  6.773496627807617,
  6.962108135223389),
 'test_time': (0.22855401039123535,
  0.21863055229187012,
  0.22029733657836914,
  0.28783655166625977,
  0.30803823471069336)}