In [8]:
import pandas as pd
from surprise import SVD
from surprise import dataset
from surprise.model_selection import cross_validate
from surprise import Reader
ratings = pd.read_csv("dataset/ratings_small.csv")

# Surprise asks for a Reader even though the ratings are already in memory.
# The rating bounds are taken from the data instead of being hard-coded to
# (1, 5), so the declared scale cannot disagree with the ratings that are
# actually present in the file. For data that really spans 1..5 this yields
# the same scale as before.
# NOTE(review): line_format describes a file layout; it is kept unchanged here.
reader = Reader(
    line_format='user item rating timestamp',
    rating_scale=(ratings['rating'].min(), ratings['rating'].max()),
)

# Also, a dummy Dataset class
class MyDataset(dataset.DatasetAutoFolds):
    """Dataset populated directly from a ratings DataFrame.

    The ``userId``, ``movieId`` and ``rating`` columns of ``df`` are turned
    into ``(user, item, rating, None)`` tuples stored on ``raw_ratings``, and
    ``reader`` is stored on ``self.reader``.

    NOTE(review): the parent initialiser is never called, so any state it
    would normally set up is absent -- confirm nothing downstream needs it.
    """

    def __init__(self, df, reader):
        self.reader = reader

        # Walk the three columns in lock-step; the fourth slot of every
        # tuple is always None.
        rows = zip(df['userId'], df['movieId'], df['rating'])
        self.raw_ratings = [(user, item, score, None)
                            for user, item, score in rows]

# Wrap the ratings frame in the dataset class defined in this cell.
data = MyDataset(df=ratings, reader=reader)

# SVD with the library's default hyper-parameters.
algo = SVD()

# 5-fold cross-validation reporting RMSE and MAE. The call is the last
# expression of the cell, so the dict it returns (per-fold scores and
# timings) is what the notebook displays.
cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8943  0.8900  0.8966  0.9017  0.9031  0.8971  0.0048  
MAE (testset)     0.6881  0.6856  0.6893  0.6925  0.6956  0.6902  0.0035  
Fit time          4.40    4.45    4.42    4.38    4.34    4.40    0.04    
Test time         0.13    0.12    0.23    0.12    0.12    0.14    0.04    


{'test_rmse': array([0.89427217, 0.88998826, 0.89663716, 0.90166955, 0.90306498]),
 'test_mae': array([0.6881216 , 0.68561423, 0.6892994 , 0.69253717, 0.6956244 ]),
 'fit_time': (4.4034202098846436,
  4.454824924468994,
  4.417496681213379,
  4.378246068954468,
  4.339135408401489),
 'test_time': (0.12765860557556152,
  0.12221217155456543,
  0.2254018783569336,
  0.12267208099365234,
  0.12169957160949707)}

In [18]:
from surprise import KNNBasic

# User-based KNN evaluated at three neighbourhood sizes. Every run uses the
# "msd" similarity; only k changes between runs (no cosine or Pearson
# similarity is involved here).
knn_results_by_k = {}
for k in (20, 100, 250):
    algo = KNNBasic(k=k, sim_options={"user_based": True, "name": "msd"})
    # Run 5-fold cross-validation and print results. The returned dict is
    # kept for every k instead of only being shown for the last run.
    knn_results_by_k[k] = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

# Show the raw result dict of the final (k = 250) run as the cell's value.
knn_results_by_k[250]

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9599  0.9582  0.9577  0.9683  0.9685  0.9625  0.0049  
MAE (testset)     0.7363  0.7357  0.7332  0.7425  0.7420  0.7379  0.0037  
Fit time          0.35    0.25    0.26    0.25    0.27    0.28    0.04    
Test time         1.20    1.17    1.17    1.16    1.26    1.19    0.04    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing s

{'test_rmse': array([0.98530913, 0.97353891, 0.96953022, 0.98142342, 0.9788628 ]),
 'test_mae': array([0.76374768, 0.75186394, 0.74887517, 0.7566233 , 0.75495162]),
 'fit_time': (0.2264420986175537,
  0.31958651542663574,
  0.25229644775390625,
  0.2981739044189453,
  0.24930405616760254),
 'test_time': (1.4117977619171143,
  1.4064154624938965,
  1.5429134368896484,
  1.4091429710388184,
  1.4019436836242676)}

In [24]:
# Sweep k = 10..19 for user-based KNN with MSD similarity. The mean score of
# every finished run is recorded, so the sweep can be compared in one table
# rather than by scrolling through the verbose logs.
user_knn_scores = {}
for i in range(10, 20):
    algo = KNNBasic(k=i, sim_options={"user_based": True, "name": "msd"})
    # Run 5-fold cross-validation and print results.
    cv_results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    user_knn_scores[i] = {
        'mean_rmse': cv_results['test_rmse'].mean(),
        'mean_mae': cv_results['test_mae'].mean(),
    }

# One row per k; displayed as the cell's result.
pd.DataFrame.from_dict(user_knn_scores, orient='index').rename_axis('k')

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9609  0.9651  0.9658  0.9705  0.9669  0.9658  0.0031  
MAE (testset)     0.7343  0.7386  0.7392  0.7429  0.7392  0.7388  0.0028  
Fit time          0.31    0.26    0.27    0.27    0.27    0.28    0.02    
Test time         1.10    1.23    1.07    1.06    1.07    1.11    0.07    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing s

In [29]:
from surprise import KNNBasic

# Sweep k = 60..69 for item-based KNN (user_based=False) with MSD similarity.
# Scores are stored as each run finishes, so whatever completed is still
# available in item_knn_scores if the sweep is interrupted part-way.
item_knn_scores = {}
for i in range(60, 70):
    algo = KNNBasic(k=i, sim_options={"user_based": False, "name": "msd"})
    # Run 5-fold cross-validation and print results.
    cv_results = cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    item_knn_scores[i] = {
        'mean_rmse': cv_results['test_rmse'].mean(),
        'mean_mae': cv_results['test_mae'].mean(),
    }

# One row per k; displayed as the cell's result.
pd.DataFrame.from_dict(item_knn_scores, orient='index').rename_axis('k')



Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9387  0.9318  0.9267  0.9285  0.9353  0.9322  0.0044  
MAE (testset)     0.7228  0.7184  0.7178  0.7138  0.7226  0.7191  0.0034  
Fit time          3.56    3.76    3.66    3.68    3.75    3.68    0.07    
Test time         5.43    5.68    5.75    5.71    5.68    5.65    0.11    
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing s

KeyboardInterrupt: 