In [4]:
import pandas as pd

from surprise import accuracy, Dataset, Reader, SVD, SVDpp, NMF
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split




In [3]:
# Load the built-in MovieLens 100k ratings dataset, downloading it first if needed.
data = Dataset.load_builtin(name="ml-100k")

In [5]:
# Hold-out split: 25% of the ratings go to the test set, the rest to the train set.
# A fixed random_state makes the shuffle repeatable, so the hold-out RMSE values
# computed from this split can be reproduced after a kernel restart.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

# SVD algorithm

In [6]:
# Surprise's SVD matrix-factorisation algorithm with default hyper-parameters.
# Seeding it makes the random initialisation of the factors repeatable.
algo = SVD(random_state=42)

# Run 5-fold cross-validation; verbose=True prints the per-fold table.
# A seeded KFold produces the same folds on every run instead of a fresh
# random partition each time the cell is executed.
SVD_cv = cross_validate(
    algo,
    data,
    measures=["RMSE", "MAE"],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

# Average every reported quantity (test_rmse, test_mae, fit_time, test_time)
# over the folds. The raw per-fold dict stays available as SVD_cv.
SVD_result = pd.DataFrame.from_dict(SVD_cv).mean(axis=0)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9342  0.9407  0.9331  0.9373  0.9395  0.9370  0.0029  
MAE (testset)     0.7374  0.7405  0.7362  0.7370  0.7421  0.7386  0.0023  
Fit time          1.57    1.69    1.54    1.55    1.55    1.58    0.06    
Test time         0.18    0.18    0.30    0.20    0.23    0.22    0.05    


In [7]:
# Fit SVD on the hold-out training split, then predict every rating in the test split.
predictions = algo.fit(trainset).test(testset)

# accuracy.rmse prints the "RMSE: ..." line and returns the score.
SVD_rmse = accuracy.rmse(predictions, verbose=True)

RMSE: 0.9436


# SVD++ algorithm

In [8]:
# Surprise's SVD++ algorithm with default hyper-parameters.
# Seeding it makes the random initialisation of the factors repeatable.
algo_pp = SVDpp(random_state=42)

# Run 5-fold cross-validation; verbose=True prints the per-fold table.
# A seeded KFold produces the same folds on every run instead of a fresh
# random partition each time the cell is executed.
SVDpp_cv = cross_validate(
    algo_pp,
    data,
    measures=["RMSE", "MAE"],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

# Average every reported quantity (test_rmse, test_mae, fit_time, test_time)
# over the folds. The raw per-fold dict stays available as SVDpp_cv.
SVDpp_result = pd.DataFrame.from_dict(SVDpp_cv).mean(axis=0)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9229  0.9244  0.9218  0.9133  0.9185  0.9202  0.0040  
MAE (testset)     0.7224  0.7251  0.7228  0.7185  0.7214  0.7221  0.0021  
Fit time          10.26   9.78    9.17    9.18    9.08    9.49    0.46    
Test time         3.56    3.22    3.17    3.38    3.20    3.31    0.15    


In [9]:
# Fit SVD++ on the hold-out training split, then predict every rating in the test split.
predictions_pp = algo_pp.fit(trainset).test(testset)

# accuracy.rmse prints the "RMSE: ..." line and returns the score.
SVD_pp_rmse = accuracy.rmse(predictions_pp, verbose=True)

RMSE: 0.9274


# NMF algorithm

In [10]:
# Surprise's NMF (non-negative matrix factorisation) algorithm with default hyper-parameters.
# Seeding it makes the random initialisation of the factors repeatable.
algo_nmf = NMF(random_state=42)

# Run 5-fold cross-validation; verbose=True prints the per-fold table.
# A seeded KFold produces the same folds on every run instead of a fresh
# random partition each time the cell is executed.
NMF_cv = cross_validate(
    algo_nmf,
    data,
    measures=["RMSE", "MAE"],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

# Average every reported quantity (test_rmse, test_mae, fit_time, test_time)
# over the folds. The raw per-fold dict stays available as NMF_cv.
NMF_result = pd.DataFrame.from_dict(NMF_cv).mean(axis=0)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9629  0.9607  0.9698  0.9617  0.9571  0.9624  0.0042  
MAE (testset)     0.7569  0.7546  0.7603  0.7552  0.7533  0.7561  0.0024  
Fit time          3.74    3.30    3.10    3.24    4.03    3.48    0.35    
Test time         0.35    0.17    0.32    0.16    0.17    0.23    0.08    


In [11]:
# Fit NMF on the hold-out training split, then predict every rating in the test split.
predictions_nmf = algo_nmf.fit(trainset).test(testset)

# accuracy.rmse prints the "RMSE: ..." line and returns the score.
NMF_rmse = accuracy.rmse(predictions_nmf, verbose=True)

RMSE: 0.9738


# Surprise results

In [12]:
# Side-by-side comparison of the fold-averaged cross-validation results:
# one column per algorithm, one row per reported quantity.
# Building the frame in a single constructor call aligns the three Series on
# their shared index, instead of creating an empty frame and filling it in
# column by column.
surprise_results = pd.DataFrame(
    {
        'SVD': SVD_result,
        'SVDpp': SVDpp_result,
        'NMF': NMF_result,
    }
)
surprise_results

Unnamed: 0,SVD,SVDpp,NMF
test_rmse,0.936965,0.920184,0.962417
test_mae,0.738632,0.722052,0.756068
fit_time,1.577313,9.494595,3.48276
test_time,0.218775,3.30589,0.232422


In [13]:
# Hold-out RMSE of each algorithm, one line per model.
print('RMSE:')
for label, score in (
    ('SVD:', SVD_rmse),
    ('SVDpp:', SVD_pp_rmse),
    ('NMF:', NMF_rmse),
):
    print(label, score)

RMSE:
SVD: 0.9436303697877875
SVDpp: 0.9274014325427109
NMF: 0.9738416935330341
