# Date: 27.06.2020
# MovieLens: Testing (Surprise)
# Coder: Maksym Chernozhukov

In [1]:
from surprise import Dataset, SVD
from surprise.model_selection import cross_validate

# Built-in MovieLens 100k ratings (fetched if not already available locally).
data = Dataset.load_builtin('ml-100k')

# SVD with its default hyper-parameters.
algo = SVD()

# 5-fold cross-validation reporting RMSE and MAE.
# verbose=True prints the per-fold table; the call is the cell's last
# expression, so its return value is also displayed.
cross_validate(
    algo,
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9305  0.9339  0.9340  0.9438  0.9345  0.9353  0.0045  
MAE (testset)     0.7358  0.7385  0.7363  0.7414  0.7352  0.7374  0.0023  
Fit time          7.96    6.72    5.53    8.55    7.28    7.21    1.04    
Test time         0.30    0.21    0.28    0.18    0.29    0.25    0.05    


{'test_rmse': array([0.93050395, 0.93390317, 0.93400579, 0.94383641, 0.93445526]),
 'test_mae': array([0.73583745, 0.73846379, 0.73628838, 0.74144036, 0.7351755 ]),
 'fit_time': (7.962108135223389,
  6.717039108276367,
  5.532292127609253,
  8.545579671859741,
  7.278312921524048),
 'test_time': (0.2962071895599365,
  0.2050468921661377,
  0.27628040313720703,
  0.18151116371154785,
  0.286238431930542)}

In [2]:
from surprise import SVD, Dataset, accuracy
from surprise.model_selection import train_test_split

# Built-in MovieLens 100k ratings (fetched if not already available locally).
data = Dataset.load_builtin('ml-100k')

# Random split: 25% of the ratings are held out for testing.
trainset, testset = train_test_split(data, test_size=0.25)

# SVD with its default hyper-parameters.
algo = SVD()

# Fit on the training part, then predict every held-out rating.
algo.fit(trainset)
predictions = algo.test(testset)

# RMSE over the held-out predictions (last expression, so it is displayed).
accuracy.rmse(predictions)

RMSE: 0.9383


0.9382576983155428

In [3]:
from surprise import SVD
from surprise import Dataset

# Load the movielens-100k dataset
data = Dataset.load_builtin('ml-100k')

# Build a trainset from the whole dataset (no ratings are held out here).
trainset = data.build_full_trainset()

# Build an algorithm, and train it.
algo = SVD()
algo.fit(trainset)

uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
iid = str(302)  # raw item id (as in the ratings file). They are **strings**!

# Get a prediction for one specific (user, item) pair.
# r_ui is the rating shown next to the estimate in the verbose line.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

# Probe the pairs (user "i", item "i") for i = 0..9.
# r_ui=0 is only a placeholder echoed in the verbose output; it is presumably
# not a real rating for these pairs.
# NOTE(review): confirm that raw id "0" exists in ml-100k; if it does not,
# the first estimate is for an unknown user/item.
for i in range(10):
    uid = str(i)
    iid = str(i)
    # First argument is the user id, second the item id. The original passed
    # `iid` for both, which only worked because the two ids are equal here.
    pred = algo.predict(uid, iid, r_ui=0, verbose=True)

user: 196        item: 302        r_ui = 4.00   est = 3.84   {'was_impossible': False}
user: 0          item: 0          r_ui = 0.00   est = 3.53   {'was_impossible': False}
user: 1          item: 1          r_ui = 0.00   est = 4.32   {'was_impossible': False}
user: 2          item: 2          r_ui = 0.00   est = 3.28   {'was_impossible': False}
user: 3          item: 3          r_ui = 0.00   est = 2.82   {'was_impossible': False}
user: 4          item: 4          r_ui = 0.00   est = 4.20   {'was_impossible': False}
user: 5          item: 5          r_ui = 0.00   est = 2.62   {'was_impossible': False}
user: 6          item: 6          r_ui = 0.00   est = 3.77   {'was_impossible': False}
user: 7          item: 7          r_ui = 0.00   est = 4.42   {'was_impossible': False}
user: 8          item: 8          r_ui = 0.00   est = 4.36   {'was_impossible': False}
user: 9          item: 9          r_ui = 0.00   est = 4.55   {'was_impossible': False}


In [28]:
from surprise import SVD
from surprise import Dataset
from surprise import KNNBaseline
from surprise import accuracy
from surprise.model_selection import train_test_split

# Load the movielens-1m dataset (the larger set, not ml-100k as in the cells above).
data = Dataset.load_builtin('ml-1m')

# Random split: hold out 10% of the ratings for testing.
trainset, testset = train_test_split(data, test_size=.1)

# Build the two algorithms to compare.
knn = KNNBaseline(k=60)
svd = SVD(n_factors=10, n_epochs=30)

# Fit both models on the same training split.
knn.fit(trainset)
svd.fit(trainset)

# Predict the held-out ratings with each model.
knn_predictions = knn.test(testset)
svd_predictions = svd.test(testset)

# Then compute RMSE (KNN first, SVD second).
accuracy.rmse(knn_predictions)
accuracy.rmse(svd_predictions)

# Raw ids are strings.
# NOTE(review): this user/item pair and r_ui=4 are the same values used in the
# ml-100k cell above; confirm they are meaningful for ml-1m.
uid = str(196)
iid = str(302)

# Get a prediction for one specific (user, item) pair from each model.
knn_pred = knn.predict(uid, iid, r_ui=4, verbose=True)
svd_pred = svd.predict(uid, iid, r_ui=4, verbose=True)

# `est` is the estimated rating (the field previously read as index 3).
print(knn_pred.est)
print(svd_pred.est)
print(round(knn_pred.est, 3))

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.8915
RMSE: 0.8589
user: 196        item: 302        r_ui = 4.00   est = 3.77   {'actual_k': 60, 'was_impossible': False}
user: 196        item: 302        r_ui = 4.00   est = 3.83   {'was_impossible': False}
