In [1]:
import numpy as np
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold

In [2]:
reader = Reader()

In [3]:
movie_ratings = pd.read_csv('ratings_small.csv')
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
data = Dataset.load_from_df(movie_ratings[['userId','movieId','rating']],reader)
kf = KFold(n_splits=5)
kf.split(data)

<generator object KFold.split at 0x000001CBD0D1AF90>

In [5]:
from surprise.model_selection import cross_validate

algo = SVD()
cross_validate(algo, data, measures=['RMSE', 'MAE'])

{'test_rmse': array([0.893957  , 0.89664766, 0.89690695, 0.89713501, 0.89509078]),
 'test_mae': array([0.6882143 , 0.69227843, 0.68982139, 0.6917253 , 0.687614  ]),
 'fit_time': (5.393148422241211,
  5.2628333568573,
  5.295105457305908,
  5.970364570617676,
  5.625375270843506),
 'test_time': (0.22041034698486328,
  0.21047163009643555,
  0.20648717880249023,
  0.1770007610321045,
  0.19647216796875)}

In [6]:
trainset = data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1cbd0d4fb20>

In [7]:
testset = trainset.build_testset()
predictions = algo.test(testset)

accuracy.rmse(predictions, verbose=True)

RMSE: 0.6463


0.6462919169795448

In [8]:
movie_ratings[movie_ratings['userId'] == 1]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
5,1,1263,2.0,1260759151
6,1,1287,2.0,1260759187
7,1,1293,2.0,1260759148
8,1,1339,3.5,1260759125
9,1,1343,2.0,1260759131


In [9]:
userid = 1
movieId = 1287
actual_rating = 2

In [10]:
algo.predict(userid, movieId, actual_rating)

Prediction(uid=1, iid=1287, r_ui=2, est=2.9490679770866097, details={'was_impossible': False})

In [11]:
from collections import defaultdict

from surprise import SVD
from surprise import Dataset


def get_top_n(predictions, n=10):


    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n


In [12]:
top_n = get_top_n(predictions, n=10)

In [13]:
for uid, user_ratings in top_n.items():
    print(uid, [iid for (iid, _) in user_ratings])

1 [1172, 1953, 3671, 1061, 1287, 1339, 1029, 2105, 2150, 1293]
2 [527, 720, 589, 50, 17, 497, 515, 551, 150, 265]
3 [318, 1197, 2959, 296, 778, 356, 2858, 2318, 50068, 3949]
4 [296, 858, 1198, 1213, 1270, 1288, 2918, 2064, 1291, 356]
5 [1035, 30749, 2081, 5995, 919, 4995, 33166, 2762, 6377, 903]
6 [2019, 1204, 7153, 5952, 293, 1276, 1250, 2692, 111, 903]
7 [1196, 318, 1210, 260, 1198, 745, 1225, 1223, 541, 720]
8 [50, 318, 2858, 527, 2959, 2571, 47, 858, 2329, 593]
9 [608, 527, 2571, 318, 593, 534, 1682, 2762, 515, 1704]
10 [50, 318, 1611, 1197, 2571, 1089, 1198, 1719, 735, 1358]
11 [48516, 50, 923, 778, 1201, 26614, 296, 88129, 96079, 80489]
12 [1235, 2959, 1215, 3793, 1387, 3879, 6184, 3825, 1220, 2460]
13 [58559, 527, 318, 7502, 4993, 1259, 3147, 356, 6377, 78499]
14 [1196, 3175, 2716, 3114, 3751, 2038, 2355, 2394, 3988, 594]
15 [47, 608, 2571, 1252, 111, 1196, 296, 50, 1210, 858]
16 [318, 2858, 750, 50, 6016, 1704, 1961, 527, 4995, 1653]
17 [47, 296, 912, 1089, 858, 1237, 2959, 50,

In [14]:
predictions[:100]

[Prediction(uid=1, iid=31, r_ui=2.5, est=2.2328662001919892, details={'was_impossible': False}),
 Prediction(uid=1, iid=1029, r_ui=3.0, est=2.8848557377542665, details={'was_impossible': False}),
 Prediction(uid=1, iid=1061, r_ui=3.0, est=2.959325329742752, details={'was_impossible': False}),
 Prediction(uid=1, iid=1129, r_ui=2.0, est=2.2957446132439823, details={'was_impossible': False}),
 Prediction(uid=1, iid=1172, r_ui=4.0, est=3.572741010356983, details={'was_impossible': False}),
 Prediction(uid=1, iid=1263, r_ui=2.0, est=2.7565660042091698, details={'was_impossible': False}),
 Prediction(uid=1, iid=1287, r_ui=2.0, est=2.9490679770866097, details={'was_impossible': False}),
 Prediction(uid=1, iid=1293, r_ui=2.0, est=2.831770595264351, details={'was_impossible': False}),
 Prediction(uid=1, iid=1339, r_ui=3.5, est=2.9002960528596704, details={'was_impossible': False}),
 Prediction(uid=1, iid=1343, r_ui=2.0, est=2.6868272591105167, details={'was_impossible': False}),
 Prediction(uid