In [8]:
from surprise import accuracy, Dataset, SVD, KNNBasic
from surprise.model_selection import KFold, cross_validate, train_test_split, GridSearchCV
import pandas as pd

# Load the movielens-100k dataset
data = Dataset.load_builtin("ml-100k")

# define a cross-validation iterator
kf = KFold(n_splits=3)

algo = SVD()

for trainset, testset in kf.split(data):

    # train and test algorithm.
    algo.fit(trainset)
    predictions = algo.test(testset)

    # Compute and print Root Mean Squared Error
    accuracy.rmse(predictions, verbose=True)



RMSE: 0.9472
RMSE: 0.9432
RMSE: 0.9504


In [3]:
data.raw_ratings[0] # **Usid, Itemid, Rating, Timestamp**

('196', '242', 3.0, '881250949')

In [4]:
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9272  0.9390  0.9495  0.9406  0.9266  0.9366  0.0087  
MAE (testset)     0.7289  0.7403  0.7490  0.7412  0.7289  0.7376  0.0078  
Fit time          0.94    1.01    0.94    0.95    0.97    0.96    0.03    
Test time         0.16    0.10    0.10    0.19    0.12    0.13    0.04    


{'test_rmse': array([0.92716743, 0.93896151, 0.94953364, 0.94061658, 0.92663378]),
 'test_mae': array([0.72886645, 0.7402503 , 0.74896858, 0.74118376, 0.72888949]),
 'fit_time': (0.9398612976074219,
  1.010058879852295,
  0.9356522560119629,
  0.9465651512145996,
  0.9684898853302002),
 'test_time': (0.1599893569946289,
  0.09660124778747559,
  0.09539341926574707,
  0.18687939643859863,
  0.11659073829650879)}

In [5]:
# sample random trainset and testset
# test set is made of 25% of the ratings.
trainset, testset = train_test_split(data, test_size=0.25)

# We'll use the famous SVD algorithm.
algo = SVD()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

# Then compute RMSE
accuracy.rmse(predictions)

RMSE: 0.9388


0.9387834159339667

In [6]:
# Retrieve the trainset.
trainset = data.build_full_trainset()

# Build an algorithm, and train it.
algo = KNNBasic()
algo.fit(trainset)

uid = str(196)  # raw user id (as in the ratings file). They are **strings**!
iid = str(302)  # raw item id (as in the ratings file). They are **strings**!

# get a prediction for specific users and items.
pred = algo.predict(uid, iid, r_ui=4, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
user: 196        item: 302        r_ui = 4.00   est = 4.06   {'actual_k': 40, 'was_impossible': False}


In [7]:
param_grid = {"n_epochs": [5, 10], "lr_all": [0.002, 0.005], "reg_all": [0.4, 0.6]}
gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score["rmse"])

# combination of parameters that gave the best RMSE score
print(gs.best_params["rmse"])

0.9635589129386765
{'n_epochs': 10, 'lr_all': 0.005, 'reg_all': 0.4}


In [12]:
# create recommender system for movileens-100k dataset
data = Dataset.load_builtin("ml-100k")

# split data into train and test set
trainset, testset = train_test_split(data, test_size=.25)

# create SVD algorithm
algo = SVD()

# train algorithm on trainset
algo.fit(trainset)

# predict ratings for testset
predictions = algo.test(testset)

# create dataframe with predictions
df = pd.DataFrame(predictions, columns=['uid', 'iid', 'rui', 'est', 'details'])

# create dataframe with predictions for user 196
df_196 = df[df.uid == 196]  
df_196 = df_196.sort_values(by='est', ascending=False)

# print top 5 recommendations for user 196
df_196.head(5)

Unnamed: 0,uid,iid,rui,est,details
