In [1]:
%run helpers.ipynb



In [62]:
import surprise
from recommenders.datasets import movielens
from recommenders.models.surprise.surprise_utils import surprise_trainset_to_df

data = movielens.load_pandas_df(size='100k', header=['userID', 'itemID', 'rating'])

data

100%|██████████| 4.81k/4.81k [00:01<00:00, 3.61kKB/s]


Unnamed: 0,userID,itemID,rating
0,196,242,3.0
1,186,302,3.0
2,22,377,1.0
3,244,51,2.0
4,166,346,1.0
...,...,...,...
99995,880,476,3.0
99996,716,204,5.0
99997,276,1090,1.0
99998,13,225,2.0


In [64]:
data.rating = data.rating.map(lambda x: 1 if x >= 3 else 0)
data

Unnamed: 0,userID,itemID,rating
0,196,242,1
1,186,302,1
2,22,377,0
3,244,51,0
4,166,346,0
...,...,...,...
99995,880,476,1
99996,716,204,1
99997,276,1090,0
99998,13,225,0


In [65]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, train_size=0.75)

train

Unnamed: 0,userID,itemID,rating
97862,396,151,1
77208,561,233,0
99684,554,181,1
40993,51,210,1
70440,102,449,1
...,...,...,...
72566,134,269,1
96513,714,257,1
79276,399,218,1
53412,276,254,0


In [66]:
trainset = surprise.Dataset.load_from_df(train , reader=surprise.Reader('ml-100k')).build_full_trainset()

trainset

<surprise.trainset.Trainset at 0x7fe65eaeac10>

In [67]:
from recommenders.utils.timer import Timer

svdpp = surprise.SVDpp(random_state=0, n_factors=200, n_epochs=30, verbose=True)

with Timer() as train_time:
    svdpp.fit(trainset)

print(f"Training took {train_time.interval} seconds")

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19
 processing epoch 20
 processing epoch 21
 processing epoch 22
 processing epoch 23
 processing epoch 24
 processing epoch 25
 processing epoch 26
 processing epoch 27
 processing epoch 28
 processing epoch 29
Training took 1073.6172709170005 seconds


In [33]:
from recommenders.models.surprise.surprise_utils import predict

predictions = predict(svdpp, test, usercol='userID', itemcol='itemID')

In [18]:
from recommenders.models.surprise.surprise_utils import compute_ranking_predictions
with Timer() as test_time:
    all_predictions = compute_ranking_predictions(svdpp, train, usercol='userID', itemcol='itemID', remove_seen=True, verbose=True)
    
print(f"Testing took {test_time.interval} seconds")

Testing took 170.55593502700003 seconds


In [34]:
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)
eval_rmse = rmse(test, predictions)
eval_mae = mae(test, predictions)
eval_rsquared = rsquared(test, predictions)
eval_exp_var = exp_var(test, predictions)

k = 10
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=k)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=k)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=k)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=k)


print("RMSE:\t\t%f" % eval_rmse,
      "MAE:\t\t%f" % eval_mae, 

print('----')

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

RMSE:		0.944247
MAE:		0.743569
rsquared:	0.296937
exp var:	0.296943
----
MAP:	0.016302
NDCG:	0.112750
Precision@K:	0.102757
Recall@K:	0.036952
