In [55]:
from surprise import SVD, Dataset, accuracy, Reader

In [67]:
from surprise.model_selection import train_test_split, cross_validate, GridSearchCV

In [57]:
import pandas as pd

In [58]:
reader = Reader(rating_scale=(1,5))

In [59]:
rating = pd.read_csv("../datasets/u.data", sep="\t", names=["user_id", "item_id", "rating", "time"])

In [60]:
rating.head()

Unnamed: 0,user_id,item_id,rating,time
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [61]:
rating = rating.drop("time", axis=1)

In [62]:
rating.head()

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [63]:
data = Dataset.load_from_df(rating[["user_id", "item_id", "rating"]], reader=reader)

In [86]:
parameters={
    "n_epochs": [5, 10],
    "lr_all":   [0.001, 0.006],
    "reg_all":  [0.1, 0.5],
}

In [87]:
gs = GridSearchCV(SVD, parameters, measures=['rmse', 'mae'], cv=3)

In [88]:
gs.fit(data)

In [89]:
gs.best_score["rmse"]

0.9498610094163434

In [93]:
gs.best_params["rmse"]

{'n_epochs': 10, 'lr_all': 0.006, 'reg_all': 0.1}

In [94]:
best_svd = gs.best_estimator["rmse"]

In [84]:
best_svd.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fc5182a71c0>

In [85]:
best_svd.predict(uid="196", iid="242")

Prediction(uid='196', iid='242', r_ui=None, est=3.52986, details={'was_impossible': False})

In [95]:
cross_validate(best_svd, data, measures=["RMSE"])

{'test_rmse': array([0.94203804, 0.93656799, 0.94907282, 0.94591562, 0.95338592]),
 'fit_time': (1.2264306545257568,
  1.3327202796936035,
  1.1871767044067383,
  1.2007067203521729,
  1.1989727020263672),
 'test_time': (0.0570073127746582,
  0.05801200866699219,
  0.05658745765686035,
  0.05679011344909668,
  0.05689811706542969)}