In [1]:
import json

from river import datasets

# Pull only the first (features, target) pair from the stream and show it.
x, y = next(iter(datasets.MovieLens100K()))
print(f'x = {json.dumps(x, indent=4)}')
print(f'y = {y}')
    
from river import metrics
from river.evaluate import progressive_val_score

def evaluate(model, unpack_user_and_item=True):
    """Run progressive validation of ``model`` on the MovieLens100K stream.

    The combined metric ``metrics.MAE() + metrics.RMSE()`` is passed to
    ``progressive_val_score`` with ``print_every=25_000``, ``show_time=True``
    and ``show_memory=True``.

    Args:
        model: The estimator handed to ``progressive_val_score``.
        unpack_user_and_item: Forwarded as the first positional argument of
            ``datasets.MovieLens100K``.

    Returns:
        None. The value returned by ``progressive_val_score`` is discarded.
    """
    progressive_val_score(
        datasets.MovieLens100K(unpack_user_and_item),
        model,
        metrics.MAE() + metrics.RMSE(),
        print_every=25_000,
        show_time=True,
        show_memory=True,
    )


Downloading https://maxhalford.github.io/files/datasets/ml_100k.zip (1.83 MB)
Uncompressing into /home/jbris/river_data/MovieLens100K
x = {
    "user": "259",
    "item": "255",
    "timestamp": 874731910000000000,
    "title": "My Best Friend's Wedding (1997)",
    "release_date": 866764800000000000,
    "genres": "comedy, romance",
    "age": 21.0,
    "gender": "M",
    "occupation": "student",
    "zip_code": "48823"
}
y = 4.0


In [2]:
from river import dummy
from river import stats

# Reference run: a StatisticRegressor wrapping stats.Mean(), evaluated with
# unpack_user_and_item=False.
model = dummy.StatisticRegressor(stats.Mean())
evaluate(model, unpack_user_and_item=False)

# Sample dict holding only 'user' and 'item' keys. Nothing in the visible
# cells reads this value.
x = {'user': 'Guido', 'item': "Monty Python's Flying Circus"}

[25,000] MAE: 0.934259
RMSE: 1.124469 – 00:00:00 – 514 B
[50,000] MAE: 0.923893
RMSE: 1.105 – 00:00:00 – 514 B
[75,000] MAE: 0.937359
RMSE: 1.123696 – 00:00:01 – 514 B
[100,000] MAE: 0.942162
RMSE: 1.125783 – 00:00:01 – 514 B


In [3]:
from river import preprocessing
from river import optim
from river import reco

# Keyword arguments forwarded to reco.Baseline.
baseline_params = dict(
    optimizer=optim.SGD(0.025),
    l2=0.,
    initializer=optim.initializers.Zeros(),
)

# Wrap the regressor in PredClipper with bounds y_min=1 and y_max=5.
model = preprocessing.PredClipper(regressor=reco.Baseline(**baseline_params), y_min=1, y_max=5)

evaluate(model)

[25,000] MAE: 0.761844
RMSE: 0.960972 – 00:00:00 – 173.6 KB
[50,000] MAE: 0.753292
RMSE: 0.951223 – 00:00:01 – 242.23 KB
[75,000] MAE: 0.754177
RMSE: 0.953376 – 00:00:01 – 286.04 KB
[100,000] MAE: 0.754651
RMSE: 0.954148 – 00:00:02 – 309.64 KB


In [4]:
# Keyword arguments forwarded to reco.FunkMF; the Normal initializer is
# seeded (seed=73).
funk_mf_params = dict(
    n_factors=10,
    optimizer=optim.SGD(0.05),
    l2=0.1,
    initializer=optim.initializers.Normal(mu=0., sigma=0.1, seed=73),
)

# Wrap the regressor in PredClipper with bounds y_min=1 and y_max=5.
model = preprocessing.PredClipper(regressor=reco.FunkMF(**funk_mf_params), y_min=1, y_max=5)

evaluate(model)

[25,000] MAE: 1.070136
RMSE: 1.397014 – 00:00:00 – 570.35 KB
[50,000] MAE: 0.99174
RMSE: 1.290666 – 00:00:01 – 716 KB
[75,000] MAE: 0.961072
RMSE: 1.250842 – 00:00:02 – 844.09 KB
[100,000] MAE: 0.944883
RMSE: 1.227688 – 00:00:03 – 945.19 KB


In [5]:
# Keyword arguments forwarded to reco.BiasedMF. Bias and latent parts each get
# their own optimizer, initializer and l2 value; the latent Normal initializer
# is seeded (seed=73).
biased_mf_params = dict(
    n_factors=10,
    bias_optimizer=optim.SGD(0.025),
    latent_optimizer=optim.SGD(0.05),
    weight_initializer=optim.initializers.Zeros(),
    latent_initializer=optim.initializers.Normal(mu=0., sigma=0.1, seed=73),
    l2_bias=0.,
    l2_latent=0.,
)

# Wrap the regressor in PredClipper with bounds y_min=1 and y_max=5.
model = preprocessing.PredClipper(regressor=reco.BiasedMF(**biased_mf_params), y_min=1, y_max=5)

evaluate(model)

[25,000] MAE: 0.761818
RMSE: 0.961057 – 00:00:01 – 669.27 KB
[50,000] MAE: 0.751667
RMSE: 0.949443 – 00:00:02 – 869.85 KB
[75,000] MAE: 0.749653
RMSE: 0.948723 – 00:00:03 – 1 MB
[100,000] MAE: 0.748559
RMSE: 0.947854 – 00:00:04 – 1.11 MB
