In [None]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.append("../../")

from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.io as pio
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.utilities.seed import seed_everything

from src.models import MatrixFactorization2
from src.util import Data, metrics


# Fix the global seed first so every later cell runs from the same RNG state.
seed_everything(42)


# Relative locations of the input ratings file and of the saved model.
RATINGS_PATH = Path("../../../data/ratings.csv")
OUTPUT_PATH = Path("../../models/mf.pt")

# Make sure the folder that will hold the saved model exists
# (missing parents are created; an existing folder is not an error).
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

# Send plotly figures through the "notebook" renderer.
pio.renderers.default = "notebook"

In [None]:
# One knob for the batch size so the three loaders cannot drift apart.
BATCH_SIZE = 20_000

# Build the dataset wrapper from the ratings file.
data = Data(RATINGS_PATH)

# Loaders for the three splits; only the train and validation loaders are
# passed to `trainer.fit` in this notebook.
train_loader = data.get_train_loader(batch_size=BATCH_SIZE)
val_loader = data.get_val_loader(batch_size=BATCH_SIZE)
test_loader = data.get_test_loader(batch_size=BATCH_SIZE)

In [None]:
# Instantiate the matrix-factorization model. Everything prefixed with `data.`
# is read from the dataset wrapper; the remaining values are fixed
# hyperparameters for this run.
model = MatrixFactorization2(
    # Sizes taken from the dataset.
    user_dim=data.user_count,
    beer_dim=data.beer_count,
    # Model shape / variant switches.
    n_factors=10,
    embedding_rescaler=0.01,
    use_mlp=False,
    # Dataset artefacts handed to the model.
    interactions=data.train_interactions,
    user_encoder=data.user_encoder,
    beer_encoder=data.beer_encoder,
    max_rating=data.max_rating,
    # Optimisation-related hyperparameters (judging by their names).
    learning_rate=2e-3,
    weight_decay=1e-6,
)

# Early-stopping rule on the "val/rmse" metric: lower is better, any decrease
# counts as an improvement, and a patience of 3 is allowed.
# NOTE(review): the Trainer in this cell has its `callbacks=` argument
# commented out, so this object is constructed but currently has no effect.
early_stop_callback = EarlyStopping(
    monitor="val/rmse",
    mode="min",
    patience=3,
    min_delta=0.0,
    verbose=False,
)

# Train for at most 50 epochs. Use a single GPU when CUDA is available and
# fall back to the CPU otherwise, so the notebook still runs on machines
# without a GPU instead of failing when the Trainer is constructed.
trainer = Trainer(
    max_epochs=50,
    gpus=1 if torch.cuda.is_available() else None,
    progress_bar_refresh_rate=50,
    # callbacks=[early_stop_callback]
    # auto_scale_batch_size=True
)

In [None]:
# Fit the model, passing the training loader and the validation loader.
trainer.fit(model, train_loader, val_loader)
# Put the model in evaluation mode before it is written to disk.
model.eval()
# Saves the whole model object (not only a state_dict) to OUTPUT_PATH.
# NOTE(review): loading this file back presumably requires `src.models` to be
# importable under the same module path — confirm with the consumers.
torch.save(model, OUTPUT_PATH)

In [None]:
# NOTE(review): evaluation is currently disabled — this whole cell is commented
# out, so `scores` and `errors` are never defined in this notebook.
# model.set_predict_device()

# scores, errors = metrics.test_model(
#     data.test_discretized_ratings,
#     model,
#     k=20
# )

In [None]:
# Disabled: needs `scores`, which is only produced by the commented-out evaluation cell.
# px.box(scores, x="metric", y="score", color="metric", title="Metrics")

In [None]:
# Disabled: needs `errors`, which is only produced by the commented-out evaluation cell.
# px.box(errors, x="metric", y="error", color="metric", title="Errors")