# 4 - Matrix Factorization 2 + MLP

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to the project's `src` package are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

# Make the directory two levels up importable so the `src.*` imports below
# resolve.
# NOTE(review): the path is relative, so this presumably relies on the kernel's
# working directory being the notebook's own folder — confirm.
import sys; sys.path.append("../../")

from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.io as pio
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.utilities.seed import seed_everything

from src.models import MatrixFactorization2
from src.util import Data, metrics


# Seed first, before any model or data object is created, so runs are repeatable.
seed_everything(42)

# Input ratings file and the destination of the trained model artefact.
RATINGS_PATH = Path("../../../data/ratings.csv")
OUTPUT_PATH = Path("../../models/mf2_mlp.pt")

# Create the output directory up front so saving cannot fail after training.
OUTPUT_PATH.parent.mkdir(exist_ok=True, parents=True)

# Render plotly figures inline in the notebook.
pio.renderers.default = "notebook"

Global seed set to 42


In [2]:
# Single source of truth for the batch size used by every split; previously the
# same literal was repeated in each loader call.
BATCH_SIZE = 5_000

# Load the ratings file; `data` also supplies the counts, encoders and
# interaction data consumed when the model is built.
data = Data(RATINGS_PATH)

train_loader = data.get_train_loader(batch_size=BATCH_SIZE)
val_loader = data.get_val_loader(batch_size=BATCH_SIZE)
# NOTE(review): `test_loader` is not referenced by any later cell visible here
# (evaluation reads `data.test_discretized_ratings`); kept in case it is used
# elsewhere.
test_loader = data.get_test_loader(batch_size=BATCH_SIZE)

In [3]:
# Matrix-factorization model with the MLP head switched on.
model = MatrixFactorization2(
    # Sizes, interaction data and encoders all come from the loaded dataset.
    user_dim=data.user_count,
    beer_dim=data.beer_count,
    interactions=data.train_interactions,
    user_encoder=data.user_encoder,
    beer_encoder=data.beer_encoder,
    max_rating=data.max_rating,
    # Architecture hyperparameters.
    n_factors=10,
    embedding_rescaler=0.01,
    use_mlp=True,
    n_layers=1,
    # Optimisation hyperparameters.
    learning_rate=1e-3,
    weight_decay=1e-6,
)

# Stop training once the monitored validation RMSE has gone `patience`
# validation checks without decreasing (any decrease counts: min_delta is 0).
early_stop_callback = EarlyStopping(
    monitor="val/rmse",
    mode="min",
    patience=3,
    min_delta=0.0,
    verbose=False,
)

# Train for at most 50 epochs; the early-stopping callback may end the run
# sooner. Use one GPU when CUDA is available and fall back to CPU otherwise,
# so the notebook still runs on machines without a GPU.
trainer = Trainer(
    max_epochs=50,
    gpus=1 if torch.cuda.is_available() else 0,
    progress_bar_refresh_rate=50,  # redraw the progress bar every 50 batches
    callbacks=[early_stop_callback],
)

Building interaction matrix: 100%|██████████| 243264/243264 [00:00<00:00, 1491593.68it/s]
GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [4]:
# Fit on the training loader, validating on the validation loader; the trainer
# was configured with an early-stopping callback, so it may stop before
# max_epochs.
trainer.fit(model, train_loader, val_loader)
# Put the module in evaluation mode before it is persisted and used for
# prediction.
model.eval()
# Persist the whole model object (not just a state_dict) to OUTPUT_PATH.
# NOTE(review): whole-object saving relies on pickle, so loading this file
# presumably requires the same `src.models` code to be importable — confirm
# before switching formats, since other loaders may depend on this layout.
torch.save(model, OUTPUT_PATH)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type       | Params
----------------------------------------------
0 | user_bias      | Embedding  | 7.8 K 
1 | beer_bias      | Embedding  | 9.2 K 
2 | user_embedding | Embedding  | 77.7 K
3 | beer_embedding | Embedding  | 91.5 K
4 | linears        | ModuleList | 265   
5 | dropouts       | ModuleList | 0     
6 | linear_n       | Linear     | 6     
7 | sigmoid        | Sigmoid    | 0     
----------------------------------------------
169 K     Trainable params
16.9 K    Non-trainable params
186 K     Total params
0.746     Total estimated model params size (MB)
Epoch 0:  76%|███████▌  | 50/66 [00:02<00:00, 17.05it/s]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/17 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 66/66 [00:03<00:00, 17.29it/s, loss=0.0275, v_num=7]
Epoch 1:  76%|███████▌  | 50/66 [00:02<00:00, 18.09it/s, loss=0.0275, v_num=7]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          

In [12]:
# Prepare the model for prediction (project helper on the model class).
model.set_predict_device()

# Evaluate on the discretized test ratings with k=20; the helper returns the
# per-metric scores and errors that the following cells plot.
scores, errors = metrics.test_model(data.test_discretized_ratings, model, k=20)

Calculating predictions: 100%|██████████| 4667/4667 [00:44<00:00, 105.32it/s]
Calculating MRR: 100%|██████████| 4667/4667 [00:05<00:00, 840.01it/s]
Calculating MAP: 100%|██████████| 4667/4667 [00:05<00:00, 829.83it/s]
Calculating NDCG: 100%|██████████| 4667/4667 [00:12<00:00, 374.75it/s]
Calculating RMSE: 100%|██████████| 4667/4667 [00:11<00:00, 405.80it/s]


In [11]:
# Box plot of the `score` column, one box (and colour) per `metric`.
px.box(
    scores,
    x="metric",
    y="score",
    color="metric",
    title="Metrics",
)

In [10]:
# Box plot of the `error` column, one box (and colour) per `metric`.
px.box(
    errors,
    x="metric",
    y="error",
    color="metric",
    title="Errors",
)