# 5 - LightGCN

In [1]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.append("../../")

from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.io as pio
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.utilities.seed import seed_everything

from src.models import LightGCN
from src.util import Data, metrics


# Seed the random number generators through Lightning before any model or
# loader is created, so repeated runs start from the same state.
SEED = 42
seed_everything(SEED)

# Plotly figures are rendered with the "notebook" renderer.
pio.renderers.default = "notebook"

# Input ratings file and destination of the trained model
# (both resolved relative to the current working directory).
RATINGS_PATH = Path("../../../data/ratings.csv")
OUTPUT_PATH = Path("../../models/lgcn.pt")

# Create the model directory up front; a no-op when it already exists.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)

Global seed set to 42


In [2]:
# Single batch size shared by the train / validation / test loaders.
BATCH_SIZE = 5_000

data = Data(RATINGS_PATH)

# Build the three loaders in the same order as before (train, val, test),
# all with the shared batch size.
train_loader, val_loader, test_loader = (
    make_loader(batch_size=BATCH_SIZE)
    for make_loader in (
        data.get_train_loader,
        data.get_val_loader,
        data.get_test_loader,
    )
)

In [3]:
# Tunable hyperparameters, kept apart from the arguments that are read
# straight off the dataset so they are easy to spot and change.
# NOTE(review): n_factors is presumably the embedding size and n_layers the
# number of propagation layers - confirm against src.models.LightGCN.
hyperparams = dict(
    n_factors=10,
    n_layers=2,
    learning_rate=1e-3,
    weight_decay=1e-6,
)

model = LightGCN(
    # Dataset-derived arguments: entity counts, training interactions,
    # the id encoders and the rating upper bound.
    user_dim=data.user_count,
    beer_dim=data.beer_count,
    interactions=data.train_interactions,
    user_encoder=data.user_encoder,
    beer_encoder=data.beer_encoder,
    max_rating=data.max_rating,
    **hyperparams,
)

# Early-stopping criterion: watch the "val/rmse" metric (lower is better) and
# stop after 3 validation checks without any improvement.
# NOTE(review): the Trainer's `callbacks=[early_stop_callback]` line is
# commented out, so this callback is currently defined but not used.
early_stop_callback = EarlyStopping(
    monitor="val/rmse",
    mode="min",
    min_delta=0.0,
    patience=3,
    verbose=False,
)

# Lightning trainer for up to 50 epochs, refreshing the progress bar every
# 50 batches.
# Use one GPU when CUDA is available and fall back to CPU otherwise; a
# hard-coded `gpus=1` makes Trainer construction fail on CPU-only machines.
trainer = Trainer(
    max_epochs=50,
    gpus=1 if torch.cuda.is_available() else 0,
    progress_bar_refresh_rate=50,
    # callbacks=[early_stop_callback]
    # auto_scale_batch_size=True
)

Building interaction matrix: 100%|██████████| 243264/243264 [00:00<00:00, 1549442.18it/s]
GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [4]:
# Fit the model on the training loader, validating on the validation loader.
trainer.fit(model, train_loader, val_loader)
# Switch the module to evaluation mode before it is serialized.
model.eval()
# Serialize the whole model object (not just its state_dict) to OUTPUT_PATH.
# NOTE(review): loading this file requires the LightGCN class to be importable
# under the same module path - confirm the consumers rely on that.
torch.save(model, OUTPUT_PATH)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type      | Params
---------------------------------------------
0 | user_embedding | Embedding | 77.7 K
1 | beer_embedding | Embedding | 91.5 K
2 | sigmoid        | Sigmoid   | 0     
---------------------------------------------
169 K     Trainable params
0         Non-trainable params
169 K     Total params
0.677     Total estimated model params size (MB)
Epoch 0:  76%|███████▌  | 50/66 [00:03<00:01, 14.99it/s]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/17 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 66/66 [00:04<00:00, 15.98it/s, loss=0.11, v_num=0]
Epoch 1:  76%|███████▌  | 50/66 [00:03<00:01, 15.42it/s, loss=0.11, v_num=0]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/17 [00:00<?, ?it/s][A
Epoch 1: 100%|██████████| 66/66 [00:04<00:00, 16.29it/s, loss=0.104, v_num=0]
Epoch 2:  76%|███████▌  | 50/66 [00:03<00:01, 15.07it/s, loss=0.104, v_num=0]
Validating: 0it [00:00, ?it/s][

In [5]:
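# NOTE: evaluation is currently disabled. Uncomment this cell (and the two
# plotting cells below) to score the trained model on the test ratings.
# model.set_predict_device()

# scores, errors = metrics.test_model(
#     data.test_discretized_ratings,
#     model,
#     k=20
# )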

In [6]:
# px.box(scores, x="metric", y="score", color="metric", title="Metrics")

In [7]:
# px.box(errors, x="metric", y="error", color="metric", title="Errors")