In [13]:
import logging
from logging import getLogger
import recbole 
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger, get_model, get_trainer

In [14]:
parameter_dict = {
    # seq_separator: ","
    'data_path':'',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'RATING_FIELD': 'rating',
    'TIME_FIELD': 'timestamp',

    'load_col': {'inter': ['user_id', 'item_id', 'rating', 'timestamp']},
    'device' : 'GPU',

    # model config
    'embedding_size': '64',
    'hidden_size': '128',
    'num_layers': '1',
    'dropout_prob': '0.3',
    'loss_type': 'CE',


    # 'eval_setting': TO_LS, full,
    'train_neg_sample_args': None,
    'group_by_user': True,
    'metrics': ["Recall", "MRR", "NDCG", "Hit", "Precision"],
    'topk': 10,
    'metric_decimal_place': 5,

    'learning_rate': 0.0001,
    'epochs': 50,
    'train_batch_size': '512',
    'eval_batch_size': '512',
}


In [15]:
config = Config(model='GRU4Rec', dataset='Dianping_local', config_dict = parameter_dict)

init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config)
logger = getLogger()
# Create handlers
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)
logger.addHandler(c_handler)

# write config info into log
logger.info(config)


19 Jun 12:31    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = Dianping_local
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 50
train_batch_size = 512
learner = adam
learning_rate = 0.0001
train_neg_sample_args = {'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = True
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 512
metric_decimal_place = 5

Dataset Hyper Parameters:

In [16]:
dataset = create_dataset(config)
logger.info(dataset)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
19 Jun 12:31    INFO  Dianping_local
The number of users: 121334
Average actions of users: 8.241772642232533
The number of items: 1

In [17]:
train_data, valid_data, test_data = data_preparation(config, dataset)

19 Jun 12:32    INFO  [Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
19 Jun 12:32    INFO  [Evaluation]: eval_batch_size = [512] eval_args: [{'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}]
[Evaluation]: eval_batch_size = [512] eval_args: [{'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}]
[Evaluation]: eval_batch_size = [512] eval_args: [{'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'val

In [18]:
for i in train_data:
    print(i)
    break 

The batch_size of interaction: 512
    user_id, torch.Size([512]), cpu, torch.int64
    item_id, torch.Size([512]), cpu, torch.int64
    rating, torch.Size([512]), cpu, torch.float32
    timestamp, torch.Size([512]), cpu, torch.float32
    item_length, torch.Size([512]), cpu, torch.int64
    item_id_list, torch.Size([512, 50]), cpu, torch.int64
    rating_list, torch.Size([512, 50]), cpu, torch.float32
    timestamp_list, torch.Size([512, 50]), cpu, torch.float32




In [19]:
gru4rec_model = get_model(config["model"])
gru4rec_model, config["device"]

(recbole.model.sequential_recommender.gru4rec.GRU4Rec, device(type='cpu'))

In [20]:
model = gru4rec_model(config, train_data.dataset).to(config['device'])
logger.info(model)

19 Jun 12:32    INFO  GRU4Rec(
  (item_embedding): Embedding(10870, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 777664
GRU4Rec(
  (item_embedding): Embedding(10870, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 777664
GRU4Rec(
  (item_embedding): Embedding(10870, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 777664


In [21]:
config["model"], config["MODEL_TYPE"], config["device"]

('GRU4Rec', <ModelType.SEQUENTIAL: 2>, device(type='cpu'))

In [22]:
for i in train_data:
    print(i)
    break 

The batch_size of interaction: 512
    user_id, torch.Size([512]), cpu, torch.int64
    item_id, torch.Size([512]), cpu, torch.int64
    rating, torch.Size([512]), cpu, torch.float32
    timestamp, torch.Size([512]), cpu, torch.float32
    item_length, torch.Size([512]), cpu, torch.int64
    item_id_list, torch.Size([512, 50]), cpu, torch.int64
    rating_list, torch.Size([512, 50]), cpu, torch.float32
    timestamp_list, torch.Size([512, 50]), cpu, torch.float32




In [23]:
#  trainer loading and initialization
trainer = Trainer(config, model)
trainer 

<recbole.trainer.trainer.Trainer at 0x1d6b5fbb150>

In [24]:
# model training
best_valid_score, best_valid_result = trainer.fit(train_data)

19 Jun 12:52    INFO  epoch 0 training [time: 1196.53s, train loss: 12657.1690]
epoch 0 training [time: 1196.53s, train loss: 12657.1690]
epoch 0 training [time: 1196.53s, train loss: 12657.1690]
19 Jun 12:52    INFO  Saving current: saved\GRU4Rec-Jun-19-2024_12-32-07.pth
Saving current: saved\GRU4Rec-Jun-19-2024_12-32-07.pth
Saving current: saved\GRU4Rec-Jun-19-2024_12-32-07.pth
19 Jun 13:11    INFO  epoch 1 training [time: 1188.80s, train loss: 12204.7012]
epoch 1 training [time: 1188.80s, train loss: 12204.7012]
epoch 1 training [time: 1188.80s, train loss: 12204.7012]
19 Jun 13:11    INFO  Saving current: saved\GRU4Rec-Jun-19-2024_12-32-07.pth
Saving current: saved\GRU4Rec-Jun-19-2024_12-32-07.pth
Saving current: saved\GRU4Rec-Jun-19-2024_12-32-07.pth
19 Jun 14:45    INFO  epoch 2 training [time: 5625.76s, train loss: 12113.0081]
epoch 2 training [time: 5625.76s, train loss: 12113.0081]
epoch 2 training [time: 5625.76s, train loss: 12113.0081]
19 Jun 14:45    INFO  Saving current: 

KeyboardInterrupt: 