In [22]:
import logging
from logging import getLogger
import recbole
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger, get_model, get_trainer

In [23]:
# Overrides handed to RecBole's Config for a sequential-recommendation run.
# Numeric hyperparameters are written as real ints/floats (not quoted strings)
# so the dict is correct on its own and does not depend on the consumer
# parsing strings back into numbers.
config_dict = {
    # dataset config : Sequential Recommendation
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'TIME_FIELD': 'timestamp',
    'RATING_FIELD': 'rating',
    'load_col': {
        'inter': ['user_id', 'item_id', 'rating', 'timestamp'],
    },
    'ITEM_LIST_LENGTH_FIELD': 'item_length',
    'LIST_SUFFIX': '_list',
    # Length of the per-sample history window (the *_list fields).
    'MAX_ITEM_LIST_LENGTH': 5,

    # model config
    'embedding_size': 64,
    'hidden_size': 128,
    'num_layers': 1,
    'dropout_prob': 0.3,
    'loss_type': 'CE',

    # Training and evaluation config
    'epochs': 100,
    'train_batch_size': 4096,
    'eval_batch_size': 4096,
    # No negative sampling for training (used together with the 'CE' loss above).
    'train_neg_sample_args': None,
    'eval_args': {
        'group_by': 'user',
        'order': 'TO',
        'split': {'LS': 'valid_and_test'},
        'mode': 'full',
    },
    'metrics': ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision'],
    'topk': 10,
    'valid_metric': 'MRR@10',
}


In [24]:
# Build the run configuration for GRU4Rec on the 'amazon' dataset, layering
# the overrides from config_dict on top of the library defaults.
config = Config(model='GRU4Rec', dataset='amazon', config_dict=config_dict)

# Seed the random number generators from the resolved config.
init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config)
logger = getLogger()

# Attach a console handler only when the root logger does not already have
# one. Adding a handler unconditionally stacks one more handler every time
# this cell is re-run, and every log record is then echoed once per handler.
# The exact-type check is deliberate: logging.FileHandler subclasses
# StreamHandler, so isinstance() would mistake a file handler for a console one.
has_console_handler = any(
    type(handler) is logging.StreamHandler for handler in logger.handlers
)
if not has_console_handler:
    c_handler = logging.StreamHandler()
    c_handler.setLevel(logging.INFO)
    logger.addHandler(c_handler)

# write config info into log
logger.info(config)


25 Jun 14:44    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True


seed = 2020
state = INFO
reproducibility = True
data_path = dataset/amazon
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 100
train_batch_size = 4096
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = True
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 4096
metric_decimal_place = 4

Dataset Hyper Parameters:
field_separator = 	
seq_separator =  
USER_ID_FIELD = user_id
ITEM_ID_FI

In [25]:
# Build the dataset object described by the resolved config.
dataset = create_dataset(config)

# Report the dataset summary once, through the logger. The earlier extra
# print(dataset) only repeated text the logger already writes to the cell
# output.
logger.info(dataset)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)


amazon
The number of users: 4465
Average actions of users: 5.721774193548387
The number of items: 20838
Average actions of items: 1.2258002591543888
The number of inters: 25542
The sparsity of the dataset: 99.97254778423475%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']


25 Jun 14:44    INFO  amazon
The number of users: 4465
Average actions of users: 5.721774193548387
The number of items: 20838
Average actions of items: 1.2258002591543888
The number of inters: 25542
The sparsity of the dataset: 99.97254778423475%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']
amazon
The number of users: 4465
Average actions of users: 5.721774193548387
The number of items: 20838
Average actions of items: 1.2258002591543888
The number of inters: 25542
The sparsity of the dataset: 99.97254778423475%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']
amazon
The number of users: 4465
Average actions of users: 5.721774193548387
The number of items: 20838
Average actions of items: 1.2258002591543888
The number of inters: 25542
The sparsity of the dataset: 99.97254778423475%
Remain Fields: ['user_id', 'item_id', 'rating', 'timestamp']
amazon
The number of users: 4465
Average actions of users: 5.721774193548387
The number of items: 20838
Average actions 

In [26]:
# Split the dataset and wrap each part in its loader; the call returns the
# three loaders in train / valid / test order.
prepared_splits = data_preparation(config, dataset)
train_data, valid_data, test_data = prepared_splits

25 Jun 14:44    INFO  [Training]: train_batch_size = [4096] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [4096] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [4096] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [4096] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
25 Jun 14:44    INFO  [Evaluation]: eval_batch_size = [4096] eval_args: [{'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}]
[Evaluation]: eval_batch_size = [4096] eval_args: [{'split': {'LS': 'valid_and_test'}, 'order': 'TO', 'group_by': 'user', 'mode': 

In [27]:
# Peek at the first training batch only, to inspect its fields.
first_batch = next(iter(train_data), None)
if first_batch is not None:
    print(first_batch)


The batch_size of interaction: 4096
    user_id, torch.Size([4096]), cpu, torch.int64
    item_id, torch.Size([4096]), cpu, torch.int64
    rating, torch.Size([4096]), cpu, torch.float32
    timestamp, torch.Size([4096]), cpu, torch.float32
    item_length, torch.Size([4096]), cpu, torch.int64
    item_id_list, torch.Size([4096, 5]), cpu, torch.int64
    rating_list, torch.Size([4096, 5]), cpu, torch.float32
    timestamp_list, torch.Size([4096, 5]), cpu, torch.float32




In [28]:
# Resolve the model *class* (not an instance) from the configured model name.
model_name = config["model"]
gru4rec_model = get_model(model_name)

# Show the resolved class next to the device the run will use.
gru4rec_model, config["device"]

(recbole.model.sequential_recommender.gru4rec.GRU4Rec, device(type='cpu'))

In [29]:
# Instantiate the model against the training dataset and move it to the
# configured device.
model = gru4rec_model(config, train_data.dataset).to(config['device'])

# Report the architecture once, through the logger. The earlier extra
# print(model) only repeated text the logger already writes to the cell
# output.
logger.info(model)

25 Jun 14:44    INFO  GRU4Rec(
  (item_embedding): Embedding(20838, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 1415616
GRU4Rec(
  (item_embedding): Embedding(20838, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 1415616
GRU4Rec(
  (item_embedding): Embedding(20838, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 1415616
GRU4Rec(
  (item_embedding): Embedding(20838, 64, padding_idx=0)
  (emb_dropout):

GRU4Rec(
  (item_embedding): Embedding(20838, 64, padding_idx=0)
  (emb_dropout): Dropout(p=0.3, inplace=False)
  (gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (dense): Linear(in_features=128, out_features=64, bias=True)
  (loss_fct): CrossEntropyLoss()
)


Trainable parameters: 1415616


In [30]:
# Trainer that will fit the model under the resolved config.
trainer = Trainer(config=config, model=model)

In [31]:
# Train with per-epoch validation; fit() hands back the best validation score
# together with the full metric dict for that epoch.
fit_outcome = trainer.fit(train_data, valid_data, verbose=1)
best_valid_score, best_valid_result = fit_outcome

25 Jun 14:44    INFO  epoch 0 training [time: 4.26s, train loss: 39.7833]
epoch 0 training [time: 4.26s, train loss: 39.7833]
epoch 0 training [time: 4.26s, train loss: 39.7833]
epoch 0 training [time: 4.26s, train loss: 39.7833]
25 Jun 14:44    INFO  epoch 0 evaluating [time: 0.21s, valid_score: 0.000200]
epoch 0 evaluating [time: 0.21s, valid_score: 0.000200]
epoch 0 evaluating [time: 0.21s, valid_score: 0.000200]
epoch 0 evaluating [time: 0.21s, valid_score: 0.000200]
25 Jun 14:44    INFO  valid result: 
recall@10 : 0.0008    mrr@10 : 0.0002    ndcg@10 : 0.0004    hit@10 : 0.0008    precision@10 : 0.0001
valid result: 
recall@10 : 0.0008    mrr@10 : 0.0002    ndcg@10 : 0.0004    hit@10 : 0.0008    precision@10 : 0.0001
valid result: 
recall@10 : 0.0008    mrr@10 : 0.0002    ndcg@10 : 0.0004    hit@10 : 0.0008    precision@10 : 0.0001
valid result: 
recall@10 : 0.0008    mrr@10 : 0.0002    ndcg@10 : 0.0004    hit@10 : 0.0008    precision@10 : 0.0001
25 Jun 14:44    INFO  Saving curre

In [32]:
# Show the best validation score followed by its metric breakdown.
validation_summary = (best_valid_score, best_valid_result)
print(*validation_summary)

0.0016 OrderedDict([('recall@10', 0.0025), ('mrr@10', 0.0016), ('ndcg@10', 0.0018), ('hit@10', 0.0025), ('precision@10', 0.0003)])


In [33]:
# Look up the trainer class matching the model type/name, then build a fresh
# trainer around the existing model.
# NOTE(review): this rebinds `trainer`, so the instance that ran fit() is no
# longer reachable by that name — confirm this is intended.
trainer_cls = get_trainer(config["MODEL_TYPE"], config["model"])
trainer = trainer_cls(config, model)
trainer

<recbole.trainer.trainer.Trainer at 0x22f0ce1fb10>

In [35]:
# Feed the training data to the trainer's evaluation collector.
# NOTE(review): presumably needed because this trainer never ran fit() —
# confirm against the Trainer implementation.
collector = trainer.eval_collector
collector.data_collect(train_data)
trainer

<recbole.trainer.trainer.Trainer at 0x22f0ce1fb10>

In [36]:
# Score the held-out test split using weights loaded from a checkpoint file.
# NOTE(review): the path is hard-coded — confirm that saved/latest.pth is the
# checkpoint produced by the training run above and not a stale file.
checkpoint_file = "saved/latest.pth"
test_result = trainer.evaluate(
    eval_data=test_data,
    model_file=checkpoint_file,
)
print(test_result)

25 Jun 14:48    INFO  Loading model structure and parameters from saved/latest.pth
Loading model structure and parameters from saved/latest.pth
Loading model structure and parameters from saved/latest.pth
Loading model structure and parameters from saved/latest.pth


OrderedDict([('recall@10', 0.0031), ('mrr@10', 0.0009), ('ndcg@10', 0.0014), ('hit@10', 0.0031), ('precision@10', 0.0003)])
