In [None]:
from recbole.quick_start import load_data_and_model
from recbole.trainer import Trainer

import recbole.evaluator.register
import recbole.evaluator.collector
import copy
import torch
from recbole.data import (
    create_dataset,
    data_preparation,
)
from recbole.data.transform import construct_transform
from recbole.utils import (
    init_logger,
    get_model,
    get_trainer,
    init_seed,
    set_color,
    get_flops,
    get_environment,
)
from recbole.data.utils import load_split_dataloaders
import numpy as np
import os
from datetime import datetime
import sys
import pandas as pd
from collections import OrderedDict
from recbole.data.dataloader import FullSortEvalDataLoader
from tqdm import tqdm
from recbole.utils import EvaluatorType, set_color, get_gpu_usage

# Monkey patch: work around the evaluator bug "'Counter' object has no attribute 'cpu'".
# Alias of RecBole's Collector class; its get_data_struct method is replaced
# further down with the tolerant implementation defined next.
new_Collector = recbole.evaluator.collector.Collector
def get_data_struct_new(self):
    """Return a deep-copied snapshot of the collected evaluation resources.

    Every ``torch.Tensor`` held in the underlying store is moved to the CPU
    before the snapshot is taken; entries of any other type are left exactly
    as they are, so no ``.cpu()`` call is ever made on a non-tensor value.
    Once the snapshot exists, the per-evaluation entries are removed from the
    live structure so they do not leak into the next collection round.
    """
    store = self.data_struct._data_dict
    # Only values are replaced (no keys added or removed), so iterating the
    # dict while assigning is safe.
    for name, value in store.items():
        if isinstance(value, torch.Tensor):
            store[name] = value.cpu()

    snapshot = copy.deepcopy(self.data_struct)

    outdated = ("rec.topk", "rec.meanrank", "rec.score", "rec.items", "data.label")
    for name in outdated:
        if name in self.data_struct:
            del self.data_struct[name]

    return snapshot

# Install the tolerant implementation on the Collector class so every collector
# created from here on (including the trainer's) uses it.
new_Collector.get_data_struct = get_data_struct_new


# Load the saved run: configuration, model, dataset and the three dataloaders.
config, model, dataset, train_data, valid_data, test_data = load_data_and_model('saved/yourfile')

# Adjust the evaluation settings before the trainer (and with it the evaluator
# and collector) is built from this config.
config['topk'] = [10, 20, 50]
config['metrics'] = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision', 'map']

# Monkey patch: work around the evaluator bug raised when computing average
# popularity: "Can not load the data without registration !".
# The evaluation loop is driven by hand here so that 'data.count_items' can be
# injected into the collected struct before the metrics are computed.
trainer = get_trainer(config["MODEL_TYPE"], config["model"])(config, model)
trainer.model.eval()
if isinstance(test_data, FullSortEvalDataLoader):
    eval_func = trainer._full_sort_batch_eval
    if trainer.item_tensor is None:
        trainer.item_tensor = test_data._dataset.get_item_feature().to(trainer.device)
else:
    eval_func = trainer._neg_sample_batch_eval
if trainer.config["eval_type"] == EvaluatorType.RANKING:
    trainer.tot_item_num = test_data._dataset.item_num

# Progress bar over the test batches.
iter_data = tqdm(
    test_data,
    total=len(test_data),
    ncols=100,
    desc=set_color("Evaluate", "pink"),
)

num_sample = 0
# Inference only: disable autograd so the forward passes do not build graphs
# and hold on to activation memory.
with torch.no_grad():
    for batch_idx, batched_data in enumerate(iter_data):
        num_sample += len(batched_data)
        interaction, scores, positive_u, positive_i = eval_func(batched_data)
        if trainer.gpu_available:
            iter_data.set_postfix_str(
                set_color("GPU RAM: " + get_gpu_usage(trainer.device), "yellow")
            )
        trainer.eval_collector.eval_batch_collect(
            scores, interaction, positive_u, positive_i
        )
    trainer.eval_collector.model_collect(trainer.model)
struct = trainer.eval_collector.get_data_struct()

from collections import Counter

item_id_field = config['ITEM_ID_FIELD']

# Count how often each item id occurs in the training interactions and hand the
# counts to the evaluator under the 'data.count_items' key.
if hasattr(train_data._dataset, 'inter_feat') and item_id_field in train_data._dataset.inter_feat:
    all_item_interactions_in_dataset = train_data._dataset.inter_feat[item_id_field]
    item_counter_for_popularity = Counter(all_item_interactions_in_dataset.cpu().numpy())
    struct['data.count_items'] = item_counter_for_popularity
else:
    print("Warning: Could not compute 'data.count_items' manually. Dataset structure unexpected.")

result = trainer.evaluator.evaluate(struct)
if not trainer.config["single_spec"]:
    # Not a single-process run: combine the partial results via the trainer.
    result = trainer._map_reduce(result, num_sample)
print(result)

07 Jun 21:05    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/amazon-books-23
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 5000
train_batch_size = 8192
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'none', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision', 'map', 'averagepopularity']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 8192
metric_decimal_p

OrderedDict([('recall@10', 0.0955), ('recall@20', 0.147), ('recall@50', 0.2412), ('mrr@10', 0.1053), ('mrr@20', 0.1124), ('mrr@50', 0.1174), ('ndcg@10', 0.0715), ('ndcg@20', 0.0888), ('ndcg@50', 0.1152), ('hit@10', 0.2496), ('hit@20', 0.353), ('hit@50', 0.5064), ('precision@10', 0.0325), ('precision@20', 0.0254), ('precision@50', 0.0171), ('map@10', 0.0413), ('map@20', 0.0456), ('map@50', 0.0501)])
