In [1]:
import os
import torch
import random
import numpy as np
import pandas as pd

from utils import copy_dataset, add_self_relation

In [2]:
# Notebook-wide settings for the source dataset and the target device.
dataset_str: str = "Amazon_Books-part"   # dataset directory name under ./dataset/
test_type: str = "fact"
# The device index (the part after ':') is reused below as a file/directory suffix.
device: torch.device = torch.device('cuda', 0)

In [3]:
# Prepare a per-device working copy of the dataset. The interaction -> KG
# conversion itself happens in the following cells.
src_path = './dataset/{}/'.format(dataset_str)

# Text after the last ':' of the device string, e.g. "0" for cuda:0.
suffix = str(device).rpartition(':')[2]
fake_dataset = '{}-fake-temp{}'.format(dataset_str, suffix)
temp_path = os.path.join('./dataset/', fake_dataset)

# copy_dataset is a project helper that is not shown here; presumably it copies
# the files under src_path into temp_path under the new dataset name -- confirm.
copy_dataset(suffix, src_path, temp_path, dataset_str)

In [4]:
# Load the interaction table of the working copy (tab-separated, which is also
# the read_table default).
inter_pd = pd.read_table(
    os.path.join(temp_path, fake_dataset + '.inter'),
    sep='\t',
)

In [5]:
def _interaction_triples(inter, head_col, tail_col, relation):
    """Return the (head_col, tail_col) pairs of `inter` as a KG triple table.

    Args:
        inter: interaction DataFrame containing `head_col` and `tail_col`.
        head_col: column whose values become 'head_id:token'.
        tail_col: column whose values become 'tail_id:token'.
        relation: relation label written to every row of 'relation_id:token'.

    Returns:
        A new DataFrame with the columns
        ['head_id:token', 'relation_id:token', 'tail_id:token'].
    """
    # .loc with a list of columns yields a new frame, so `inter` is not modified.
    triples = inter.loc[:, [head_col, tail_col]]
    # A scalar is broadcast to every row by DataFrame.insert.
    triples.insert(1, 'relation_id:token', relation)
    triples.columns = ['head_id:token', 'relation_id:token', 'tail_id:token']
    return triples


# Every user-item interaction becomes two triples: user -interaction-> item and
# the reversed item -interaction_t-> user.
kg_pd_t = _interaction_triples(inter_pd, 'item_id:token', 'user_id:token', 'interaction_t')
kg_pd = pd.concat(
    [
        _interaction_triples(inter_pd, 'user_id:token', 'item_id:token', 'interaction'),
        kg_pd_t,
    ],
    ignore_index=True,
)

# Each distinct item is linked to an entity carrying the same token.
# drop_duplicates keeps first-appearance order, so the link table is identical on
# every run; list(set(...)) produced an arbitrary, run-dependent row order.
item_list = inter_pd.loc[:, 'item_id:token'].drop_duplicates().tolist()
link_pd = pd.DataFrame({'item_id:token': item_list, 'entity_id:token': item_list})

In [6]:
# Write the triple table and the item->entity link table into the working
# dataset directory, next to the .inter file, as tab-separated text.
# `sep` is passed by keyword: positional arguments after the path are deprecated
# in pandas 2.1 and rejected once they become keyword-only.
kg_pd.to_csv(os.path.join(temp_path, fake_dataset + '.kg'), sep='\t', index=False)
link_pd.to_csv(os.path.join(temp_path, fake_dataset + '.link'), sep='\t', index=False)

In [7]:
import os
import pandas as pd
import torch
import random
import recbole
from shutil import rmtree

from recbole.config.configurator import Config
from recbole.model.knowledge_aware_recommender import *
from MCRec import MCRec

from logging import getLogger
from recbole.utils import init_logger, init_seed
from utils import get_model
from recbole.trainer import KGTrainer
from recbole.config import Config
from recbole.sampler import Sampler
from recbole.data.dataloader import FullSortEvalDataLoader
from recbole.data import create_dataset, data_preparation
from recbole.data.dataset import Dataset, KnowledgeBasedDataset


In [8]:
# Environment settings
# Environment settings
# NOTE(review): from here on `dataset_str` names the generated working copy, not
# the source dataset; re-running the copy cell after this one would derive the
# working-copy name from an already suffixed value.
dataset_str: str = fake_dataset
test_type: str = "fact"
device: torch.device = torch.device('cuda', 0)
# NOTE(review): `rate` is not used in the cells visible here -- confirm its meaning.
rate: float = 0.25


In [9]:
# Per-run overrides: a new random seed on every execution, GPU 0, and a
# checkpoint directory suffixed with the device index (e.g. "saved0/").
config_dict = dict(
    seed=random.randint(0, 10000),
    gpu_id=0,
    checkpoint_dir='saved{}/'.format(str(device).split(':')[-1]),
)

# evaluation for fake kg
config = Config(model=CFKG, dataset=dataset_str, config_dict=config_dict)

# Initialise logging from the config and dump the full config to the log.
init_logger(config)
logger = getLogger()
logger.info(config)

# The device is overridden only after the config has been logged.
config['device'] = device

# Load and filter the dataset, then split it into train / valid / test loaders.
dataset = create_dataset(config)
train_data, valid_data, test_data = data_preparation(config, dataset)


07 Oct 18:17    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 7972
state = INFO
reproducibility = True
data_path = dataset/Amazon_Books-part-fake-temp0
checkpoint_dir = saved0/
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'group_by': 'user', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 4096
metric_decimal_place = 4

Dataset Hyper Parameters:
field

In [14]:
# Relation ids of the loaded knowledge graph.
print(dataset.kg_feat.relation_id)
# kg_feat as seen by each split, printed in the order train, test, valid.
print(
    *(part._dataset.kg_feat for part in (train_data, test_data, valid_data)),
    sep='\n',
)

tensor([1, 1, 1,  ..., 2, 2, 2])
The batch_size of interaction: 226166
    head_id, torch.Size([226166]), cpu, torch.int64
    relation_id, torch.Size([226166]), cpu, torch.int64
    tail_id, torch.Size([226166]), cpu, torch.int64


The batch_size of interaction: 226166
    head_id, torch.Size([226166]), cpu, torch.int64
    relation_id, torch.Size([226166]), cpu, torch.int64
    tail_id, torch.Size([226166]), cpu, torch.int64


The batch_size of interaction: 226166
    head_id, torch.Size([226166]), cpu, torch.int64
    relation_id, torch.Size([226166]), cpu, torch.int64
    tail_id, torch.Size([226166]), cpu, torch.int64




tensor(1)

In [18]:
# Rebuild the KG triples from the *training* interactions only: map the internal
# user / item ids back to tokens, then to ids in the entity field.
head = dataset.token2id(
    dataset.entity_field,
    dataset.id2token(dataset.uid_field, train_data._dataset.inter_feat.user_id),
)
tail = dataset.token2id(
    dataset.entity_field,
    dataset.id2token(dataset.iid_field, train_data._dataset.inter_feat.item_id),
)

# One relation id per triple: 1 for the user->item half, 2 for the reversed half.
# NOTE(review): presumably 1 / 2 are the remapped ids of 'interaction' and
# 'interaction_t' -- confirm against the dataset's relation mapping.
relation = [rel for rel in (1, 2) for _ in range(len(head))]

interaction = dict(
    head_id=np.concatenate((head, tail)),
    relation_id=relation,
    tail_id=np.concatenate((tail, head)),
)
# Only the printed object is an Interaction; `interaction` itself stays a plain dict.
print(recbole.data.interaction.Interaction(interaction))

The batch_size of interaction: 183892
    head_id, torch.Size([183892]), cpu, torch.int64
    relation_id, torch.Size([183892]), cpu, torch.int64
    tail_id, torch.Size([183892]), cpu, torch.int64




In [12]:
# Replace the dataset's KG triples with the ones rebuilt from the training
# interactions. `interaction` is a plain dict of fields, while `dataset.kg_feat`
# is read by attribute elsewhere in this notebook (e.g.
# `dataset.kg_feat.relation_id`), so wrap it in an Interaction before assigning;
# a bare dict would break those accesses.
dataset.kg_feat = recbole.data.interaction.Interaction(interaction)
# NOTE(review): the splits built earlier are not updated by the line above;
# presumably each keeps its own reference to the previous kg_feat -- confirm
# whether the assignments below are needed.
# train_data._dataset.kg_feat = dataset.kg_feat
# test_data._dataset.kg_feat = dataset.kg_feat
# valid_data._dataset.kg_feat = dataset.kg_feat

In [13]:
# Build CFKG for the prepared dataset and move it to the configured device.
model = CFKG(config, dataset)
model = model.to(device)

# Train with validation after training epochs; progress bars are disabled.
trainer = KGTrainer(config, model)
trainer.fit(train_data, valid_data=valid_data, show_progress=False)

05 Oct 21:32    INFO  epoch 0 training [time: 0.36s, train loss: 61.2988]


KeyboardInterrupt: 