In [2]:
import torch
import pickle
import argparse
from openke.config import Trainer, Tester
from openke.data import TrainDataLoader, TestDataLoader
from openke.data.PyTorchTrainDataLoader import PyTorchTrainDataLoader
from openke.module.model import TransE, TransH, TransR, RotatE
from openke.module.strategy import NegativeSampling
from openke.module.loss import MarginLoss, SigmoidLoss

def str2bool(v):
    """Convert a command-line style flag into a real ``bool``.

    Args:
        v: either an actual ``bool`` (returned unchanged) or a string such as
            ``'yes'``/``'no'``, ``'true'``/``'false'``, ``'t'``/``'f'``,
            ``'y'``/``'n'``, ``'1'``/``'0'``; matching ignores letter case.

    Returns:
        The boolean the input stands for.

    Raises:
        argparse.ArgumentTypeError: if the string is not one of the accepted
            spellings.
    """
    if isinstance(v, bool):
        return v

    # Map every accepted spelling to the boolean it represents.
    spellings = dict.fromkeys(('yes', 'true', 't', 'y', '1'), True)
    spellings.update(dict.fromkeys(('no', 'false', 'f', 'n', '0'), False))

    token = v.lower()
    if token not in spellings:
        raise argparse.ArgumentTypeError('Boolean value expected.')
    return spellings[token]
def get_parser(jupyter=False):
    """Declare the experiment options and return them already parsed.

    Despite its name, this returns the parsed ``argparse.Namespace`` rather
    than the parser object.

    Args:
        jupyter: when truthy, an empty argument list is parsed, so every
            option takes its default and ``sys.argv`` (the notebook kernel's
            own command line) is never looked at. When falsy, ``sys.argv`` is
            parsed as in a normal script.

    Returns:
        argparse.Namespace: one attribute per option declared below.
    """
    parser = argparse.ArgumentParser()

    # Options the notebook forwards to TrainDataLoader / TestDataLoader.
    parser.add_argument('--dataset', type=str, default='laptop')
    parser.add_argument('--nbatches', type=int, default=100)
    parser.add_argument('--threads', type=int, default=16)
    # NOTE: spelled `sampling_model`; it is passed on as `sampling_mode=`.
    parser.add_argument('--sampling_model', type=str, default='normal')
    parser.add_argument('--bern_flag', type=int, default=1)
    parser.add_argument('--filter_flag', type=int, default=1)
    parser.add_argument('--neg_ent', type=int, default=25)
    parser.add_argument('--neg_rel', type=int, default=0)

    # Options the notebook forwards to the embedding models and the loss.
    parser.add_argument('--emb_dim', type=int, default=200)
    parser.add_argument('--p_norm', type=int, default=1)
    parser.add_argument('--norm_flag', type=str2bool, default='True')
    parser.add_argument('--margin', type=float, default=5.0)

    # Options the notebook forwards to Trainer / Tester.
    parser.add_argument('--train_epoch', type=int, default=10)
    parser.add_argument('--alpha', type=float, default=1.0)
    parser.add_argument('--use_gpu', type=str2bool, default='True')

    # Truthiness (not `is True`) so that 1 or numpy.True_ also select the
    # notebook path; args=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(args=[] if jupyter else None)

# TransE

In [8]:
# Configuration for the TransE run. It is defined in this cell so the section
# also works after "Restart Kernel -> Run All"; both values equal the parser
# defaults and match the recorded output (laptop dataset, 10 epochs).
args = get_parser(jupyter=True)
args.dataset = 'laptop'
args.train_epoch = 10

# dataloader for training
train_dataloader = TrainDataLoader(
    in_path = f'./benchmarks/{args.dataset}/', # if this path is wrong, loading fails without any log output
    nbatches = args.nbatches,
    threads = args.threads,
    sampling_mode = args.sampling_model,
    bern_flag = args.bern_flag,
    filter_flag = args.filter_flag,
    neg_ent = args.neg_ent,
    neg_rel = args.neg_rel)

# dataloader for test
test_dataloader = TestDataLoader(f'./benchmarks/{args.dataset}/', "link",type_constrain=False)

# define the model
transe = TransE(
    ent_tot = train_dataloader.get_ent_tot(),
    rel_tot = train_dataloader.get_rel_tot(),
    dim = args.emb_dim,
    p_norm = args.p_norm,
    norm_flag = args.norm_flag)

# wrap the model with the negative-sampling strategy and a margin loss
model = NegativeSampling(
    model = transe,
    loss = MarginLoss(margin=args.margin),
    batch_size = train_dataloader.get_batch_size())

# train the model
trainer = Trainer(
    model = model,
    data_loader = train_dataloader,
    train_times = args.train_epoch,
    alpha = args.alpha,
    use_gpu = args.use_gpu)

trainer.run()

# evaluate link prediction with the bare model (not the training wrapper)
tester = Tester(
    model = transe,
    data_loader = test_dataloader,
    use_gpu = args.use_gpu)

mrr, mr, hit10, hit3, hit1 = tester.run_link_prediction(type_constrain = False)
print(f"mrr: {mrr}, mr: {mr}, hit10: {hit10}, hit3: {hit3}, hit1: {hit1}")

# export the entity embedding matrix as a NumPy array; the `with` block
# guarantees the file is flushed and closed even if pickling fails
output = transe.ent_embeddings.weight.cpu().detach().numpy()
with open(f'./graph/{args.dataset}_kge_transe.pkl','wb') as pkl_file:
    pickle.dump(output, pkl_file)

  0%|          | 0/10 [00:00<?, ?it/s]

Finish initializing...


Epoch 9 | loss: 3.645945: 100%|██████████| 10/10 [00:02<00:00,  3.94it/s]
100%|██████████| 12741/12741 [00:16<00:00, 770.09it/s]


0.9043246507644653
mrr: 0.8357038497924805, mr: 128.99847412109375, hit10: 0.9043246507644653, hit3: 0.8641393780708313, hit1: 0.7953850030899048


# TransH

In [34]:
# Configuration for the TransH run: parser defaults, with the dataset and the
# number of training epochs set explicitly.
args = get_parser(jupyter=True)
args.dataset, args.train_epoch = 'laptop', 10

In [35]:
# dataloader for training
train_dataloader = TrainDataLoader(
    in_path = f'./benchmarks/{args.dataset}/', # if this path is wrong, loading fails without any log output
    nbatches = args.nbatches,
    threads = args.threads,
    sampling_mode = args.sampling_model,
    bern_flag = args.bern_flag,
    filter_flag = args.filter_flag,
    neg_ent = args.neg_ent,
    neg_rel = args.neg_rel)

# dataloader for test
test_dataloader = TestDataLoader(f'./benchmarks/{args.dataset}/', "link",type_constrain=False)

# define the model
transh = TransH(
    ent_tot = train_dataloader.get_ent_tot(),
    rel_tot = train_dataloader.get_rel_tot(),
    dim = args.emb_dim,
    p_norm = args.p_norm,
    norm_flag = args.norm_flag)

# wrap the model with the negative-sampling strategy and a margin loss
model = NegativeSampling(
    model = transh,
    loss = MarginLoss(margin = args.margin),
    batch_size = train_dataloader.get_batch_size()
)

# train the model
trainer = Trainer(
    model = model,
    data_loader = train_dataloader,
    train_times = args.train_epoch,
    alpha = args.alpha,
    use_gpu = args.use_gpu)

trainer.run()

# evaluate link prediction with the bare model (not the training wrapper)
tester = Tester(
    model = transh,
    data_loader = test_dataloader,
    use_gpu = args.use_gpu)

mrr, mr, hit10, hit3, hit1 = tester.run_link_prediction(type_constrain = False)
print(f"mrr: {'%.4f'%mrr}, mr: {'%.4f'%mr}, hit10: {'%.4f'%hit10}, hit3: {'%.4f'%hit3}, hit1: {'%.4f'%hit1}")

# export the entity embedding matrix as a NumPy array; the `with` block
# guarantees the file is flushed and closed even if pickling fails
output = transh.ent_embeddings.weight.cpu().detach().numpy()
with open(f'./graph/{args.dataset}_kge_transh.pkl','wb') as pkl_file:
    pickle.dump(output, pkl_file)

  0%|          | 0/10 [00:00<?, ?it/s]

Finish initializing...


Epoch 9 | loss: 3.789272: 100%|██████████| 10/10 [00:18<00:00,  1.85s/it]
100%|██████████| 12741/12741 [01:10<00:00, 180.37it/s]

0.9054626822471619
mrr: 0.8374, mr: 123.2658, hit10: 0.9055, hit3: 0.8622, hit1: 0.7986





# TransR

In [19]:
# Configuration for the TransR run: parser defaults, with the dataset and the
# number of training epochs set explicitly.
args = get_parser(jupyter=True)
args.dataset, args.train_epoch = 'twitter', 25

In [20]:
# dataloader for training
train_dataloader = TrainDataLoader(
    in_path = f'./benchmarks/{args.dataset}/', # if this path is wrong, loading fails without any log output
    nbatches = args.nbatches,
    threads = args.threads,
    sampling_mode = args.sampling_model,
    bern_flag = args.bern_flag,
    filter_flag = args.filter_flag,
    neg_ent = args.neg_ent,
    neg_rel = args.neg_rel)

# dataloader for test
test_dataloader = TestDataLoader(f'./benchmarks/{args.dataset}/', "link",type_constrain=False)

# TransE model used only to produce initial parameters for TransR
transe = TransE(
    ent_tot = train_dataloader.get_ent_tot(),
    rel_tot = train_dataloader.get_rel_tot(),
    dim = args.emb_dim,
    p_norm = args.p_norm,
    norm_flag = args.norm_flag)

model_e = NegativeSampling(
    model = transe,
    loss = MarginLoss(margin=args.margin),
    batch_size = train_dataloader.get_batch_size())

# TransR model; entity and relation spaces share the same dimension here
transr = TransR(
    ent_tot = train_dataloader.get_ent_tot(),
    rel_tot = train_dataloader.get_rel_tot(),
    dim_e = args.emb_dim,
    dim_r = args.emb_dim,
    p_norm = args.p_norm,
    norm_flag = args.norm_flag,
    rand_init = False)

# note: TransR is trained with a margin one unit smaller than TransE
model_r = NegativeSampling(
    model = transr,
    loss = MarginLoss(margin = args.margin-1),
    batch_size = train_dataloader.get_batch_size()
)

# warm start: train TransE for a single epoch ...
trainer = Trainer(
    model = model_e,
    data_loader = train_dataloader,
    train_times = 1,
    alpha = 0.5,
    use_gpu = args.use_gpu)
trainer.run()
parameters = transe.get_parameters()

# ... and copy its parameters into TransR before the main training run
transr.set_parameters(parameters)
trainer = Trainer(
    model = model_r,
    data_loader = train_dataloader,
    train_times = args.train_epoch,
    alpha = args.alpha,
    use_gpu = args.use_gpu)

trainer.run()

# evaluate link prediction with the bare TransR model
tester = Tester(
    model = transr,
    data_loader = test_dataloader,
    use_gpu = args.use_gpu)

mrr, mr, hit10, hit3, hit1 = tester.run_link_prediction(type_constrain = False)
print(f"mrr: {'%.4f'%mrr}, mr: {'%.4f'%mr}, hit10: {'%.4f'%hit10}, hit3: {'%.4f'%hit3}, hit1: {'%.4f'%hit1}")

# export the entity embedding matrix as a NumPy array; the `with` block
# guarantees the file is flushed and closed even if pickling fails
output = transr.ent_embeddings.weight.cpu().detach().numpy()
with open(f'./graph/{args.dataset}_kge_transr.pkl','wb') as pkl_file:
    pickle.dump(output, pkl_file)

  0%|          | 0/1 [00:00<?, ?it/s]

Finish initializing...


Epoch 0 | loss: 24.854141: 100%|██████████| 1/1 [00:01<00:00,  1.46s/it]
  0%|          | 0/25 [00:00<?, ?it/s]

Finish initializing...


Epoch 24 | loss: 0.407803: 100%|██████████| 25/25 [01:46<00:00,  4.25s/it]
100%|██████████| 13313/13313 [01:16<00:00, 173.94it/s]

0.937091588973999
mrr: 0.8746, mr: 23.1594, hit10: 0.9371, hit3: 0.9087, hit1: 0.8330





# RotatE

In [3]:
# Configuration for the RotatE run.
args = get_parser(jupyter=True)
args.dataset = 'laptop'
# The parser declares these two options as `sampling_model` and `threads`,
# while the RotatE training cell reads `args.thread`. Set both spellings so
# the parser-declared fields do not silently keep their defaults
# ('normal' / 16).
args.sampling_mode = args.sampling_model = 'cross'
args.thread = args.threads = 8
# `batch_size` is not a parser option; it is added here because the RotatE
# loader is sized by batch_size instead of nbatches.
args.batch_size = 2000
args.bern_flag = 0
args.filter_flag = 1
args.neg_ent = 64
args.neg_rel = 0
args.emb_dim = 200
args.train_epoch = 200

In [None]:
# dataloader for training
train_dataloader = TrainDataLoader(
    in_path = f"./benchmarks/{args.dataset}/",
    batch_size = args.batch_size,
    threads = args.thread,
    sampling_mode = args.sampling_mode,  # set to 'cross' in the RotatE args cell
    bern_flag = args.bern_flag,
    filter_flag = args.filter_flag,
    neg_ent = args.neg_ent,
    neg_rel = args.neg_rel
)

# dataloader for test
test_dataloader = TestDataLoader(f"./benchmarks/{args.dataset}/", "link",type_constrain=False)

# define the model
rotate = RotatE(
    ent_tot = train_dataloader.get_ent_tot(),
    rel_tot = train_dataloader.get_rel_tot(),
    dim = args.emb_dim,
    margin = 6.0,
    epsilon = 2.0,
)

# define the loss function
model = NegativeSampling(
    model = rotate,
    loss = SigmoidLoss(adv_temperature = 2),
    batch_size = train_dataloader.get_batch_size(),
    regul_rate = 0.0
)

# train the model (Adam with a fixed learning rate instead of args.alpha)
trainer = Trainer(
    model = model,
    data_loader = train_dataloader,
    train_times = args.train_epoch,
    alpha = 2e-5,
    use_gpu = args.use_gpu,
    opt_method = "adam"
)
trainer.run()

# evaluate link prediction; follow args.use_gpu like the Trainer above so a
# CPU-only run (use_gpu=False) does not break at test time
tester = Tester(model = rotate, data_loader = test_dataloader, use_gpu = args.use_gpu)

mrr, mr, hit10, hit3, hit1 = tester.run_link_prediction(type_constrain = False)
print(f"mrr: {'%.4f'%mrr}, mr: {'%.4f'%mr}, hit10: {'%.4f'%hit10}, hit3: {'%.4f'%hit3}, hit1: {'%.4f'%hit1}")

# export the entity embedding matrix as a NumPy array; the `with` block
# guarantees the file is flushed and closed even if pickling fails
output = rotate.ent_embeddings.weight.cpu().detach().numpy()
with open(f'./graph/{args.dataset}_kge_rotate_1.pkl','wb') as pkl_file:
    pickle.dump(output, pkl_file)