In [1]:
!git clone -b OpenKE-PyTorch https://github.com/thunlp/OpenKE
%cd OpenKE/
!mkdir checkpoint
!mkdir result
%cd openke
!bash make.sh
%cd /content

Cloning into 'OpenKE'...
remote: Enumerating objects: 1276, done.
remote: Total 1276 (delta 0), reused 0 (delta 0), pack-reused 1276
Receiving objects: 100% (1276/1276), 287.90 MiB | 26.31 MiB/s, done.
Resolving deltas: 100% (610/610), done.
/content/OpenKE
/content/OpenKE/openke
/content


In [2]:
# Fetch the French_Knowledge_Graph repository into the current directory;
# the next cell copies its benchmarks/RezoJDM16K folder into OpenKE.
!git clone https://github.com/ContentSide/French_Knowledge_Graph.git

Cloning into 'French_Knowledge_Graph'...
remote: Enumerating objects: 286, done.
remote: Counting objects: 100% (26/26), done.
remote: Compressing objects: 100% (9/9), done.
remote: Total 286 (delta 22), reused 17 (delta 17), pack-reused 260
Receiving objects: 100% (286/286), 38.27 MiB | 25.48 MiB/s, done.
Resolving deltas: 100% (146/146), done.


In [3]:
%cd OpenKE
!cp -r ../French_Knowledge_Graph/benchmarks/RezoJDM16K benchmarks
!mkdir benchmarks/RezoJDM16K/checkpoint
!mkdir benchmarks/RezoJDM16K/result

/content/OpenKE


In [4]:
import openke
import numpy as np
from openke.config import Trainer, Tester
from openke.module.model import TransE
from openke.module.loss import MarginLoss
from openke.module.strategy import NegativeSampling
from openke.data import TrainDataLoader, TestDataLoader

# Shared locations: the benchmark folder read by both loaders and the
# file the trained weights are written to and read back from.
benchmark_dir = "./benchmarks/RezoJDM16K/"
checkpoint_path = './checkpoint/transe.ckpt'

# Training data loader.
train_dataloader = TrainDataLoader(
    in_path=benchmark_dir,
    nbatches=100,
    threads=8,
    sampling_mode="normal",
    bern_flag=1,
    filter_flag=1,
    neg_ent=35,
    neg_rel=0,
)

# Test data loader in "link" mode, reading the same benchmark folder.
test_dataloader = TestDataLoader(benchmark_dir, "link")

# TransE model sized from the entity/relation totals reported by the training loader.
transe = TransE(
    ent_tot=train_dataloader.get_ent_tot(),
    rel_tot=train_dataloader.get_rel_tot(),
    dim=100,
    p_norm=1,
    norm_flag=True,
)

# Wrap the model with a margin loss under the negative-sampling strategy.
model = NegativeSampling(
    model=transe,
    loss=MarginLoss(margin=4.0),
    batch_size=train_dataloader.get_batch_size(),
)

# Train (train_times = 50, GPU enabled), then persist the weights.
trainer = Trainer(
    model=model,
    data_loader=train_dataloader,
    train_times=50,
    alpha=1.0,
    use_gpu=True,
)
trainer.run()
transe.save_checkpoint(checkpoint_path)

# Reload the saved weights and build the tester; the evaluation call itself
# is left disabled, exactly as before.
transe.load_checkpoint(checkpoint_path)
tester = Tester(model=transe, data_loader=test_dataloader, use_gpu=True)
# tester.run_link_prediction(type_constrain = False)

Finish initializing...


Epoch 49 | loss: 31.120690: 100%|██████████| 50/50 [04:45<00:00,  5.71s/it]


In [5]:
# Detach both embedding tables from the graph, move them to the CPU and
# convert them to NumPy arrays (entities first, then relations).
ent_embeddings, rel_embeddings = (
    table.weight.detach().cpu().numpy()
    for table in (transe.ent_embeddings, transe.rel_embeddings)
)

# Persist each array next to the benchmark data.
for npy_path, matrix in (
    ("./benchmarks/RezoJDM16K/entity2vec100.npy", ent_embeddings),
    ("./benchmarks/RezoJDM16K/relation2vec100.npy", rel_embeddings),
):
    np.save(npy_path, matrix)

In [6]:
def export_embeddings_as_text(npy_path, init_path):
    """Rewrite a saved embedding array as a tab-separated text file.

    Every row of the array becomes one line: the row's values are rendered
    with ``str``, joined by tabs, and terminated by a trailing tab and a
    newline (the same layout the two former copy-pasted loops produced).

    Args:
        npy_path: path of the ``.npy`` file to read with ``np.load``.
        init_path: path of the text file to write; opened in "w" mode, so
            an existing file is overwritten.

    Returns:
        The array loaded from ``npy_path``.
    """
    embeddings = np.load(npy_path)
    with open(init_path, "w") as fp:
        for row in embeddings:
            line = "\t".join(str(value) for value in row)
            fp.write(f"{line}\t\n")
    return embeddings


# Relations first, then entities — same order and same output files as before.
relation_embs = export_embeddings_as_text(
    "./benchmarks/RezoJDM16K/relation2vec100.npy",
    "./benchmarks/RezoJDM16K/relation2vec100.init",
)
entity_embs = export_embeddings_as_text(
    "./benchmarks/RezoJDM16K/entity2vec100.npy",
    "./benchmarks/RezoJDM16K/entity2vec100.init",
)

In [7]:
# Move into the benchmark folder so the archive stores the two .init files
# under their bare names. This changes the kernel's working directory, and the
# relative path only resolves when the cell is run from the OpenKE checkout.
%cd benchmarks/RezoJDM16K/
# Bundle both text exports into a single archive in the current folder.
!zip embeddings.zip relation2vec100.init entity2vec100.init

/content/OpenKE/benchmarks/RezoJDM16K
  adding: relation2vec100.init (deflated 58%)
  adding: entity2vec100.init (deflated 62%)


In [8]:
# google.colab is only importable inside a Colab runtime.
# The relative file name relies on the working directory still being the
# benchmark folder where embeddings.zip was created by the previous cell.
from google.colab import files
files.download("embeddings.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>