In [3]:
import torch

from src.structure.knowledge_graph import KnowledgeGraph

from src.structure.knowledge_graph_index import KGIndex

kgi = KGIndex.load("data/FB15k-237-betae/kgindex.json")

In [4]:
def load_from_uma_pretrain(name='complex', kg='fb15k-237'):
    state_dict = torch.load(f"/home/zwanggc/Project/Truth-Value-Reasoning-on-Knowledge-Graphs/pretrain/uma-kge/{kg}/{kg}-{name}.pt")
    model_state_dict = state_dict['model'][0]
    print(model_state_dict.keys())
    ent_emb = model_state_dict['_entity_embedder.embeddings.weight']
    rel_emb = model_state_dict['_relation_embedder.embeddings.weight']
    # ent_emb = model_state_dict['_entity_embedder.embeddings.weight']
    # rel_emb = model_state_dict['_relation_embedder.embeddings.weight']
    return ent_emb, rel_emb

In [4]:
kgi.inverse_relation_id_to_name[1]

'-/location/country/form_of_government'

In [None]:
train_kg = KnowledgeGraph.create("data/FB15k-237-betae/train_kg.tsv", kgi)

In [None]:
print(kgi.inverse_relation_id_to_name[244])
print(kgi.inverse_relation_id_to_name[245])
print(train_kg.ht2r[(812, 748)])
print(train_kg.ht2r[(748, 812)])

In [7]:
valid_kg = KnowledgeGraph.create("data/FB15k-237-betae/valid_kg.tsv", kgi)

In [8]:
print(valid_kg.ht2r[(812, 748)])
print(valid_kg.ht2r[(748, 812)])

[245]
[244]


# Conclusion

In the current konwledge graph representation
- relationship and the inverse relationship is contained. The triples are paired
- train KG is the subset of valid KG, valid KG is the subset of test KG


# Converting Checkpoints

## analysis data indix

In [9]:
uma_rel_dict_file = "/home/zwanggc/Project/Truth-Value-Reasoning-on-Knowledge-Graphs/pretrain/uma-kge/fb15k-237/fb15k-237/relation_ids.del"

In [10]:
uma_rel_dict_try = dict()
with open(uma_rel_dict_file, 'rt') as f:
    i = 0
    for line in f.readlines():
        uma_id, uma_str = line.strip().split()
        uma_id = int(uma_id)
        uma_rel_dict_try[2*uma_id] = '+' + uma_str
        uma_rel_dict_try[2*uma_id+1] = '-' + uma_str

In [11]:
for key, uma_str in uma_rel_dict_try.items():
    kgi_str = kgi.inverse_relation_id_to_name[key]
    assert kgi_str == uma_str

In [12]:
uma_ent_dict_file = "/home/zwanggc/Project/Truth-Value-Reasoning-on-Knowledge-Graphs/pretrain/uma-kge/fb15k-237/fb15k-237/entity_ids.del"

In [13]:
uma_ent_dict_try = dict()
with open(uma_ent_dict_file, 'rt') as f:
    i = 0
    for line in f.readlines():
        uma_id, uma_str = line.strip().split()
        uma_id = int(uma_id)
        uma_ent_dict_try[uma_id] = uma_str

In [14]:
for key, uma_str in kgi.inverse_entity_id_to_name.items():
    kgi_str = uma_ent_dict_try[key]
    if uma_str != kgi_str:
        print(uma_str, kgi_str)

In [15]:
len(kgi.inverse_entity_id_to_name)

14505

## RotatE

In [18]:
from src.structure.nbp_rotate import RotatE

nbp = RotatE(num_entities=kgi.num_entities,
        num_relations=kgi.num_relations,
        embedding_dim=128)


ent_emb, rel_emb = load_from_uma_pretrain(name='rotate')

nbp._entity_embedding.weight.data = ent_emb[:nbp.num_entities, :]

for i in range(237):
    my_id = 2*i
    my_rid = 2 * i + 1
    kge_id = i
    kge_rid = 237 + i
    nbp._relation_embedding.weight.data[my_id] = rel_emb[kge_id]
    nbp._relation_embedding.weight.data[my_rid] = rel_emb[kge_rid]

torch.save(nbp.state_dict(), "pretrain/uma-nbp/fb15k-237-rotate.pt")

odict_keys(['_base_model._entity_embedder._embeddings.weight', '_base_model._relation_embedder._embeddings.weight', '_entity_embedder._embeddings.weight', '_relation_embedder._embeddings.weight'])


KeyError: '_entity_embedder.embeddings.weight'

## TransE

In [None]:
from src.structure.nbp_transe import TransE

nbp = TransE(num_entities=kgi.num_entities,
        num_relations=kgi.num_relations,
        embedding_dim=128,p=1,margin=1,scale=1,device='cpu')


ent_emb, rel_emb = load_from_uma_pretrain(name='transe')

nbp._entity_embedding.weight.data = ent_emb[:nbp.num_entities, :]

for i in range(237):
    my_id = 2*i
    my_rid = 2 * i + 1
    kge_id = i
    kge_rid = 237 + i
    nbp._relation_embedding.weight.data[my_id] = rel_emb[kge_id]
    nbp._relation_embedding.weight.data[my_rid] = rel_emb[kge_rid]

torch.save(nbp.state_dict(), "pretrain/uma-nbp/fb15k-237-transe.pt")

## DistMult

In [None]:
from src.structure.nbp_distmult import DistMult

nbp = DistMult(num_entities=kgi.num_entities,
        num_relations=kgi.num_relations,
        embedding_dim=256)


ent_emb, rel_emb = load_from_uma_pretrain(name='distmult')

nbp._entity_embedding.weight.data = ent_emb[:nbp.num_entities, :]

for i in range(237):
    my_id = 2*i
    my_rid = 2 * i + 1
    kge_id = i
    kge_rid = 237 + i
    nbp._relation_embedding.weight.data[my_id] = rel_emb[kge_id]
    nbp._relation_embedding.weight.data[my_rid] = rel_emb[kge_rid]

torch.save(nbp.state_dict(), "pretrain/uma-nbp/fb15k-237-distmult.pt")

## ConvE

In [4]:
state_dict = torch.load("/home/zwanggc/Project/Truth-Value-Reasoning-on-Knowledge-Graphs/pretrain/uma-kge/fb15k-237/fb15k-237-conve.pt")
model_state_dict = state_dict['model'][0]
for k in model_state_dict.keys():
    print(k, model_state_dict[k].shape)

_scorer.convolution.weight torch.Size([32, 1, 3, 3])
_scorer.convolution.bias torch.Size([32])
_scorer.bn1.running_mean torch.Size([32])
_scorer.bn1.running_var torch.Size([32])
_scorer.bn1.num_batches_tracked torch.Size([])
_scorer.bn2.running_mean torch.Size([288])
_scorer.bn2.running_var torch.Size([288])
_scorer.bn2.num_batches_tracked torch.Size([])
_scorer.projection.weight torch.Size([288, 15488])
_scorer.projection.bias torch.Size([288])
_base_model._entity_embedder.embeddings.weight torch.Size([14541, 289])
_base_model._relation_embedder.embeddings.weight torch.Size([474, 289])
_base_model._scorer.convolution.weight torch.Size([32, 1, 3, 3])
_base_model._scorer.convolution.bias torch.Size([32])
_base_model._scorer.bn1.running_mean torch.Size([32])
_base_model._scorer.bn1.running_var torch.Size([32])
_base_model._scorer.bn1.num_batches_tracked torch.Size([])
_base_model._scorer.bn2.running_mean torch.Size([288])
_base_model._scorer.bn2.running_var torch.Size([288])
_base_model.

In [5]:
model_state_dict['_entity_embedder.embeddings.weight'][:, 0]

tensor([-0.0693,  0.0062, -0.0652,  ..., -0.2642, -0.2789, -0.2581])

In [3]:
from src.structure.nbp_conve import ConvE

nbp = ConvE(num_entities=kgi.num_entities,
            num_relations=kgi.num_relations,
            embedding_dim=289,
            device='cpu')

In [6]:
ent_emb, rel_emb = load_from_uma_pretrain(name='conve')

nbp._entity_embedding.weight.data = ent_emb[:nbp.num_entities, :]

for i in range(237):
    my_id = 2*i
    my_rid = 2 * i + 1
    kge_id = i
    kge_rid = 237 + i
    nbp._relation_embedding.weight.data[my_id] = rel_emb[kge_id]
    nbp._relation_embedding.weight.data[my_rid] = rel_emb[kge_rid]

odict_keys(['_scorer.convolution.weight', '_scorer.convolution.bias', '_scorer.bn1.running_mean', '_scorer.bn1.running_var', '_scorer.bn1.num_batches_tracked', '_scorer.bn2.running_mean', '_scorer.bn2.running_var', '_scorer.bn2.num_batches_tracked', '_scorer.projection.weight', '_scorer.projection.bias', '_base_model._entity_embedder.embeddings.weight', '_base_model._relation_embedder.embeddings.weight', '_base_model._scorer.convolution.weight', '_base_model._scorer.convolution.bias', '_base_model._scorer.bn1.running_mean', '_base_model._scorer.bn1.running_var', '_base_model._scorer.bn1.num_batches_tracked', '_base_model._scorer.bn2.running_mean', '_base_model._scorer.bn2.running_var', '_base_model._scorer.bn2.num_batches_tracked', '_base_model._scorer.projection.weight', '_base_model._scorer.projection.bias', '_entity_embedder.embeddings.weight', '_relation_embedder.embeddings.weight'])


In [11]:
nbp.load_state_dict(model_state_dict, strict=False)

_IncompatibleKeys(missing_keys=['_entity_embedding.weight', '_relation_embedding.weight'], unexpected_keys=['_base_model._entity_embedder.embeddings.weight', '_base_model._relation_embedder.embeddings.weight', '_base_model._scorer.convolution.weight', '_base_model._scorer.convolution.bias', '_base_model._scorer.bn1.running_mean', '_base_model._scorer.bn1.running_var', '_base_model._scorer.bn1.num_batches_tracked', '_base_model._scorer.bn2.running_mean', '_base_model._scorer.bn2.running_var', '_base_model._scorer.bn2.num_batches_tracked', '_base_model._scorer.projection.weight', '_base_model._scorer.projection.bias', '_entity_embedder.embeddings.weight', '_relation_embedder.embeddings.weight'])

In [12]:
torch.save(nbp.state_dict(), "pretrain/uma-nbp/fb15k-237-conve.pt")

## RESCAL

In [14]:
state_dict = torch.load("/home/zwanggc/Project/Truth-Value-Reasoning-on-Knowledge-Graphs/pretrain/uma-kge/fb15k-237/fb15k-237-rescal.pt")
model_state_dict = state_dict['model'][0]
for k in model_state_dict.keys():
    print(k, model_state_dict[k].shape)

_entity_embedder.embeddings.weight torch.Size([14541, 128])
_relation_embedder.embeddings.weight torch.Size([237, 16384])


In [9]:
from src.structure.nbp_rescal import RESCAL

nbp = RESCAL(num_entities=kgi.num_entities,
        num_relations=kgi.num_relations,
        embedding_dim=128)


ent_emb, rel_emb = load_from_uma_pretrain(name='rescal')

print(ent_emb.shape)
print(rel_emb.shape)

nbp._entity_embedding.weight.data = ent_emb[:nbp.num_entities, :]

for i in range(237):
    my_id = 2*i
    my_rid = 2 * i + 1
    kge_id = i
    kge_rid = 237 + i
    nbp._relation_embedding.weight.data[my_id] = rel_emb[kge_id]
    nbp._relation_embedding.weight.data[my_rid] = rel_emb[kge_id].reshape(128, 128).transpose(0, 1).reshape(-1)

torch.save(nbp.state_dict(), "pretrain/uma-nbp/fb15k-237-rescal.pt")

odict_keys(['_entity_embedder.embeddings.weight', '_relation_embedder.embeddings.weight'])
torch.Size([14541, 128])
torch.Size([237, 16384])
