In [1]:
# Construction of dataset

import os, itertools, time, pickle
import subprocess
from xml.dom import minidom
from collections import Counter, OrderedDict
from operator import itemgetter
from nltk.corpus import wordnet
import tensorflow as tf
import tensorflow_hub as hub
from scipy import spatial
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import numpy as np
import scipy.sparse as sp
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from math import ceil, exp

USE_folder = "/home/vlead/USE"
alignment_folder = "reference-alignment/"

# Load reference alignments 
def load_alignments(folder):
    alignments = []
    for f in os.listdir(folder):
        doc = minidom.parse(folder + f)
        ls = list(zip(doc.getElementsByTagName('entity1'), doc.getElementsByTagName('entity2')))
        alignments.extend([(a.getAttribute('rdf:resource'), b.getAttribute('rdf:resource')) for (a,b) in ls])
    return alignments
        
reference_alignments = load_alignments(alignment_folder)

In [2]:
flatten = lambda l: [item for sublist in l for item in sublist]

class Ontology():
    def __init__(self, ontology):
        self.ontology = ontology
        self.ontology_obj = minidom.parse(ontology)
        self.root = self.ontology_obj.documentElement
        self.subclasses = self.parse_subclasses()
        self.object_properties = self.parse_object_properties()
        self.data_properties = self.parse_data_properties()
        self.triples = self.parse_triples()
        self.classes = self.parse_classes()
    
    def get_child_node(self, element, tag):
        return [e for e in element._get_childNodes() if type(e)==minidom.Element and e._get_tagName() == tag]
        
    def has_attribute_value(self, element, attribute, value):
        return True if element.getAttribute(attribute).split("#")[-1] == value else False
    
    def get_subclass_triples(self):
        return [(a,b,"subclass_of") for (a,b) in self.get_subclasses()]
    
    def parse_triples(self, union_flag=0, subclass_of=True):
        obj_props = self.object_properties
        data_props = self.data_properties
        props = obj_props + data_props
        all_triples = []
        for prop in props:
            domain_children = self.get_child_node(prop, "rdfs:domain")
            range_children = self.get_child_node(prop, "rdfs:range")
            domain_prop = self.filter_null([self.extract_ID(el) for el in domain_children])
            range_prop = self.filter_null([self.extract_ID(el) for el in range_children])
            if not domain_children or not range_children:
                continue
            if not domain_prop:
                domain_prop = self.filter_null([self.extract_ID(el) for el in domain_children[0].getElementsByTagName("owl:Class")])
            if not range_prop:
                range_prop = self.filter_null([self.extract_ID(el) for el in range_children[0].getElementsByTagName("owl:Class")])
            if domain_prop and range_prop:
                if union_flag == 0:
                    all_triples.extend([(el[0], el[1], self.extract_ID(prop)) for el in list(itertools.product(domain_prop, range_prop))])
                else:
                    all_triples.append(("###".join(domain_prop), "###".join(range_prop), self.extract_ID(prop)))
        if subclass_of:
            all_triples.extend(self.get_subclass_triples())
        return list(set(all_triples))
    
    def get_triples(self, union_flag=0, subclass_of=True):
        if union_flag == 0:
            return self.triples
        else:
            return self.parse_triples(union_flag = 1, subclass_of = False)

    def parse_subclasses(self, union_flag=0):
        subclasses = self.root.getElementsByTagName("rdfs:subClassOf")
        subclass_pairs = []
        for el in subclasses:
            inline_subclasses = self.extract_ID(el)
            if inline_subclasses:
                subclass_pairs.append((el, el.parentNode))
            else:
                level1_class = self.get_child_node(el, "owl:Class")
                if not level1_class:
                    continue
                if self.extract_ID(level1_class[0]):
                    subclass_pairs.append((level1_class[0], el.parentNode))
                else:
                    level2classes = level1_class[0].getElementsByTagName("owl:Class")
                    
                    subclass_pairs.extend([(elem, el.parentNode) for elem in level2classes if self.extract_ID(elem)])
        return subclass_pairs
        
    def get_subclasses(self):
        return [(self.extract_ID(a), self.extract_ID(b)) for (a,b) in self.subclasses]
    
    def filter_null(self, data):
        return [el for el in data if el]
    
    def extract_ID(self, element):
        element_id = element.getAttribute("rdf:ID") or element.getAttribute("rdf:resource") or element.getAttribute("rdf:about")
        return element_id.split("#")[-1]
    
    def parse_classes(self):
        class_elems = [self.extract_ID(el) for el in self.root.getElementsByTagName("owl:Class")]
        subclass_classes = list(set(flatten([el[:-1] for el in self.triples])))
        return list(set(self.filter_null(class_elems + subclass_classes)))
    
    def get_classes(self):
        return self.classes
    
    def get_entities(self):
        entities = [self.extract_ID(el) for el in self.root.getElementsByTagName("owl:Class")]
        return list(set(self.filter_null(entities)))

    def parse_data_properties(self):
        data_properties = [el for el in self.get_child_node(self.root, 'owl:DatatypeProperty')]
        fn_data_properties = [el for el in self.get_child_node(self.root, 'owl:FunctionalProperty') if el]
        fn_data_properties = [el for el in fn_data_properties if type(el)==minidom.Element and 
            [el for el in self.get_child_node(el, "rdf:type") if 
             self.has_attribute_value(el, "rdf:resource", "DatatypeProperty")]]
        inv_fn_data_properties = [el for el in self.get_child_node(self.root, 'owl:InverseFunctionalProperty') if el]
        inv_fn_data_properties = [el for el in inv_fn_data_properties if type(el)==minidom.Element and 
            [el for el in self.get_child_node(el, "rdf:type") if 
             self.has_attribute_value(el, "rdf:resource", "DatatypeProperty")]]
        return data_properties + fn_data_properties + inv_fn_data_properties
        
    def parse_object_properties(self):
        obj_properties = [el for el in self.get_child_node(self.root, 'owl:ObjectProperty')]
        fn_obj_properties = [el for el in self.get_child_node(self.root, 'owl:FunctionalProperty') if el]
        fn_obj_properties = [el for el in fn_obj_properties if type(el)==minidom.Element and 
            [el for el in self.get_child_node(el, "rdf:type") if 
             self.has_attribute_value(el, "rdf:resource", "ObjectProperty")]]
        inv_fn_obj_properties = [el for el in self.get_child_node(self.root, 'owl:InverseFunctionalProperty') if el]
        inv_fn_obj_properties = [el for el in inv_fn_obj_properties if type(el)==minidom.Element and 
            [el for el in self.get_child_node(el, "rdf:type") if 
             self.has_attribute_value(el, "rdf:resource", "ObjectProperty")]]
        return obj_properties + fn_obj_properties + inv_fn_obj_properties
    
    def get_object_properties(self):
        obj_props = [self.extract_ID(el) for el in self.object_properties]
        return list(set(self.filter_null(obj_props)))
    
    def get_data_properties(self):
        data_props = [self.extract_ID(el) for el in self.data_properties]
        return list(set(self.filter_null(data_props)))




In [3]:
# Extracting USE embeddings

ontologies_in_alignment = [l.split(".")[0].split("-") for l in os.listdir("reference-alignment/")]

def extractUSEEmbeddings(words):
    try:
        embed = hub.KerasLayer(USE_folder)
    except Exception as e:
        !mkdir $USE_folder
        !curl -L "https://tfhub.dev/google/universal-sentence-encoder-large/5?tf-hub-format=compressed" | tar -zxvC $USE_folder
        embed = hub.KerasLayer(USE_folder)
        pass
    word_embeddings = embed(words)
    return word_embeddings.numpy()

def cos_sim(a,b):
    return 1 - spatial.distance.cosine(a, b)

def camel_case_split(identifier):
    matches = re.finditer('.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)', identifier)
    return [m.group(0).lower() for m in matches]

def parse(word):
    return flatten([el.split("_") for el in camel_case_split(word)])
    

extracted_elems = []

for ont_name in list(set(flatten(ontologies_in_alignment))):
    ont = Ontology("conference_ontologies/" + ont_name + ".owl")
    entities = ont.get_entities()
    props = ont.get_object_properties() + ont.get_data_properties()
    triples = list(set(flatten(ont.get_triples(subclass_of=False))))
    extracted_elems.extend([ont_name + "#" + elem for elem in entities + props + triples])

extracted_elems = list(set(extracted_elems))

inp = [" ".join(parse(word.split("#")[1])) for word in extracted_elems]
vectorizer = TfidfVectorizer(token_pattern=r"(?u)\S+")
X = vectorizer.fit_transform(inp)
word2idx_tfidf = {word: i for (i, word)  in enumerate(vectorizer.get_feature_names())}
entity2idx_tfidf = {word.split("#")[1]: i for (i, word)  in enumerate(extracted_elems)}


print ("Total number of extracted unique classes and properties from entire RA set: ", len(extracted_elems))

embeds = extractUSEEmbeddings(inp)
embeddings = dict(zip(extracted_elems, embeds))    

Total number of extracted unique classes and properties from entire RA set:  834


In [4]:
# Type storage

types_dict = {}

def get_tfidf_score(word, phrase):
    return np.sum([X[entity2idx_tfidf[phrase]][:,word2idx_tfidf[word]][0,0] for word in parse(phrase)])
    
for ont_name in list(set(flatten(ontologies_in_alignment))):
    ont = Ontology("conference_ontologies/" + ont_name + ".owl")
    
    entities = ont.get_entities()
    props = ont.get_object_properties() + ont.get_data_properties()

    for entity in entities:
        types_dict[entity] = {"type": "entity"}
    for prop in props:
        types_dict[prop] = {"type": "property"}


In [5]:
# Combinatorial mapping generation

all_mappings = []
for l in ontologies_in_alignment:
    ont1 = Ontology("conference_ontologies/" + l[0] + ".owl")
    ont2 = Ontology("conference_ontologies/" + l[1] + ".owl")
    
    ent1 = ont1.get_entities()
    ent2 = ont2.get_entities()
    
    obj1 = ont1.get_object_properties()
    obj2 = ont2.get_object_properties()
    
    data1 = ont1.get_data_properties()
    data2 = ont2.get_data_properties()
    
    mappings = list(itertools.product(ent1, ent2)) + list(itertools.product(obj1, obj2)) + list(itertools.product(data1, data2))
    
    all_mappings.extend([(l[0] + "#" + el[0], l[1] + "#" + el[1]) for el in mappings])
    

In [6]:
gt_mappings = [tuple([elem.split("/")[-1] for elem in el]) for el in reference_alignments]

data = {}
for mapping in all_mappings:
    if mapping in gt_mappings:
        data[(mapping[0], mapping[1])] = True
    else:
        data[(mapping[0], mapping[1])] = False


In [7]:
def greedy_matching():
    global batch_size, test_data_t, test_data_f, model, optimizer, emb_indexer_inv, gt_mappings, all_metrics
    all_results = OrderedDict()
    with torch.no_grad():
        all_pred = []
        batch_size = min(batch_size, len(test_data_t))
        num_batches = int(ceil(len(test_data_t)/batch_size))

        np.random.shuffle(test_data_t)
        np.random.shuffle(test_data_f)

        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = (batch_idx+1) * batch_size

            batch_start_f = batch_idx * batch_size_f
            batch_end_f = (batch_idx+1) * batch_size_f
            
            pos_elems = np.array(test_data_t)[batch_start:batch_end]
            neg_elems = np.array(test_data_f)[batch_start_f:batch_end_f]
            optimizer.zero_grad()

            inputs = np.array([generate_data(elem) for elem in list(pos_elems) + list(neg_elems)])
            targets = np.array([1 for i in range(len(pos_elems))] + [0 for i in range(len(neg_elems))])

            indices = np.random.permutation(inputs.shape[0])
            inputs, targets = inputs[indices], targets[indices]
            inputs = torch.LongTensor(list(zip(*inputs)))
            targets = torch.DoubleTensor(targets)

            outputs = model(inputs)
#             write (("Outputs initially: ", str([str(s) for s in outputs])))
            outputs /= torch.sum(outputs, dim=1).view(-1, 1)
#             write (("Outputs Finally: ", str([str(s) for s in outputs])))
            outputs = [(1-el[1].item()) for el in outputs]
            
            
            targets = [True if el.item() else False for el in targets]

            for idx, pred_elem in enumerate(outputs):
                ent1 = emb_indexer_inv[inputs.numpy()[0][idx]]
                ent2 = emb_indexer_inv[inputs.numpy()[1][idx]]
                if (ent1, ent2) in all_results:
                    print ("Error: ", ent1, ent2, "already present")
                all_results[(ent1, ent2)] = (pred_elem, targets[idx])
        optimum_metrics, opt_threshold = [-1000 for i in range(5)], -1000
        low_threshold = np.min([el[0] for el in all_results.values()]) - 0.01
        high_threshold = np.max([el[0] for el in all_results.values()]) + 0.01
        for j,threshold in enumerate(np.arange(low_threshold, high_threshold, 0.01)):
            res = []
            for i,key in enumerate(all_results):
                if all_results[key][0] > threshold:
                    res.append(key)

            fn_list = [key for key in gt_mappings if key not in set(res) and not is_valid(test_onto, key)]
            fp_list = [elem for elem in res if not all_results[elem][1]]
            tp_list = [elem for elem in res if all_results[elem][1]]
            
            tp, fn, fp = len(tp_list), len(fn_list), len(fp_list)
            
            
            try:
                precision = tp/(tp+fp)
                recall = tp/(tp+fn)
                f1score = 2 * precision * recall / (precision + recall)
                f2score = 5 * precision * recall / (4 * precision + recall)
                f0_5score = 1.25 * precision * recall / (0.25 * precision + recall)
            except Exception as e:
                print (e)
                continue
            print (precision, recall, f1score, f2score, f0_5score)

            if f1score > optimum_metrics[2]:
                optimum_metrics = [precision, recall, f1score, f2score, f0_5score]
                opt_threshold = threshold
        
        print ("Precision: {} Recall: {} F1-Score: {} F2-Score: {} F0.5-Score: {}".format(
            precision, recall, f1score, f2score, f0_5score))
        all_metrics.append((opt_threshold, optimum_metrics))

    print ("Final Results: ", np.mean([el[1] for el in all_metrics], axis=0))
    print ("Best threshold: ", all_metrics[np.argmax([el[1][2] for el in all_metrics])][0])
    


In [8]:
def write(elem):
    f = open("Logs", "a+")
    if type(elem) == list or type(elem) == tuple:
        string = str("\n".join([str(s) for s in elem]))
    else:
        string = str(elem)
    f.write("\n"+string)
    f.close()
    

class SiameseNetwork(nn.Module):
    def __init__(self):
        super().__init__()    
        self.name_embedding = nn.Embedding(len(embeddings), 512)
        self.name_embedding.load_state_dict({'weight': torch.from_numpy(np.array(emb_vals))})

        self.dropout = dropout
        
        self.layer1 = nn.Bilinear(512, 512, 2)

    def forward(self, inputs):
        results = []
        for i in range(2):
            x = self.name_embedding(inputs[i])
            results.append(x)
        x = self.layer1(results[0], results[1])
        x = F.log_softmax(x)
        return x


In [10]:
emb_indexer = {word: i for i, word in enumerate(list(embeddings.keys()))}
emb_indexer_inv = {i: word for i, word in enumerate(list(embeddings.keys()))}
emb_vals = list(embeddings.values())

def is_valid(test_onto, key):
    return tuple([el.split("#")[0] for el in key]) not in test_onto

def generate_data(elem_tuple):
    return np.array([emb_indexer[elem] for elem in elem_tuple])

def generate_input(elems, target):
    inputs = np.array([generate_data(elem) for elem in list(elems)])
    targets = np.array([target for i in range(len(elems))])
    return inputs, targets
    
data_items = data.items()
np.random.shuffle(list(data_items))
data = OrderedDict(data_items)

print ("Number of entities:", len(data))
all_ont_pairs = list(set([tuple([el.split("#")[0] for el in l]) for l in data.keys()]))

all_metrics = []

for i in list(range(0, len(all_ont_pairs), 3)):
    
    test_onto = all_ont_pairs[i:i+3]
    
    train_data = {elem: data[elem] for elem in data if tuple([el.split("#")[0] for el in elem]) not in test_onto}
    test_data = {elem: data[elem] for elem in data if tuple([el.split("#")[0] for el in elem]) in test_onto}

    torch.set_default_dtype(torch.float64)
    
    train_test_split = 0.9

    train_data_t = [key for key in train_data if data[key]]
    train_data_f = [key for key in train_data if not data[key]][:len(train_data_t)]
    np.random.shuffle(train_data_f)
    
    lr = 0.001
    num_epochs = 50
    weight_decay = 0.001
    batch_size = 10
    dropout = 0.3
    batch_size = min(batch_size, len(train_data_t))
    num_batches = int(ceil((2 * len(train_data_t))/batch_size))
    batch_size_f = int(ceil((len(train_data_t) + len(train_data_f))/num_batches))
    
    model = SiameseNetwork()

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    for epoch in range(num_epochs):
        inputs_pos, targets_pos = generate_input(train_data_t, 1)
        inputs_neg, targets_neg = generate_input(train_data_f, 0)
        
        indices = np.random.permutation(len(inputs_pos) + len(inputs_neg))
        
        inputs = np.array(list(inputs_pos) + list(inputs_neg))[indices]
        targets = np.array(list(targets_pos) + list(targets_neg))[indices]
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = (batch_idx+1) * batch_size

            inp_elems = torch.LongTensor(inputs[batch_start:batch_end].T)
            targ_elems = torch.LongTensor(targets[batch_start:batch_end])
            optimizer.zero_grad()
            
            outputs = model(inp_elems)
            loss = F.nll_loss(outputs, targ_elems)
            loss.backward()

            optimizer.step()

            if batch_idx%10 == 0:
                print ("Epoch: {} Idx: {} Loss: {}".format(epoch, batch_idx, loss.item()))

    model.eval()
    
    test_data_t = [key for key in test_data if data[key]]
    test_data_f = [key for key in test_data if not data[key]]
    
    greedy_matching()

Number of entities: 122893




Epoch: 0 Idx: 0 Loss: 0.6904716193158114
Epoch: 0 Idx: 10 Loss: 0.4673768458987765
Epoch: 0 Idx: 20 Loss: 0.5128397816045347
Epoch: 0 Idx: 30 Loss: 0.2871761932894778
Epoch: 0 Idx: 40 Loss: 0.20952882537528983
Epoch: 0 Idx: 50 Loss: 0.2629302439740181
Epoch: 1 Idx: 0 Loss: 0.10647700034417391
Epoch: 1 Idx: 10 Loss: 0.06728570218856673
Epoch: 1 Idx: 20 Loss: 0.06947293649960161
Epoch: 1 Idx: 30 Loss: 0.21050096468266527
Epoch: 1 Idx: 40 Loss: 0.17217577921627317
Epoch: 1 Idx: 50 Loss: 0.08131735356136424
Epoch: 2 Idx: 0 Loss: 0.07619705134408947
Epoch: 2 Idx: 10 Loss: 0.08843265147417215
Epoch: 2 Idx: 20 Loss: 0.029602391230333962
Epoch: 2 Idx: 30 Loss: 0.03173278361480776
Epoch: 2 Idx: 40 Loss: 0.05781539566514966
Epoch: 2 Idx: 50 Loss: 0.07588250293905688
Epoch: 3 Idx: 0 Loss: 0.017449191296663228
Epoch: 3 Idx: 10 Loss: 0.021120509878298346
Epoch: 3 Idx: 20 Loss: 0.051861313982774174
Epoch: 3 Idx: 30 Loss: 0.02506248078597111
Epoch: 3 Idx: 40 Loss: 0.043501524238412935
Epoch: 3 Idx: 5

Epoch: 31 Idx: 0 Loss: 0.01375768787011208
Epoch: 31 Idx: 10 Loss: 0.01128853431666504
Epoch: 31 Idx: 20 Loss: 0.02303743896046259
Epoch: 31 Idx: 30 Loss: 0.013219020374034504
Epoch: 31 Idx: 40 Loss: 0.04766751487767239
Epoch: 31 Idx: 50 Loss: 0.017425282391128248
Epoch: 32 Idx: 0 Loss: 0.004671530875342861
Epoch: 32 Idx: 10 Loss: 0.020315180583205396
Epoch: 32 Idx: 20 Loss: 0.02127407431010752
Epoch: 32 Idx: 30 Loss: 0.05193325434341502
Epoch: 32 Idx: 40 Loss: 0.015641753023368635
Epoch: 32 Idx: 50 Loss: 0.0191459923626502
Epoch: 33 Idx: 0 Loss: 0.014619737520418761
Epoch: 33 Idx: 10 Loss: 0.007802515843764614
Epoch: 33 Idx: 20 Loss: 0.01818683193767325
Epoch: 33 Idx: 30 Loss: 0.011499714341887495
Epoch: 33 Idx: 40 Loss: 0.016755832311644427
Epoch: 33 Idx: 50 Loss: 0.023612952927369327
Epoch: 34 Idx: 0 Loss: 0.012024951141489684
Epoch: 34 Idx: 10 Loss: 0.023599004294391965
Epoch: 34 Idx: 20 Loss: 0.016022529208104902
Epoch: 34 Idx: 30 Loss: 0.014477887471527257
Epoch: 34 Idx: 40 Loss:

Epoch: 0 Idx: 0 Loss: 0.6888828150892183
Epoch: 0 Idx: 10 Loss: 0.6252131804715645
Epoch: 0 Idx: 20 Loss: 0.423054694396461
Epoch: 0 Idx: 30 Loss: 0.396607653684864
Epoch: 0 Idx: 40 Loss: 0.2577925052564079
Epoch: 0 Idx: 50 Loss: 0.19163707490823428
Epoch: 1 Idx: 0 Loss: 0.15883489032868658
Epoch: 1 Idx: 10 Loss: 0.06452948790348698
Epoch: 1 Idx: 20 Loss: 0.09298271393194628
Epoch: 1 Idx: 30 Loss: 0.4266168889110141
Epoch: 1 Idx: 40 Loss: 0.05035132945784606
Epoch: 1 Idx: 50 Loss: 0.06427405018365018
Epoch: 2 Idx: 0 Loss: 0.033435389080938224
Epoch: 2 Idx: 10 Loss: 0.01946565900713321
Epoch: 2 Idx: 20 Loss: 0.10561675970316142
Epoch: 2 Idx: 30 Loss: 0.03429596198461414
Epoch: 2 Idx: 40 Loss: 0.01446379346790527
Epoch: 2 Idx: 50 Loss: 0.09768657767687028
Epoch: 3 Idx: 0 Loss: 0.06123102077680477
Epoch: 3 Idx: 10 Loss: 0.00791749271582079
Epoch: 3 Idx: 20 Loss: 0.0368340859904594
Epoch: 3 Idx: 30 Loss: 0.0432281261435563
Epoch: 3 Idx: 40 Loss: 0.029884325787162706
Epoch: 3 Idx: 50 Loss: 

Epoch: 31 Idx: 20 Loss: 0.008613990237252993
Epoch: 31 Idx: 30 Loss: 0.022562250613752215
Epoch: 31 Idx: 40 Loss: 0.026111325887087467
Epoch: 31 Idx: 50 Loss: 0.09045767402657347
Epoch: 32 Idx: 0 Loss: 0.024850619107655972
Epoch: 32 Idx: 10 Loss: 0.009492018390808595
Epoch: 32 Idx: 20 Loss: 0.03492760471633144
Epoch: 32 Idx: 30 Loss: 0.029677030915421836
Epoch: 32 Idx: 40 Loss: 0.012683830056261055
Epoch: 32 Idx: 50 Loss: 0.011583318043497179
Epoch: 33 Idx: 0 Loss: 0.008062246731273875
Epoch: 33 Idx: 10 Loss: 0.016560355691262012
Epoch: 33 Idx: 20 Loss: 0.016948780178522298
Epoch: 33 Idx: 30 Loss: 0.02487405078867128
Epoch: 33 Idx: 40 Loss: 0.03378396331232435
Epoch: 33 Idx: 50 Loss: 0.01768377665284572
Epoch: 34 Idx: 0 Loss: 0.06784151494028016
Epoch: 34 Idx: 10 Loss: 0.0030725703316403187
Epoch: 34 Idx: 20 Loss: 0.015589035554103375
Epoch: 34 Idx: 30 Loss: 0.016635778945817702
Epoch: 34 Idx: 40 Loss: 0.02317000777799838
Epoch: 34 Idx: 50 Loss: 0.02864515753949807
Epoch: 35 Idx: 0 Los

Epoch: 8 Idx: 20 Loss: 0.01240296735905848
Epoch: 8 Idx: 30 Loss: 0.003674842811929831
Epoch: 8 Idx: 40 Loss: 0.020782790733824535
Epoch: 8 Idx: 50 Loss: 0.026175531672512974
Epoch: 9 Idx: 0 Loss: 0.0133866202996356
Epoch: 9 Idx: 10 Loss: 0.007797402167613057
Epoch: 9 Idx: 20 Loss: 0.0099754373524235
Epoch: 9 Idx: 30 Loss: 0.03632204687081844
Epoch: 9 Idx: 40 Loss: 0.017243033895008787
Epoch: 9 Idx: 50 Loss: 0.021799974169398117
Epoch: 10 Idx: 0 Loss: 0.01809448693557535
Epoch: 10 Idx: 10 Loss: 0.020141535927704943
Epoch: 10 Idx: 20 Loss: 0.016899343050489332
Epoch: 10 Idx: 30 Loss: 0.018077237263351982
Epoch: 10 Idx: 40 Loss: 0.03469927497721011
Epoch: 10 Idx: 50 Loss: 0.019821607347728025
Epoch: 11 Idx: 0 Loss: 0.03865319827829143
Epoch: 11 Idx: 10 Loss: 0.029023605275867576
Epoch: 11 Idx: 20 Loss: 0.008682983456761852
Epoch: 11 Idx: 30 Loss: 0.007925463692925355
Epoch: 11 Idx: 40 Loss: 0.011259081287183118
Epoch: 11 Idx: 50 Loss: 0.02382913813052264
Epoch: 12 Idx: 0 Loss: 0.01054422

Epoch: 39 Idx: 10 Loss: 0.02653192568863153
Epoch: 39 Idx: 20 Loss: 0.015862835806128475
Epoch: 39 Idx: 30 Loss: 0.030112165926090263
Epoch: 39 Idx: 40 Loss: 0.014624347427016552
Epoch: 39 Idx: 50 Loss: 0.021676284496766805
Epoch: 40 Idx: 0 Loss: 0.011039585927839782
Epoch: 40 Idx: 10 Loss: 0.013458001704125755
Epoch: 40 Idx: 20 Loss: 0.020047520425047795
Epoch: 40 Idx: 30 Loss: 0.06664481876438323
Epoch: 40 Idx: 40 Loss: 0.018371148082573445
Epoch: 40 Idx: 50 Loss: 0.02709263002037729
Epoch: 41 Idx: 0 Loss: 0.021763003341886973
Epoch: 41 Idx: 10 Loss: 0.012675815133477503
Epoch: 41 Idx: 20 Loss: 0.020114935858737172
Epoch: 41 Idx: 30 Loss: 0.0355013595755127
Epoch: 41 Idx: 40 Loss: 0.03294375212935162
Epoch: 41 Idx: 50 Loss: 0.023629565479522937
Epoch: 42 Idx: 0 Loss: 0.019187977476097677
Epoch: 42 Idx: 10 Loss: 0.01670113086708031
Epoch: 42 Idx: 20 Loss: 0.1380645940956215
Epoch: 42 Idx: 30 Loss: 0.00752188620591399
Epoch: 42 Idx: 40 Loss: 0.010363710365603507
Epoch: 42 Idx: 50 Loss:

Epoch: 17 Idx: 10 Loss: 0.023716867529351017
Epoch: 17 Idx: 20 Loss: 0.00946555325639856
Epoch: 17 Idx: 30 Loss: 0.01479566147344596
Epoch: 17 Idx: 40 Loss: 0.020279731090313526
Epoch: 17 Idx: 50 Loss: 0.03152877059265899
Epoch: 18 Idx: 0 Loss: 0.01067294199335641
Epoch: 18 Idx: 10 Loss: 0.019202655178865494
Epoch: 18 Idx: 20 Loss: 0.021524534144182428
Epoch: 18 Idx: 30 Loss: 0.008544427855381589
Epoch: 18 Idx: 40 Loss: 0.02013898260589498
Epoch: 18 Idx: 50 Loss: 0.01901056979497999
Epoch: 19 Idx: 0 Loss: 0.015014578865316106
Epoch: 19 Idx: 10 Loss: 0.009114476804462625
Epoch: 19 Idx: 20 Loss: 0.02025362709241154
Epoch: 19 Idx: 30 Loss: 0.021552406927161573
Epoch: 19 Idx: 40 Loss: 0.011726233585515599
Epoch: 19 Idx: 50 Loss: 0.009841645693832479
Epoch: 20 Idx: 0 Loss: 0.011002290272957292
Epoch: 20 Idx: 10 Loss: 0.016401724652137732
Epoch: 20 Idx: 20 Loss: 0.02077708656748195
Epoch: 20 Idx: 30 Loss: 0.011310591947774704
Epoch: 20 Idx: 40 Loss: 0.02694940433353706
Epoch: 20 Idx: 50 Loss

Epoch: 48 Idx: 0 Loss: 0.014203833738814741
Epoch: 48 Idx: 10 Loss: 0.015015946866703303
Epoch: 48 Idx: 20 Loss: 0.020950244177119305
Epoch: 48 Idx: 30 Loss: 0.01743334831380743
Epoch: 48 Idx: 40 Loss: 0.02393745246551584
Epoch: 48 Idx: 50 Loss: 0.02820653472161987
Epoch: 49 Idx: 0 Loss: 0.02052923846742661
Epoch: 49 Idx: 10 Loss: 0.01715281818829482
Epoch: 49 Idx: 20 Loss: 0.05891411701073335
Epoch: 49 Idx: 30 Loss: 0.01366692895353119
Epoch: 49 Idx: 40 Loss: 0.007091811811808775
Epoch: 49 Idx: 50 Loss: 0.023437296450704144
0.45652173913043476 1.0 0.626865671641791 0.8076923076923076 0.5121951219512195
0.45054945054945056 0.9761904761904762 0.6165413533834586 0.7915057915057915 0.5049261083743842
0.45054945054945056 0.9761904761904762 0.6165413533834586 0.7915057915057915 0.5049261083743842
0.45054945054945056 0.9761904761904762 0.6165413533834586 0.7915057915057915 0.5049261083743842
0.45054945054945056 0.9761904761904762 0.6165413533834586 0.7915057915057915 0.5049261083743842
0.450

Epoch: 17 Idx: 50 Loss: 0.018434968561740078
Epoch: 18 Idx: 0 Loss: 0.015899197889234738
Epoch: 18 Idx: 10 Loss: 0.012704236598768886
Epoch: 18 Idx: 20 Loss: 0.008433321658756154
Epoch: 18 Idx: 30 Loss: 0.011935199475033912
Epoch: 18 Idx: 40 Loss: 0.02033781566290667
Epoch: 18 Idx: 50 Loss: 0.020083797909808427
Epoch: 19 Idx: 0 Loss: 0.008786647248658987
Epoch: 19 Idx: 10 Loss: 0.00882960519184256
Epoch: 19 Idx: 20 Loss: 0.020033586340757126
Epoch: 19 Idx: 30 Loss: 0.01849044230858942
Epoch: 19 Idx: 40 Loss: 0.026794097165774668
Epoch: 19 Idx: 50 Loss: 0.013748636889257513
Epoch: 20 Idx: 0 Loss: 0.025251254731938056
Epoch: 20 Idx: 10 Loss: 0.01773442192909194
Epoch: 20 Idx: 20 Loss: 0.01795296392304695
Epoch: 20 Idx: 30 Loss: 0.026821269383982486
Epoch: 20 Idx: 40 Loss: 0.015436498534242182
Epoch: 20 Idx: 50 Loss: 0.020834970929901102
Epoch: 21 Idx: 0 Loss: 0.012801039731727696
Epoch: 21 Idx: 10 Loss: 0.014583117113263589
Epoch: 21 Idx: 20 Loss: 0.019313723530906393
Epoch: 21 Idx: 30 L

Epoch: 48 Idx: 40 Loss: 0.014761958033300878
Epoch: 48 Idx: 50 Loss: 0.02467693538982532
Epoch: 49 Idx: 0 Loss: 0.021871565968033784
Epoch: 49 Idx: 10 Loss: 0.019824565719840904
Epoch: 49 Idx: 20 Loss: 0.014802364951088224
Epoch: 49 Idx: 30 Loss: 0.01914938025225138
Epoch: 49 Idx: 40 Loss: 0.02679358678582865
Epoch: 49 Idx: 50 Loss: 0.020289190320933514
0.4444444444444444 1.0 0.6153846153846153 0.8 0.5
0.4507042253521127 1.0 0.6213592233009708 0.8040201005025125 0.5063291139240507
0.8571428571428571 0.5625 0.6792452830188678 0.6040268456375839 0.7758620689655172
0.875 0.4375 0.5833333333333334 0.4861111111111111 0.7291666666666666
1.0 0.0625 0.11764705882352941 0.07692307692307693 0.25
division by zero
Precision: 1.0 Recall: 0.0625 F1-Score: 0.11764705882352941 F2-Score: 0.07692307692307693 F0.5-Score: 0.25
Final Results:  [0.7834303  0.73128832 0.74182851 0.73212181 0.76252102]
Best threshold:  0.9653908909264555
Epoch: 0 Idx: 0 Loss: 0.69235633764587
Epoch: 0 Idx: 10 Loss: 0.49518106

Epoch: 27 Idx: 50 Loss: 0.02244179829541874
Epoch: 28 Idx: 0 Loss: 0.01773084218495734
Epoch: 28 Idx: 10 Loss: 0.020751462433702787
Epoch: 28 Idx: 20 Loss: 0.018790686834342006
Epoch: 28 Idx: 30 Loss: 0.014019426648463196
Epoch: 28 Idx: 40 Loss: 0.026982507288852015
Epoch: 28 Idx: 50 Loss: 0.05598697871940659
Epoch: 29 Idx: 0 Loss: 0.169809057334646
Epoch: 29 Idx: 10 Loss: 0.013449755112255884
Epoch: 29 Idx: 20 Loss: 0.026864271823002876
Epoch: 29 Idx: 30 Loss: 0.012493029160226878
Epoch: 29 Idx: 40 Loss: 0.006926261349429534
Epoch: 29 Idx: 50 Loss: 0.02543855930102441
Epoch: 30 Idx: 0 Loss: 0.015343844400394144
Epoch: 30 Idx: 10 Loss: 0.02962791088793606
Epoch: 30 Idx: 20 Loss: 0.018986337013877766
Epoch: 30 Idx: 30 Loss: 0.021131978980195528
Epoch: 30 Idx: 40 Loss: 0.01704493137338695
Epoch: 30 Idx: 50 Loss: 0.03222690044128661
Epoch: 31 Idx: 0 Loss: 0.011777188437544786
Epoch: 31 Idx: 10 Loss: 0.009776432049102727
Epoch: 31 Idx: 20 Loss: 0.016589872325468342
Epoch: 31 Idx: 30 Loss: 

Epoch: 6 Idx: 10 Loss: 0.020719433741762945
Epoch: 6 Idx: 20 Loss: 0.06316838632093227
Epoch: 6 Idx: 30 Loss: 0.04700552633906586
Epoch: 6 Idx: 40 Loss: 0.03507058092261353
Epoch: 7 Idx: 0 Loss: 0.01984587235277139
Epoch: 7 Idx: 10 Loss: 0.02922520112142112
Epoch: 7 Idx: 20 Loss: 0.011718381602691022
Epoch: 7 Idx: 30 Loss: 0.019810692345617498
Epoch: 7 Idx: 40 Loss: 0.019150968518086113
Epoch: 8 Idx: 0 Loss: 0.019878907938255815
Epoch: 8 Idx: 10 Loss: 0.016555817595163135
Epoch: 8 Idx: 20 Loss: 0.015549968809385372
Epoch: 8 Idx: 30 Loss: 0.01903341191463194
Epoch: 8 Idx: 40 Loss: 0.027996564952300296
Epoch: 9 Idx: 0 Loss: 0.012976629289255064
Epoch: 9 Idx: 10 Loss: 0.008837610302086265
Epoch: 9 Idx: 20 Loss: 0.025068369020720942
Epoch: 9 Idx: 30 Loss: 0.012152450691867803
Epoch: 9 Idx: 40 Loss: 0.017935636429112264
Epoch: 10 Idx: 0 Loss: 0.011906921804143059
Epoch: 10 Idx: 10 Loss: 0.01415555953481778
Epoch: 10 Idx: 20 Loss: 0.033993963026925686
Epoch: 10 Idx: 30 Loss: 0.02174655168673

Epoch: 43 Idx: 20 Loss: 0.019273481420519385
Epoch: 43 Idx: 30 Loss: 0.022259641505114156
Epoch: 43 Idx: 40 Loss: 0.01614839209482344
Epoch: 44 Idx: 0 Loss: 0.018592805490447215
Epoch: 44 Idx: 10 Loss: 0.011472921966752214
Epoch: 44 Idx: 20 Loss: 0.025349500936809023
Epoch: 44 Idx: 30 Loss: 0.03515570581995945
Epoch: 44 Idx: 40 Loss: 0.024806164890557966
Epoch: 45 Idx: 0 Loss: 0.012376729882595227
Epoch: 45 Idx: 10 Loss: 0.017231170859374582
Epoch: 45 Idx: 20 Loss: 0.020309848522225886
Epoch: 45 Idx: 30 Loss: 0.022413387749208657
Epoch: 45 Idx: 40 Loss: 0.03448369509442649
Epoch: 46 Idx: 0 Loss: 0.011662558922706904
Epoch: 46 Idx: 10 Loss: 0.016689014127495252
Epoch: 46 Idx: 20 Loss: 0.011201932551279022
Epoch: 46 Idx: 30 Loss: 0.021875886621237607
Epoch: 46 Idx: 40 Loss: 0.007444617454513335
Epoch: 47 Idx: 0 Loss: 0.01631546934518833
Epoch: 47 Idx: 10 Loss: 0.03420618507426496
Epoch: 47 Idx: 20 Loss: 0.014761056763306695
Epoch: 47 Idx: 30 Loss: 0.014025390253598505
Epoch: 47 Idx: 40 L

In [20]:
data

OrderedDict([(('conference#Workshop', 'ekaw#Workshop'), (1.0, 'T')),
             (('conference#Person', 'ekaw#Person'), (1.0, 'T')),
             (('conference#Invited_speaker', 'ekaw#Invited_Speaker'),
              (1.0, 'T')),
             (('conference#Tutorial', 'ekaw#Tutorial'), (1.0, 'T')),
             (('conference#Review', 'ekaw#Review'), (1.0, 'T')),
             (('conference#Conference_participant',
               'ekaw#Conference_Participant'),
              (1.0, 'F')),
             (('conference#Conference', 'ekaw#Conference'), (1.0, 'F')),
             (('conference#Abstract', 'ekaw#Abstract'), (1.0, 'T')),
             (('conference#Track', 'ekaw#Track'), (1.0, 'T')),
             (('conference#Paper', 'ekaw#Paper'), (1.0, 'F')),
             (('conference#Invited_talk', 'ekaw#Invited_Talk'), (1.0, 'F')),
             (('conference#Conference_proceedings',
               'ekaw#Conference_Proceedings'),
              (1.0, 'T')),
             (('confOf#Workshop', 'eka

In [201]:
entities_sub = [elem for elem in entities if elem.split("#")[0] == "conference"]
entity_idx = {elem: (i, emb_indexer[elem.split("#")[1]]) for (i,elem) in enumerate(entities_sub) }

In [274]:
def get_K_hop_neighbours(ont, K=1):
    triples = Ontology(ont).get_triples()
    entities = [(a,b) for (a,b,c) in triples]
    neighbours_dict = {}
    for entity_pair in entities:
        if entity[0] in neighbours_dict:
            if 
        if "children" neighbours_dict[entity]

{'Science_Worker': '0', 'Country': '1', 'Chair_PC': '2', 'boolean': '3', 'Conference': '4', 'Member_PC': '5', 'Administrative_event': '6', 'Thing': '7', 'Student': '8', 'Contribution': '9', 'Regular': '10', 'Volunteer': '11', 'Company': '12', 'Reviewing_results_event': '13', 'positiveInteger': '14', 'Workshop': '15', 'Person': '16', 'Camera_Ready_event': '17', 'Paper': '18', 'Submission_event': '19', 'Short_paper': '20', 'University': '21', 'Author': '22', 'Social_event': '23', 'Event': '24', 'Reception': '25', 'Registration_of_participants_event': '26', 'Administrator': '27', 'Member': '28', 'Assistant': '29', 'Topic': '30', 'Poster': '31', 'Reviewing_event': '32', 'Trip': '33', 'Tutorial': '34', 'Organization': '35', 'Working_event': '36', 'Banquet': '37', 'City': '38', 'Participant': '39', 'Scholar': '40', 'string': '41'}
confOf 42 0
{'Possible_Reviewer': '0', 'SC_Member': '1', 'Contributed_Talk': '2', 'Conference': '3', 'Accepted_Paper': '4', 'Agency_Staff_Member': '5', 'Programme_

In [25]:
data

OrderedDict([(('cmt#paperAssignmentToolsRunBy', 'confOf#reviewes'),
              (-0.1581176519393921, 'F')),
             (('edas#ParallelAndDistributedComputingTopic',
               'iasted#Departure'),
              (-0.15051385760307312, 'F')),
             (('edas#Excursion',
               'iasted#Deadline_for_notification_of_acceptance'),
              (-0.14760316908359528, 'F')),
             (('edas#ComputerNetworksMeasurementsTopic', 'iasted#Renting'),
              (-0.14124542474746704, 'F')),
             (('edas#ParallelAndDistributedComputingTopic', 'iasted#Tip'),
              (-0.13849233090877533, 'F')),
             (('cmt#runPaperAssignmentTools', 'confOf#reviewes'),
              (-0.13624201714992523, 'F')),
             (('conference#has_a_review_reference_or_expertise',
               'ekaw#organisedBy'),
              (-0.12926854193210602, 'F')),
             (('iasted#Deadline_for_notification_of_acceptance',
               'sigkdd#Hotel'),
              (

ValueError: expected sequence of length 3 at dim 1 (got 2)