From a67bfd226707ea2dc7ff8aa14e8987710233219f Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 Apr 2020 16:57:56 +0100 Subject: [PATCH 01/19] separate out global setup for tests and improve the importer --- pykg2vec/core/KGMeta.py | 1 - pykg2vec/test/test_model.py | 20 ++++++++++++++--- pykg2vec/test/test_setup.py | 9 ++++++++ pykg2vec/test/test_trainer.py | 5 ----- pykg2vec/test/test_tune_model.py | 5 ++++- pykg2vec/utils/bayesian_optimizer.py | 33 +++++++++++++++++++++++++++- 6 files changed, 62 insertions(+), 11 deletions(-) create mode 100644 pykg2vec/test/test_setup.py diff --git a/pykg2vec/core/KGMeta.py b/pykg2vec/core/KGMeta.py index 800ec3a..5adc33a 100644 --- a/pykg2vec/core/KGMeta.py +++ b/pykg2vec/core/KGMeta.py @@ -8,7 +8,6 @@ from abc import ABCMeta, abstractmethod import tensorflow as tf -from pykg2vec.utils.generator import TrainingStrategy class ModelMeta(tf.keras.Model): """ Meta Class for knowledge graph embedding algorithms""" diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index 11ec264..fdcc938 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -47,7 +47,21 @@ def testing_function(name, distance_measure=None, bilinear=None, display=False, trainer.build_model() trainer.train_model() -@pytest.mark.parametrize("model_name", ['complex', 'distmult', 'proje_pointwise', 'rescal', 'rotate', 'slm', 'transe', 'transh', 'transr', 'transd', 'transm', 'hole']) +@pytest.mark.parametrize("model_name", [ + 'complex', + 'complexn3', + 'distmult', + 'proje_pointwise', + 'rescal', + 'rotate', + 'slm', + 'transe', + 'transh', + 'transr', + 'transd', + 'transm', + 'hole', +]) def test_KGE_methods(model_name): """Function to test a set of KGE algorithsm.""" testing_function(model_name) @@ -63,7 +77,7 @@ def test_ConvKB(): def test_KG2E_EL_args(): """Function to test KG2E Algorithm with arguments.""" - testing_function('kg2e', distance_measure="expected_likelihood") + testing_function('kg2e_el', 
distance_measure="expected_likelihood") def test_KG2E_KL_args(): """Function to test KG2E Algorithm with arguments.""" @@ -75,7 +89,7 @@ def test_SMEL_args(): def test_SMEB_args(): """Function to test SME Algorithm with arguments.""" - testing_function('sme', bilinear=True) + testing_function('sme_bl', bilinear=True) def test_transE_display(): """Function to test transE display.""" diff --git a/pykg2vec/test/test_setup.py b/pykg2vec/test/test_setup.py new file mode 100644 index 0000000..ebc6ea5 --- /dev/null +++ b/pykg2vec/test/test_setup.py @@ -0,0 +1,9 @@ +import pytest +import tensorflow as tf + +@pytest.fixture(scope="session", autouse=True) +def run_tf_function_eagerly(request): + tf.config.experimental_run_functions_eagerly(True) + +def switch_on_eager_execution(): + pass \ No newline at end of file diff --git a/pykg2vec/test/test_trainer.py b/pykg2vec/test/test_trainer.py index e693a1a..c310ee0 100644 --- a/pykg2vec/test/test_trainer.py +++ b/pykg2vec/test/test_trainer.py @@ -4,16 +4,11 @@ This module is for testing unit functions of training """ import pytest -import tensorflow as tf from pykg2vec.config.config import KGEArgParser, Importer from pykg2vec.utils.trainer import Trainer, Monitor from pykg2vec.utils.kgcontroller import KnowledgeGraph -@pytest.fixture(scope="session", autouse=True) -def run_tf_function_eagerly(request): - tf.config.experimental_run_functions_eagerly(True) - @pytest.mark.skip(reason="This is a functional method.") def get_model(result_path_dir, configured_epochs, patience, config_key): args = KGEArgParser().get_args([]) diff --git a/pykg2vec/test/test_tune_model.py b/pykg2vec/test/test_tune_model.py index acc8ee1..ac3ad87 100644 --- a/pykg2vec/test/test_tune_model.py +++ b/pykg2vec/test/test_tune_model.py @@ -36,6 +36,7 @@ def tunning_function(name): 'transm', 'rescal', 'sme', + 'sme_bl', 'transd', 'transr', 'ntn', @@ -43,8 +44,10 @@ def tunning_function(name): 'hole', 'rotate', 'kg2e', + 'kg2e_el', 'complex', - 'distmult' + 
'complexn3', + 'distmult', ]) def test_tuning(model_name): """Function to test the tuning function.""" diff --git a/pykg2vec/utils/bayesian_optimizer.py b/pykg2vec/utils/bayesian_optimizer.py index 4605e0f..2547f31 100644 --- a/pykg2vec/utils/bayesian_optimizer.py +++ b/pykg2vec/utils/bayesian_optimizer.py @@ -19,17 +19,42 @@ config_path = "pykg2vec.config.config" hyper_param_path = "pykg2vec.config.hyperparams" +moduleMap = {"complex": "Complex", + "complexn3": "Complex", + "conve": "ConvE", + "hole": "HoLE", + "distmult": "DistMult", + "kg2e": "KG2E", + "kg2e_el": "KG2E", + "ntn": "NTN", + "proje_pointwise": "ProjE_pointwise", + "rescal": "Rescal", + "rotate": "RotatE", + "slm": "SLM", + "sme": "SME", + "sme_bl": "SME", + "transd": "TransD", + "transe": "TransE", + "transg": "TransG", + "transh": "TransH", + "transm": "TransM", + "transr": "TransR", + "tucker": "TuckER"} + modelMap = {"complex": "Complex", + "complexn3": "ComplexN3", "conve": "ConvE", "hole": "HoLE", "distmult": "DistMult", "kg2e": "KG2E", + "kg2e_el": "KG2E_EL", "ntn": "NTN", "proje_pointwise": "ProjE_pointwise", "rescal": "Rescal", "rotate": "RotatE", "slm": "SLM", "sme": "SME", + "sme_bl": "SME_BL", "transd": "TransD", "transe": "TransE", "transg": "TransG", @@ -40,16 +65,19 @@ configMap = {"complex": "ComplexConfig", + "complexn3": "ComplexConfig", "conve": "ConvEConfig", "hole": "HoLEConfig", "distmult": "DistMultConfig", "kg2e": "KG2EConfig", + "kg2e_el": "KG2EConfig", "ntn": "NTNConfig", "proje_pointwise": "ProjE_pointwiseConfig", "rescal": "RescalConfig", "rotate": "RotatEConfig", "slm": "SLMConfig", "sme": "SMEConfig", + "sme_bl": "SMEConfig", "transd": "TransDConfig", "transe": "TransEConfig", "transg": "TransGConfig", @@ -60,16 +88,19 @@ hypMap = {"complex": "ComplexParams", + "complexn3": "ComplexParams", "conve": "ConvEParams", "hole": "HoLEParams", "distmult": "DistMultParams", "kg2e": "KG2EParams", + "kg2e_el": "KG2EParams", "ntn": "NTNParams", "proje_pointwise": 
"ProjE_pointwiseParams", "rescal": "RescalParams", "rotate": "RotatEParams", "slm": "SLMParams", "sme": "SMEParams", + "sme_bl": "SMEParams", "transd": "TransDParams", "transe": "TransEParams", "transg": "TransGParams", @@ -111,7 +142,7 @@ def __init__(self, args=None): self.knowledge_graph = KnowledgeGraph(dataset=args.dataset_name, custom_dataset_path=args.dataset_path) hyper_params = None try: - self.model_obj = getattr(importlib.import_module(model_path + ".%s" % modelMap[model_name]), + self.model_obj = getattr(importlib.import_module(model_path + ".%s" % moduleMap[model_name]), modelMap[model_name]) self.config_obj = getattr(importlib.import_module(config_path), configMap[model_name]) hyper_params = getattr(importlib.import_module(hyper_param_path), hypMap[model_name])() From 5945c6c51d727cd41fefbc421dcebd051eab279b Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 Apr 2020 17:51:06 +0100 Subject: [PATCH 02/19] fix file missing during visualisation --- pykg2vec/test/{test_setup.py => fixtures.py} | 6 ++-- pykg2vec/utils/visualization.py | 35 ++++++++++---------- 2 files changed, 20 insertions(+), 21 deletions(-) rename pykg2vec/test/{test_setup.py => fixtures.py} (51%) diff --git a/pykg2vec/test/test_setup.py b/pykg2vec/test/fixtures.py similarity index 51% rename from pykg2vec/test/test_setup.py rename to pykg2vec/test/fixtures.py index ebc6ea5..49b8edd 100644 --- a/pykg2vec/test/test_setup.py +++ b/pykg2vec/test/fixtures.py @@ -2,8 +2,6 @@ import tensorflow as tf @pytest.fixture(scope="session", autouse=True) -def run_tf_function_eagerly(request): +def switch_on_eager_execution(request): + """Setup eager execution within the pytest runtime for better visibility to Coverage.py""" tf.config.experimental_run_functions_eagerly(True) - -def switch_on_eager_execution(): - pass \ No newline at end of file diff --git a/pykg2vec/utils/visualization.py b/pykg2vec/utils/visualization.py index 7244823..150b076 100644 --- a/pykg2vec/utils/visualization.py +++ 
b/pykg2vec/utils/visualization.py @@ -52,10 +52,9 @@ def __init__(self, model=None, vis_opts=None): self.model = model - self.algo_list = ['Complex', 'ConvE','HoLE', 'DistMult', 'DistMult2', 'KG2E_EL','KG2E_KL', - 'KGMeta', 'NTN', 'ProjE_pointwise', 'Rescal', - 'RotatE', 'SLM', 'SME_Bilinear','SME_Linear', 'TransD', 'TransE', 'TransH', - 'TransM', 'TransR', 'TuckER'] + self.algo_list = ['Complex', 'ComplexN3', 'ConvE', 'HoLE', 'DistMult', 'DistMult2', 'KG2E_EL', 'KG2E_KL', + 'KGMeta', 'NTN', 'ProjE_pointwise', 'Rescal', 'RotatE', 'SLM', 'SME_Bilinear', 'SME_Linear', + 'TransD', 'TransE', 'TransH', 'TransM', 'TransR', 'TuckER'] self.h_name = [] self.r_name = [] @@ -170,19 +169,21 @@ def plot_train_result(self): file_no = len([c for c in files_lwcase if a.lower() in c if 'training' in c]) if file_no < 1: continue - with open(str(path / (a + '_Training_results_' + str(file_no - 1) + '.csv')), 'r') as fh: - df_2 = pd.read_csv(fh) - if df.empty: - df['Epochs'] = df_2['Epochs'] - df['Loss'] = df_2['Loss'] - df['Algorithm'] = [a] * len(df_2) - else: - df_3 = pd.DataFrame() - df_3['Epochs'] = df_2['Epochs'] - df_3['Loss'] = df_2['Loss'] - df_3['Algorithm'] = [a] * len(df_2) - frames = [df, df_3] - df = pd.concat(frames) + file_path = str(path / (a + '_Training_results_' + str(file_no - 1) + '.csv')) + if os.path.exists(file_path): + with open(str(path / (a + '_Training_results_' + str(file_no - 1) + '.csv')), 'r') as fh: + df_2 = pd.read_csv(fh) + if df.empty: + df['Epochs'] = df_2['Epochs'] + df['Loss'] = df_2['Loss'] + df['Algorithm'] = [a] * len(df_2) + else: + df_3 = pd.DataFrame() + df_3['Epochs'] = df_2['Epochs'] + df_3['Loss'] = df_2['Loss'] + df_3['Algorithm'] = [a] * len(df_2) + frames = [df, df_3] + df = pd.concat(frames) plt.figure() ax = seaborn.lineplot(x="Epochs", y="Loss", hue="Algorithm", markers=True, dashes=False, data=df) files = os.listdir(str(result)) From 8c003aff99d52b6d1e7c1cfc49c619623c3dddec Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 Apr 
2020 18:01:26 +0100 Subject: [PATCH 03/19] fix the test fixture file name --- pykg2vec/test/{fixtures.py => conftest.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename pykg2vec/test/{fixtures.py => conftest.py} (80%) diff --git a/pykg2vec/test/fixtures.py b/pykg2vec/test/conftest.py similarity index 80% rename from pykg2vec/test/fixtures.py rename to pykg2vec/test/conftest.py index 49b8edd..54448cb 100644 --- a/pykg2vec/test/fixtures.py +++ b/pykg2vec/test/conftest.py @@ -4,4 +4,4 @@ @pytest.fixture(scope="session", autouse=True) def switch_on_eager_execution(request): """Setup eager execution within the pytest runtime for better visibility to Coverage.py""" - tf.config.experimental_run_functions_eagerly(True) + tf.config.experimental_run_functions_eagerly(True) \ No newline at end of file From 31cd6b060a54bd1a223b3a0b64aebc2622a12254 Mon Sep 17 00:00:00 2001 From: Louis Yu Date: Sun, 12 Apr 2020 23:47:39 -0700 Subject: [PATCH 04/19] Update hyperparams.py --- pykg2vec/config/hyperparams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykg2vec/config/hyperparams.py b/pykg2vec/config/hyperparams.py index a54726f..639ecbb 100644 --- a/pykg2vec/config/hyperparams.py +++ b/pykg2vec/config/hyperparams.py @@ -31,7 +31,7 @@ def __init__(self): 'complex' : {'learning_rate': 0.05,'hidden_size':200,'batch_size':5000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001}, 'distmult': {'learning_rate': 0.1,'hidden_size':100,'batch_size':50000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001}, 'proje_po': {'learning_rate': 0.01,'hidden_dropout': 0.5, 'hidden_size':200,'batch_size':200,' epochs':100, 'optimizer':'adam','lmbda':0.00001}, - 'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0, 'epochs':100}, + 'conve' : 
{'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0}, 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1} } } From e614c948e310c96f97baac0a71a352400ec6cbc5 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 13 Apr 2020 13:16:47 +0100 Subject: [PATCH 05/19] initial commit for adding the ANALOGY model --- pykg2vec/config/config.py | 59 ++++++++++++++++- pykg2vec/config/hyperparams.py | 41 +++++++++++- pykg2vec/core/ANALOGY.py | 98 ++++++++++++++++++++++++++++ pykg2vec/test/test_model.py | 1 + pykg2vec/test/test_tune_model.py | 1 + pykg2vec/utils/bayesian_optimizer.py | 12 ++-- pykg2vec/utils/trainer.py | 2 +- 7 files changed, 205 insertions(+), 9 deletions(-) create mode 100644 pykg2vec/core/ANALOGY.py diff --git a/pykg2vec/config/config.py b/pykg2vec/config/config.py index 3910ba9..3748105 100644 --- a/pykg2vec/config/config.py +++ b/pykg2vec/config/config.py @@ -61,7 +61,8 @@ def __init__(self): "transg": "TransG.TransG", "transm": "TransM.TransM", "transr": "TransR.TransR", - "tucker": "TuckER.TuckER"} + "tucker": "TuckER.TuckER", + "analogy": "ANALOGY.ANALOGY"} self.configMap = {"complex": "ComplexConfig", "complexn3": "ComplexConfig", @@ -84,7 +85,8 @@ def __init__(self): "transh": "TransHConfig", "transm": "TransMConfig", "transr": "TransRConfig", - "tucker": "TuckERConfig"} + "tucker": "TuckERConfig", + "analogy": "ANALOGYConfig"} def import_model_config(self, name): """This function imports models and configuration. @@ -1480,4 +1482,57 @@ def __init__(self, args=None): 'neg_rate': self.neg_rate, } + BasicConfig.__init__(self, args) + +class ANALOGYConfig(BasicConfig): + """This class defines the configuration for the Complex Algorithm. 
+ + ANALOGYConfig inherits the BasicConfig and defines the local arguments used in the + algorithm. + + Attributes: + hyperparameters (dict): Defines the dictionary of hyperparameters to be used by bayesian optimizer for tuning. + + Args: + lambda (float) : Weight applied to the regularization in the loss function. + learning_rate (float): Defines the learning rate for the optimization. + L1_flag (bool): If True, perform L1 regularization on the model parameters. + hidden_size (int): Defines the size of the latent dimension for entities and relations. + batch_size (int): Defines the batch size for training the algorithm. + epochs (int): Defines the total number of epochs for training the algorithm. + margin (float): Defines the margin used between the positive and negative triple loss. + data (str): Defines the knowledge base dataset to be used for training the algorithm. + optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc. + sampling (str): Defines the sampling (bern or uniform) for corrupting the triples. + + """ + + def __init__(self, args=None): + self.lmbda = args.lmbda + self.learning_rate = args.learning_rate + self.hidden_size = args.hidden_size + self.batch_size = args.batch_training + self.epochs = args.epochs + self.data = args.dataset_name + self.optimizer = args.optimizer + self.sampling = args.sampling + self.neg_rate = args.negrate + + if args.exp is True: + paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'analogy') + for key, value in paper_params.items(): + self.__dict__[key] = value # copy all the setting from the paper. 
+ + self.hyperparameters = { + 'lmbda': self.lmbda, + 'learning_rate': self.learning_rate, + 'hidden_size': self.hidden_size, + 'batch_size': self.batch_size, + 'epochs': self.epochs, + 'data': self.data, + 'optimizer': self.optimizer, + 'sampling': self.sampling, + 'neg_rate': self.neg_rate, + } + BasicConfig.__init__(self, args) \ No newline at end of file diff --git a/pykg2vec/config/hyperparams.py b/pykg2vec/config/hyperparams.py index 639ecbb..440fad6 100644 --- a/pykg2vec/config/hyperparams.py +++ b/pykg2vec/config/hyperparams.py @@ -32,7 +32,9 @@ def __init__(self): 'distmult': {'learning_rate': 0.1,'hidden_size':100,'batch_size':50000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001}, 'proje_po': {'learning_rate': 0.01,'hidden_dropout': 0.5, 'hidden_size':200,'batch_size':200,' epochs':100, 'optimizer':'adam','lmbda':0.00001}, 'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0}, - 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1} + 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1}, + 'analogy': {'learning_rate': 0.1, 'hidden_size': 200, 'batch_size': 128, 'epochs': 500, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001} + } } @@ -698,4 +700,39 @@ def __init__(self): self.training_threshold = [1.0, 2.0, 3.0] self.ncluster = [3, 4, 5, 6, 7] self.CRP_factor = [0.01, 0.05, 0.1] - self.weight_norm = [True, False] \ No newline at end of file + self.weight_norm = [True, False] + +class ANALOGYParams: + """This class defines the hyperameters and its ranges for tuning ANALOGY algorithm. 
+ + ANALOGYParams defines all the possible values to be tuned for the algorithm. User may + change these values directly for performing the bayesian optimization of the hyper-parameters + + Args: + lambda (list) : List of floating point values. + feature_map_dropout (list) :List of floating point values. + input_dropout (list) : List of floating point values. + hidden_dropout (list) : List of floating point values. + use_bias (list) :List of boolean values. + label_smoothing (list) : List of floating point values. + lr_decay (float) : List of floating point values. + learning_rate (list): List of floating point values. + L1_flag (list): List of boolean values. + hidden_size (list): List of integer values. + batch_size (list): List of integer values. + epochs (list): List of integer values. + margin (list): List of floating point values. + optimizer (list): List of strings defining the optimization algorithm to be used. + sampling (list): List of strings defining the sampling to be used for generating negative examples. + + """ + + def __init__(self): + self.search_space = { + 'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)), + 'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256),1)), + 'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096),1)), + 'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)), + 'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']), + 'epochs': hp.choice('epochs', [10]) # always choose 10 training epochs. 
+ } \ No newline at end of file diff --git a/pykg2vec/core/ANALOGY.py b/pykg2vec/core/ANALOGY.py new file mode 100644 index 0000000..30a6210 --- /dev/null +++ b/pykg2vec/core/ANALOGY.py @@ -0,0 +1,98 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from pykg2vec.core.KGMeta import ModelMeta +from pykg2vec.utils.generator import TrainingStrategy + + +class ANALOGY(ModelMeta): + + def __init__(self, config): + super(ANALOGY, self).__init__() + self.config = config + self.model_name = 'ANALOGY' + self.training_strategy = TrainingStrategy.POINTWISE_BASED + + def def_parameters(self): + """Defines the model parameters. + + Attributes: + num_total_ent (int): Total number of entities. + num_total_rel (int): Total number of relations. + k (Tensor): Size of the latent dimesnion for entities and relations. + ent_embeddings (Tensor Variable): Lookup variable containing embedding of the entities. + rel_embeddings (Tensor Variable): Lookup variable containing embedding of the relations. + b (Tensor Variable): Variable storing the bias values. + parameter_list (list): List of Tensor parameters. 
+ """ + num_total_ent = self.config.kg_meta.tot_entity + num_total_rel = self.config.kg_meta.tot_relation + k = self.config.hidden_size + + emb_initializer = tf.initializers.glorot_normal() + self.ent_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="ent_embedding") + self.rel_embeddings = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="rel_embedding") + self.ent_embeddings_real = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="emb_e_real") + self.ent_embeddings_img = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="emb_e_img") + self.rel_embeddings_real = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="emb_rel_real") + self.rel_embeddings_img = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="emb_rel_img") + self.parameter_list = [self.ent_embeddings, self.rel_embeddings, self.ent_embeddings_real, self.ent_embeddings_img, self.rel_embeddings_real, self.rel_embeddings_img] + + def embed1(self, h, r, t): + """Function to get the embedding value. + + Args: + h (Tensor): Head entities ids. + r (Tensor): Relation ids of the triple. + t (Tensor): Tail entity ids of the triple. + + Returns: + Tensors: Returns real and imaginary values of head, relation and tail embedding. + """ + h_emb_real = tf.nn.embedding_lookup(self.ent_embeddings_real, h) + h_emb_img = tf.nn.embedding_lookup(self.ent_embeddings_img, h) + + r_emb_real = tf.nn.embedding_lookup(self.rel_embeddings_real, r) + r_emb_img = tf.nn.embedding_lookup(self.rel_embeddings_img, r) + + t_emb_real = tf.nn.embedding_lookup(self.ent_embeddings_real, t) + t_emb_img = tf.nn.embedding_lookup(self.ent_embeddings_img, t) + + return h_emb_real, h_emb_img, r_emb_real, r_emb_img, t_emb_real, t_emb_img + + def embed2(self, h, r, t): + """Function to get the embedding value. + + Args: + h (Tensor): Head entities ids. + r (Tensor): Relation ids of the triple. + t (Tensor): Tail entity ids of the triple. 
+ + Returns: + Tensors: Returns head, relation and tail embedding Tensors. + """ + h_emb = tf.nn.embedding_lookup(self.ent_embeddings, h) + r_emb = tf.nn.embedding_lookup(self.rel_embeddings, r) + t_emb = tf.nn.embedding_lookup(self.ent_embeddings, t) + + return h_emb, r_emb, t_emb + + def forward(self, h, r, t): + h_e_real, h_e_img, r_e_real, r_e_img, t_e_real, t_e_img = self.embed1(h, r, t) + h_e, r_e, t_e = self.embed2(h, r, t) + return -tf.reduce_sum( + h_e_real * t_e_real * r_e_real + h_e_img * t_e_img * r_e_real + h_e_real * t_e_img * r_e_img - h_e_img * t_e_real * r_e_img, -1) \ + + -tf.reduce_sum(h_e * r_e * t_e, -1) + + # TODO: double check if we need the regularizer here + def get_reg(self, h, r, t): + h_e_real, h_e_img, r_e_real, r_e_img, t_e_real, t_e_img = self.embed1(h, r, t) + h_e, r_e, t_e = self.embed2(h, r, t) + + regul_term = tf.reduce_mean(tf.reduce_sum(h_e_real**2, -1) + tf.reduce_sum(h_e_img**2, -1) + tf.reduce_sum(r_e_real**2,-1) + + tf.reduce_sum(r_e_img**2, -1) + tf.reduce_sum(t_e_real**2, -1) + tf.reduce_sum(t_e_img**2, -1) + + tf.reduce_sum(h_e**2, -1) + tf.reduce_sum(r_e**2, -1) + tf.reduce_sum(t_e**2,-1)) + return self.config.lmbda*regul_term diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index fdcc938..5a986c2 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -61,6 +61,7 @@ def testing_function(name, distance_measure=None, bilinear=None, display=False, 'transd', 'transm', 'hole', + 'analogy', ]) def test_KGE_methods(model_name): """Function to test a set of KGE algorithsm.""" diff --git a/pykg2vec/test/test_tune_model.py b/pykg2vec/test/test_tune_model.py index ac3ad87..18d99d3 100644 --- a/pykg2vec/test/test_tune_model.py +++ b/pykg2vec/test/test_tune_model.py @@ -48,6 +48,7 @@ def tunning_function(name): 'complex', 'complexn3', 'distmult', + 'analogy', ]) def test_tuning(model_name): """Function to test the tuning function.""" diff --git a/pykg2vec/utils/bayesian_optimizer.py 
b/pykg2vec/utils/bayesian_optimizer.py index 2547f31..b2900a0 100644 --- a/pykg2vec/utils/bayesian_optimizer.py +++ b/pykg2vec/utils/bayesian_optimizer.py @@ -39,7 +39,8 @@ "transh": "TransH", "transm": "TransM", "transr": "TransR", - "tucker": "TuckER"} + "tucker": "TuckER", + "analogy": "ANALOGY"} modelMap = {"complex": "Complex", "complexn3": "ComplexN3", @@ -61,7 +62,8 @@ "transh": "TransH", "transm": "TransM", "transr": "TransR", - "tucker": "TuckER"} + "tucker": "TuckER", + "analogy": "ANALOGY"} configMap = {"complex": "ComplexConfig", @@ -84,7 +86,8 @@ "transh": "TransHConfig", "transm": "TransMConfig", "transr": "TransRConfig", - "tucker": "TuckERConfig"} + "tucker": "TuckERConfig", + "analogy": "ANALOGYConfig"} hypMap = {"complex": "ComplexParams", @@ -107,7 +110,8 @@ "transh": "TransHParams", "transm": "TransMParams", "transr": "TransRParams", - "tucker": "TuckERParams"} + "tucker": "TuckERParams", + "analogy": "ANALOGYParams"} class BaysOptimizer(object): diff --git a/pykg2vec/utils/trainer.py b/pykg2vec/utils/trainer.py index b13696e..e156b20 100644 --- a/pykg2vec/utils/trainer.py +++ b/pykg2vec/utils/trainer.py @@ -204,7 +204,7 @@ def train_step_pointwise(self, h, r, t, y): loss = tf.reduce_mean(tf.nn.softplus(y*preds)) - if hasattr(self.model, 'get_reg'): # for complex & complex-N3 & DistMult + if hasattr(self.model, 'get_reg'): # for complex & complex-N3 & DistMult & ANALOGY loss += self.model.get_reg(h, r, t) gradients = tape.gradient(loss, self.model.trainable_variables) From 2af8d2d54847c89f97d37e06eabdaf94d13e1ca2 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 13 Apr 2020 17:07:38 +0100 Subject: [PATCH 06/19] initial commit for adding the CP model --- pykg2vec/config/config.py | 63 ++++++++++++++++++++- pykg2vec/config/hyperparams.py | 41 +++++++++++++- pykg2vec/core/CP.py | 76 +++++++++++++++++++++++++ pykg2vec/test/test_model.py | 83 ++++++++++++++-------------- pykg2vec/test/test_tune_model.py | 35 ++++++------ 
pykg2vec/utils/bayesian_optimizer.py | 12 ++-- 6 files changed, 243 insertions(+), 67 deletions(-) create mode 100644 pykg2vec/core/CP.py diff --git a/pykg2vec/config/config.py b/pykg2vec/config/config.py index 3910ba9..79cda10 100644 --- a/pykg2vec/config/config.py +++ b/pykg2vec/config/config.py @@ -61,7 +61,8 @@ def __init__(self): "transg": "TransG.TransG", "transm": "TransM.TransM", "transr": "TransR.TransR", - "tucker": "TuckER.TuckER"} + "tucker": "TuckER.TuckER", + "cp": "CP.CP"} self.configMap = {"complex": "ComplexConfig", "complexn3": "ComplexConfig", @@ -84,7 +85,8 @@ def __init__(self): "transh": "TransHConfig", "transm": "TransMConfig", "transr": "TransRConfig", - "tucker": "TuckERConfig"} + "tucker": "TuckERConfig", + "cp": "CPConfig"} def import_model_config(self, name): """This function imports models and configuration. @@ -1318,6 +1320,7 @@ def __init__(self, args=None): BasicConfig.__init__(self, args) + class TuckERConfig(BasicConfig): """This class defines the configuration for the TuckER Algorithm. @@ -1480,4 +1483,58 @@ def __init__(self, args=None): 'neg_rate': self.neg_rate, } - BasicConfig.__init__(self, args) \ No newline at end of file + BasicConfig.__init__(self, args) + + +class CPConfig(BasicConfig): + """This class defines the configuration for the Canonical Tensor Decomposition Algorithm. + + CPConfig inherits the BasicConfig and defines the local arguements used in the + algorithm. + + Attributes: + hyperparameters (dict): Defines the dictionary of hyperparameters to be used by bayesian optimizer for tuning. + + Args: + lambda (float) : Weigth applied to the regularization in the loss function. + learning_rate (float): Defines the learning rate for the optimization. + L1_flag (bool): If True, perform L1 regularization on the model parameters. + hidden_size (int): Defines the size of the latent dimension for entities and relations. + batch_size (int): Defines the batch size for training the algorithm. 
+ epochs (int): Defines the total number of epochs for training the algorithm. + margin (float): Defines the margin used between the positive and negative triple loss. + data (str): Defines the knowledge base dataset to be used for training the algorithm. + optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc. + sampling (str): Defines the sampling (bern or uniform) for corrupting the triples. + + """ + + def __init__(self, args=None): + self.lmbda = args.lmbda + self.learning_rate = args.learning_rate + self.hidden_size = args.hidden_size + self.batch_size = args.batch_training + self.epochs = args.epochs + self.data = args.dataset_name + self.optimizer = args.optimizer + self.sampling = args.sampling + self.neg_rate = args.negrate + + if args.exp is True: + paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'cp') + for key, value in paper_params.items(): + self.__dict__[key] = value # copy all the setting from the paper. + + self.hyperparameters = { + 'lmbda': self.lmbda, + 'learning_rate': self.learning_rate, + 'hidden_size': self.hidden_size, + 'batch_size': self.batch_size, + 'epochs': self.epochs, + 'data': self.data, + 'optimizer': self.optimizer, + 'sampling': self.sampling, + 'neg_rate': self.neg_rate, + } + + BasicConfig.__init__(self, args) diff --git a/pykg2vec/config/hyperparams.py b/pykg2vec/config/hyperparams.py index 639ecbb..cc11253 100644 --- a/pykg2vec/config/hyperparams.py +++ b/pykg2vec/config/hyperparams.py @@ -32,7 +32,8 @@ def __init__(self): 'distmult': {'learning_rate': 0.1,'hidden_size':100,'batch_size':50000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001}, 'proje_po': {'learning_rate': 0.01,'hidden_dropout': 0.5, 'hidden_size':200,'batch_size':200,' epochs':100, 'optimizer':'adam','lmbda':0.00001}, 'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 
'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0}, - 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1} + 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1}, + 'cp': {'learning_rate': 0.01, 'hidden_size': 50, 'batch_size': 128, 'epochs': 50, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001}, } } @@ -698,4 +699,40 @@ def __init__(self): self.training_threshold = [1.0, 2.0, 3.0] self.ncluster = [3, 4, 5, 6, 7] self.CRP_factor = [0.01, 0.05, 0.1] - self.weight_norm = [True, False] \ No newline at end of file + self.weight_norm = [True, False] + + +class CPParams: + """This class defines the hyperameters and its ranges for tuning Canonical Tensor Decomposition algorithm. + + CPParams defines all the possibel values to be tuned for the algorithm. User may + change these values directly for performing the bayesian optimization of the hyper-parameters + + Args: + lambda (list) : List of floating point values. + feature_map_dropout (list) :List of floating point values. + input_dropout (list) : List of floating point values. + hidden_dropout (list) : List of floating point values. + use_bias (list) :List of boolean values. + label_smoothing (list) : List of floating point values. + lr_decay (float) : List of floating point values. + learning_rate (list): List of floating point values. + L1_flag (list): List of boolean values. + hidden_size (list): List of integer values. + batch_size (list): List of integer values. + epochs (list): List of integer values. + margin (list): List of floating point values. + optimizer (list): List of strings defining the optimization algorithm to be used. 
+ sampling (list): List of string defining the sampling to be used for generating negative examples. + + """ + + def __init__(self): + self.search_space = { + 'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)), + 'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256),1)), + 'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096),1)), + 'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)), + 'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']), + 'epochs': hp.choice('epochs', [10]) # always choose 10 training epochs. + } diff --git a/pykg2vec/core/CP.py b/pykg2vec/core/CP.py new file mode 100644 index 0000000..5ca28af --- /dev/null +++ b/pykg2vec/core/CP.py @@ -0,0 +1,76 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from pykg2vec.core.KGMeta import ModelMeta +from pykg2vec.utils.generator import TrainingStrategy + + +class CP(ModelMeta): + + def __init__(self, config): + super(CP, self).__init__() + self.config = config + self.model_name = 'CP' + self.training_strategy = TrainingStrategy.PROJECTION_BASED + + def def_parameters(self): + """Defines the model parameters. + + Attributes: + num_total_ent (int): Total number of entities. + num_total_rel (int): Total number of relations. + k (Tensor): Size of the latent dimesnion for entities and relations. + ent_embeddings (Tensor Variable): Lookup variable containing embedding of the entities. + rel_embeddings (Tensor Variable): Lookup variable containing embedding of the relations. + b (Tensor Variable): Variable storing the bias values. + parameter_list (list): List of Tensor parameters. 
+ """ + num_total_ent = self.config.kg_meta.tot_entity + num_total_rel = self.config.kg_meta.tot_relation + k = self.config.hidden_size + + emb_initializer = tf.initializers.glorot_normal() + self.ent_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="ent_embedding") + self.rel_embeddings = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="rel_embedding") + self.parameter_list = [self.ent_embeddings, self.rel_embeddings] + + def embed(self, h, r, t): + """Function to get the embedding value. + + Args: + h (Tensor): Head entities ids. + r (Tensor): Relation ids of the triple. + t (Tensor): Tail entity ids of the triple. + + Returns: + Tensors: Returns head, relation and tail embedding Tensors. + """ + emb_h = tf.nn.embedding_lookup(self.ent_embeddings, h) + emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) + emb_t = tf.nn.embedding_lookup(self.ent_embeddings, t) + return emb_h, emb_r, emb_t + + def forward(self, e, r, er_e2, direction=None): + emb_e = tf.nn.embedding_lookup(self.ent_embeddings, e) + emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) + + return -tf.reduce_mean(tf.keras.backend.binary_crossentropy(er_e2, (emb_e * emb_r) @ tf.transpose(self.ent_embeddings))) + + def get_reg(self): + return self.config.lmbda * (tf.reduce_sum(tf.reduce_sum(tf.abs(self.ent_embeddings) ** 3) + tf.reduce_sum( + tf.abs(self.rel_embeddings) ** 3))) + + def predict_tail_rank(self, h, r, topk=-1): + emb_h = tf.nn.embedding_lookup(self.ent_embeddings, h) + emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) + + candidates = -(emb_h * emb_r) @ tf.transpose(self.ent_embeddings) + _, rank = tf.nn.top_k(candidates, k=topk) + + return rank + + def predict_head_rank(self, t, r, topk=-1): + return self.predict_tail_rank(t, r, topk) diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index fdcc938..d843f30 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -48,49 +48,50 @@ def 
testing_function(name, distance_measure=None, bilinear=None, display=False, trainer.train_model() @pytest.mark.parametrize("model_name", [ - 'complex', - 'complexn3', - 'distmult', - 'proje_pointwise', - 'rescal', - 'rotate', - 'slm', - 'transe', - 'transh', - 'transr', - 'transd', - 'transm', - 'hole', + # 'complex', + # 'complexn3', + # 'distmult', + # 'proje_pointwise', + # 'rescal', + # 'rotate', + # 'slm', + # 'transe', + # 'transh', + # 'transr', + # 'transd', + # 'transm', + # 'hole', + 'cp', ]) def test_KGE_methods(model_name): """Function to test a set of KGE algorithsm.""" testing_function(model_name) -def test_NTN(): - testing_function('ntn', ent_hidden_size=10, rel_hidden_size=10) # for avoiding OOM. - -def test_ConvE(): - testing_function('conve', channels=2) # for avoiding OOM. - -def test_ConvKB(): - testing_function('convkb', channels=2) # for avoiding OOM. - -def test_KG2E_EL_args(): - """Function to test KG2E Algorithm with arguments.""" - testing_function('kg2e_el', distance_measure="expected_likelihood") - -def test_KG2E_KL_args(): - """Function to test KG2E Algorithm with arguments.""" - testing_function('kg2e', distance_measure="kl_divergence") - -def test_SMEL_args(): - """Function to test SME Algorithm with arguments.""" - testing_function('sme', bilinear=False) - -def test_SMEB_args(): - """Function to test SME Algorithm with arguments.""" - testing_function('sme_bl', bilinear=True) - -def test_transE_display(): - """Function to test transE display.""" - testing_function('transe', display=True) \ No newline at end of file +# def test_NTN(): +# testing_function('ntn', ent_hidden_size=10, rel_hidden_size=10) # for avoiding OOM. +# +# def test_ConvE(): +# testing_function('conve', channels=2) # for avoiding OOM. +# +# def test_ConvKB(): +# testing_function('convkb', channels=2) # for avoiding OOM. 
+# +# def test_KG2E_EL_args(): +# """Function to test KG2E Algorithm with arguments.""" +# testing_function('kg2e_el', distance_measure="expected_likelihood") +# +# def test_KG2E_KL_args(): +# """Function to test KG2E Algorithm with arguments.""" +# testing_function('kg2e', distance_measure="kl_divergence") +# +# def test_SMEL_args(): +# """Function to test SME Algorithm with arguments.""" +# testing_function('sme', bilinear=False) +# +# def test_SMEB_args(): +# """Function to test SME Algorithm with arguments.""" +# testing_function('sme_bl', bilinear=True) +# +# def test_transE_display(): +# """Function to test transE display.""" +# testing_function('transe', display=True) \ No newline at end of file diff --git a/pykg2vec/test/test_tune_model.py b/pykg2vec/test/test_tune_model.py index ac3ad87..fcd6561 100644 --- a/pykg2vec/test/test_tune_model.py +++ b/pykg2vec/test/test_tune_model.py @@ -31,23 +31,24 @@ def tunning_function(name): @pytest.mark.parametrize('model_name', [ - 'transe', - 'transh', - 'transm', - 'rescal', - 'sme', - 'sme_bl', - 'transd', - 'transr', - 'ntn', - 'slm', - 'hole', - 'rotate', - 'kg2e', - 'kg2e_el', - 'complex', - 'complexn3', - 'distmult', + # 'transe', + # 'transh', + # 'transm', + # 'rescal', + # 'sme', + # 'sme_bl', + # 'transd', + # 'transr', + # 'ntn', + # 'slm', + # 'hole', + # 'rotate', + # 'kg2e', + # 'kg2e_el', + # 'complex', + # 'complexn3', + # 'distmult', + 'cp', ]) def test_tuning(model_name): """Function to test the tuning function.""" diff --git a/pykg2vec/utils/bayesian_optimizer.py b/pykg2vec/utils/bayesian_optimizer.py index 2547f31..9c5f1dc 100644 --- a/pykg2vec/utils/bayesian_optimizer.py +++ b/pykg2vec/utils/bayesian_optimizer.py @@ -39,7 +39,8 @@ "transh": "TransH", "transm": "TransM", "transr": "TransR", - "tucker": "TuckER"} + "tucker": "TuckER", + "cp": "CP"} modelMap = {"complex": "Complex", "complexn3": "ComplexN3", @@ -61,7 +62,8 @@ "transh": "TransH", "transm": "TransM", "transr": "TransR", - "tucker": 
"TuckER"} + "tucker": "TuckER", + "cp": "CP"} configMap = {"complex": "ComplexConfig", @@ -84,7 +86,8 @@ "transh": "TransHConfig", "transm": "TransMConfig", "transr": "TransRConfig", - "tucker": "TuckERConfig"} + "tucker": "TuckERConfig", + "cp": "CPConfig"} hypMap = {"complex": "ComplexParams", @@ -107,7 +110,8 @@ "transh": "TransHParams", "transm": "TransMParams", "transr": "TransRParams", - "tucker": "TuckERParams"} + "tucker": "TuckERParams", + "cp": "CPParams"} class BaysOptimizer(object): From db9106545e675b5773b2c0a36a7d7219fba105e1 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 13 Apr 2020 17:16:02 +0100 Subject: [PATCH 07/19] remove commented out code --- pykg2vec/test/test_model.py | 82 ++++++++++++++++---------------- pykg2vec/test/test_tune_model.py | 34 ++++++------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index d843f30..5d116ef 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -48,50 +48,50 @@ def testing_function(name, distance_measure=None, bilinear=None, display=False, trainer.train_model() @pytest.mark.parametrize("model_name", [ - # 'complex', - # 'complexn3', - # 'distmult', - # 'proje_pointwise', - # 'rescal', - # 'rotate', - # 'slm', - # 'transe', - # 'transh', - # 'transr', - # 'transd', - # 'transm', - # 'hole', + 'complex', + 'complexn3', + 'distmult', + 'proje_pointwise', + 'rescal', + 'rotate', + 'slm', + 'transe', + 'transh', + 'transr', + 'transd', + 'transm', + 'hole', 'cp', ]) def test_KGE_methods(model_name): """Function to test a set of KGE algorithsm.""" testing_function(model_name) -# def test_NTN(): -# testing_function('ntn', ent_hidden_size=10, rel_hidden_size=10) # for avoiding OOM. -# -# def test_ConvE(): -# testing_function('conve', channels=2) # for avoiding OOM. -# -# def test_ConvKB(): -# testing_function('convkb', channels=2) # for avoiding OOM. 
-# -# def test_KG2E_EL_args(): -# """Function to test KG2E Algorithm with arguments.""" -# testing_function('kg2e_el', distance_measure="expected_likelihood") -# -# def test_KG2E_KL_args(): -# """Function to test KG2E Algorithm with arguments.""" -# testing_function('kg2e', distance_measure="kl_divergence") -# -# def test_SMEL_args(): -# """Function to test SME Algorithm with arguments.""" -# testing_function('sme', bilinear=False) -# -# def test_SMEB_args(): -# """Function to test SME Algorithm with arguments.""" -# testing_function('sme_bl', bilinear=True) -# -# def test_transE_display(): -# """Function to test transE display.""" -# testing_function('transe', display=True) \ No newline at end of file +def test_NTN(): + testing_function('ntn', ent_hidden_size=10, rel_hidden_size=10) # for avoiding OOM. + +def test_ConvE(): + testing_function('conve', channels=2) # for avoiding OOM. + +def test_ConvKB(): + testing_function('convkb', channels=2) # for avoiding OOM. + +def test_KG2E_EL_args(): + """Function to test KG2E Algorithm with arguments.""" + testing_function('kg2e_el', distance_measure="expected_likelihood") + +def test_KG2E_KL_args(): + """Function to test KG2E Algorithm with arguments.""" + testing_function('kg2e', distance_measure="kl_divergence") + +def test_SMEL_args(): + """Function to test SME Algorithm with arguments.""" + testing_function('sme', bilinear=False) + +def test_SMEB_args(): + """Function to test SME Algorithm with arguments.""" + testing_function('sme_bl', bilinear=True) + +def test_transE_display(): + """Function to test transE display.""" + testing_function('transe', display=True) \ No newline at end of file diff --git a/pykg2vec/test/test_tune_model.py b/pykg2vec/test/test_tune_model.py index fcd6561..9198ee0 100644 --- a/pykg2vec/test/test_tune_model.py +++ b/pykg2vec/test/test_tune_model.py @@ -31,23 +31,23 @@ def tunning_function(name): @pytest.mark.parametrize('model_name', [ - # 'transe', - # 'transh', - # 'transm', - # 'rescal', 
- # 'sme', - # 'sme_bl', - # 'transd', - # 'transr', - # 'ntn', - # 'slm', - # 'hole', - # 'rotate', - # 'kg2e', - # 'kg2e_el', - # 'complex', - # 'complexn3', - # 'distmult', + 'transe', + 'transh', + 'transm', + 'rescal', + 'sme', + 'sme_bl', + 'transd', + 'transr', + 'ntn', + 'slm', + 'hole', + 'rotate', + 'kg2e', + 'kg2e_el', + 'complex', + 'complexn3', + 'distmult', 'cp', ]) def test_tuning(model_name): From 667f5ba44e11e62f0f5f65e9aa353cabce668916 Mon Sep 17 00:00:00 2001 From: baxtree Date: Tue, 14 Apr 2020 09:33:53 +0100 Subject: [PATCH 08/19] make CP pointwise based and fix the regularizer method in DistMult --- pykg2vec/core/CP.py | 43 ++++++++++++++------------------ pykg2vec/core/DistMult.py | 2 +- pykg2vec/test/test_inference.py | 2 ++ pykg2vec/test/test_model.py | 4 +-- pykg2vec/test/test_tune_model.py | 26 +++++++++---------- pykg2vec/utils/trainer.py | 2 +- 6 files changed, 38 insertions(+), 41 deletions(-) diff --git a/pykg2vec/core/CP.py b/pykg2vec/core/CP.py index 5ca28af..385ff7e 100644 --- a/pykg2vec/core/CP.py +++ b/pykg2vec/core/CP.py @@ -14,7 +14,7 @@ def __init__(self, config): super(CP, self).__init__() self.config = config self.model_name = 'CP' - self.training_strategy = TrainingStrategy.PROJECTION_BASED + self.training_strategy = TrainingStrategy.POINTWISE_BASED def def_parameters(self): """Defines the model parameters. 
@@ -33,9 +33,11 @@ def def_parameters(self): k = self.config.hidden_size emb_initializer = tf.initializers.glorot_normal() - self.ent_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="ent_embedding") + self.sub_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="sub_embedding") self.rel_embeddings = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="rel_embedding") - self.parameter_list = [self.ent_embeddings, self.rel_embeddings] + self.obj_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="obj_embedding") + self.parameter_list = [self.sub_embeddings, self.rel_embeddings, self.obj_embeddings] + def embed(self, h, r, t): """Function to get the embedding value. @@ -48,29 +50,22 @@ def embed(self, h, r, t): Returns: Tensors: Returns head, relation and tail embedding Tensors. """ - emb_h = tf.nn.embedding_lookup(self.ent_embeddings, h) + emb_h = tf.nn.embedding_lookup(self.sub_embeddings, h) emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) - emb_t = tf.nn.embedding_lookup(self.ent_embeddings, t) + emb_t = tf.nn.embedding_lookup(self.obj_embeddings, t) return emb_h, emb_r, emb_t - def forward(self, e, r, er_e2, direction=None): - emb_e = tf.nn.embedding_lookup(self.ent_embeddings, e) - emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) - - return -tf.reduce_mean(tf.keras.backend.binary_crossentropy(er_e2, (emb_e * emb_r) @ tf.transpose(self.ent_embeddings))) - - def get_reg(self): - return self.config.lmbda * (tf.reduce_sum(tf.reduce_sum(tf.abs(self.ent_embeddings) ** 3) + tf.reduce_sum( - tf.abs(self.rel_embeddings) ** 3))) - - def predict_tail_rank(self, h, r, topk=-1): - emb_h = tf.nn.embedding_lookup(self.ent_embeddings, h) - emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) - - candidates = -(emb_h * emb_r) @ tf.transpose(self.ent_embeddings) - _, rank = tf.nn.top_k(candidates, k=topk) + def forward(self, h, r, t): + h_e, r_e, t_e = self.embed(h, r, t) + return 
-tf.reduce_sum(h_e * r_e * t_e, -1) - return rank + def get_reg(self, h, r, t, type='N3'): + h_e, r_e, t_e = self.embed(h, r, t) + if type.lower() == 'f2': + regul_term = tf.reduce_mean(tf.reduce_sum(h_e**2, -1) + tf.reduce_sum(r_e**2, -1) + tf.reduce_sum(t_e**2,-1)) + elif type.lower() == 'n3': + regul_term = tf.reduce_mean(tf.reduce_sum(h_e**3, -1) + tf.reduce_sum(r_e**3, -1) + tf.reduce_sum(t_e**3,-1)) + else: + raise NotImplementedError('Unknown regularizer type: %s' % type) - def predict_head_rank(self, t, r, topk=-1): - return self.predict_tail_rank(t, r, topk) + return self.config.lmbda * regul_term diff --git a/pykg2vec/core/DistMult.py b/pykg2vec/core/DistMult.py index e994aa9..a4ce13a 100644 --- a/pykg2vec/core/DistMult.py +++ b/pykg2vec/core/DistMult.py @@ -86,7 +86,7 @@ def forward(self, h, r, t): h_e, r_e, t_e = self.embed(h, r, t) return -tf.reduce_sum(h_e*r_e*t_e, -1) - def get_regul(self, h, r, t): + def get_reg(self, h, r, t): h_e, r_e, t_e = self.embed(h, r, t) regul_term = tf.reduce_mean(tf.reduce_sum(h_e**2, -1) + tf.reduce_sum(r_e**2, -1) + tf.reduce_sum(t_e**2,-1)) return self.config.lmbda*regul_term \ No newline at end of file diff --git a/pykg2vec/test/test_inference.py b/pykg2vec/test/test_inference.py index 6dcad38..071fcc1 100644 --- a/pykg2vec/test/test_inference.py +++ b/pykg2vec/test/test_inference.py @@ -57,10 +57,12 @@ def testing_function_with_args(name, l1_flag, distance_measure=None, bilinear=No trainer.exit_interactive_mode() @pytest.mark.parametrize("model_name", [ + 'analogy' 'complex', 'complexn3', # 'conve', # 'convkb', + 'cp' 'distmult', 'hole', 'kg2e', diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index 5d116ef..215ee41 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -50,7 +50,9 @@ def testing_function(name, distance_measure=None, bilinear=None, display=False, @pytest.mark.parametrize("model_name", [ 'complex', 'complexn3', + 'cp', 'distmult', + 'hole', 
'proje_pointwise', 'rescal', 'rotate', @@ -60,8 +62,6 @@ def testing_function(name, distance_measure=None, bilinear=None, display=False, 'transr', 'transd', 'transm', - 'hole', - 'cp', ]) def test_KGE_methods(model_name): """Function to test a set of KGE algorithsm.""" diff --git a/pykg2vec/test/test_tune_model.py b/pykg2vec/test/test_tune_model.py index 9198ee0..d6b6911 100644 --- a/pykg2vec/test/test_tune_model.py +++ b/pykg2vec/test/test_tune_model.py @@ -31,24 +31,24 @@ def tunning_function(name): @pytest.mark.parametrize('model_name', [ - 'transe', - 'transh', - 'transm', + 'complex', + 'complexn3', + 'cp', + 'distmult', + 'hole', + 'kg2e', + 'kg2e_el', + 'ntn', 'rescal', + 'rotate', + 'slm', 'sme', 'sme_bl', + 'transe', + 'transh', + 'transm', 'transd', 'transr', - 'ntn', - 'slm', - 'hole', - 'rotate', - 'kg2e', - 'kg2e_el', - 'complex', - 'complexn3', - 'distmult', - 'cp', ]) def test_tuning(model_name): """Function to test the tuning function.""" diff --git a/pykg2vec/utils/trainer.py b/pykg2vec/utils/trainer.py index b13696e..db0bc85 100644 --- a/pykg2vec/utils/trainer.py +++ b/pykg2vec/utils/trainer.py @@ -204,7 +204,7 @@ def train_step_pointwise(self, h, r, t, y): loss = tf.reduce_mean(tf.nn.softplus(y*preds)) - if hasattr(self.model, 'get_reg'): # for complex & complex-N3 & DistMult + if hasattr(self.model, 'get_reg'): # for complex & complex-N3 & DistMult & CP loss += self.model.get_reg(h, r, t) gradients = tape.gradient(loss, self.model.trainable_variables) From 21413aadc5b9a3b383dafd013e335415859356b3 Mon Sep 17 00:00:00 2001 From: baxtree Date: Tue, 14 Apr 2020 09:54:28 +0100 Subject: [PATCH 09/19] fix typos in inferece tests and tidy up --- pykg2vec/config/config.py | 16 +++---- pykg2vec/test/test_inference.py | 4 +- pykg2vec/utils/bayesian_optimizer.py | 72 ++++++++++++++-------------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/pykg2vec/config/config.py b/pykg2vec/config/config.py index 7c11ab8..5d5968c 100644 --- 
a/pykg2vec/config/config.py +++ b/pykg2vec/config/config.py @@ -40,10 +40,12 @@ def __init__(self): self.model_path = "pykg2vec.core" self.config_path = "pykg2vec.config.config" - self.modelMap = {"complex": "Complex.Complex", + self.modelMap = {"analogy": "ANALOGY.ANALOGY", + "complex": "Complex.Complex", "complexn3": "Complex.ComplexN3", "conve": "ConvE.ConvE", "convkb": "ConvKB.ConvKB", + "cp": "CP.CP", "hole": "HoLE.HoLE", "distmult": "DistMult.DistMult", "kg2e": "KG2E.KG2E", @@ -61,14 +63,14 @@ def __init__(self): "transg": "TransG.TransG", "transm": "TransM.TransM", "transr": "TransR.TransR", - "tucker": "TuckER.TuckER", - "cp": "CP.CP", - "analogy": "ANALOGY.ANALOGY"} + "tucker": "TuckER.TuckER"} - self.configMap = {"complex": "ComplexConfig", + self.configMap = {"analogy": "ANALOGYConfig", + "complex": "ComplexConfig", "complexn3": "ComplexConfig", "conve": "ConvEConfig", "convkb": "ConvKBConfig", + "cp": "CPConfig", "hole": "HoLEConfig", "distmult": "DistMultConfig", "kg2e": "KG2EConfig", @@ -86,9 +88,7 @@ def __init__(self): "transh": "TransHConfig", "transm": "TransMConfig", "transr": "TransRConfig", - "tucker": "TuckERConfig", - "cp": "CPConfig", - "analogy": "ANALOGYConfig"} + "tucker": "TuckERConfig"} def import_model_config(self, name): """This function imports models and configuration. 
diff --git a/pykg2vec/test/test_inference.py b/pykg2vec/test/test_inference.py index 071fcc1..eb12d20 100644 --- a/pykg2vec/test/test_inference.py +++ b/pykg2vec/test/test_inference.py @@ -57,12 +57,12 @@ def testing_function_with_args(name, l1_flag, distance_measure=None, bilinear=No trainer.exit_interactive_mode() @pytest.mark.parametrize("model_name", [ - 'analogy' + 'analogy', 'complex', 'complexn3', # 'conve', # 'convkb', - 'cp' + 'cp', 'distmult', 'hole', 'kg2e', diff --git a/pykg2vec/utils/bayesian_optimizer.py b/pykg2vec/utils/bayesian_optimizer.py index 1ff4b4b..df195e0 100644 --- a/pykg2vec/utils/bayesian_optimizer.py +++ b/pykg2vec/utils/bayesian_optimizer.py @@ -19,33 +19,35 @@ config_path = "pykg2vec.config.config" hyper_param_path = "pykg2vec.config.hyperparams" -moduleMap = {"complex": "Complex", - "complexn3": "Complex", - "conve": "ConvE", - "hole": "HoLE", - "distmult": "DistMult", - "kg2e": "KG2E", - "kg2e_el": "KG2E", - "ntn": "NTN", - "proje_pointwise": "ProjE_pointwise", - "rescal": "Rescal", - "rotate": "RotatE", - "slm": "SLM", - "sme": "SME", - "sme_bl": "SME", - "transd": "TransD", - "transe": "TransE", - "transg": "TransG", - "transh": "TransH", - "transm": "TransM", - "transr": "TransR", - "tucker": "TuckER", - "cp": "CP", - "analogy": "ANALOGY"} - -modelMap = {"complex": "Complex", +moduleMap = {"analogy": "ANALOGY", + "complex": "Complex", + "complexn3": "Complex", + "conve": "ConvE", + "cp": "CP", + "hole": "HoLE", + "distmult": "DistMult", + "kg2e": "KG2E", + "kg2e_el": "KG2E", + "ntn": "NTN", + "proje_pointwise": "ProjE_pointwise", + "rescal": "Rescal", + "rotate": "RotatE", + "slm": "SLM", + "sme": "SME", + "sme_bl": "SME", + "transd": "TransD", + "transe": "TransE", + "transg": "TransG", + "transh": "TransH", + "transm": "TransM", + "transr": "TransR", + "tucker": "TuckER"} + +modelMap = {"analogy": "ANALOGY", + "complex": "Complex", "complexn3": "ComplexN3", "conve": "ConvE", + "cp": "CP", "hole": "HoLE", "distmult": "DistMult", 
"kg2e": "KG2E", @@ -63,13 +65,13 @@ "transh": "TransH", "transm": "TransM", "transr": "TransR", - "tucker": "TuckER", - "cp": "CP", - "analogy": "ANALOGY"} + "tucker": "TuckER"} -configMap = {"complex": "ComplexConfig", +configMap = {"analogy": "ANALOGYConfig", + "complex": "ComplexConfig", "complexn3": "ComplexConfig", "conve": "ConvEConfig", + "cp": "CPConfig", "hole": "HoLEConfig", "distmult": "DistMultConfig", "kg2e": "KG2EConfig", @@ -87,13 +89,13 @@ "transh": "TransHConfig", "transm": "TransMConfig", "transr": "TransRConfig", - "tucker": "TuckERConfig", - "cp": "CPConfig", - "analogy": "ANALOGYConfig"} + "tucker": "TuckERConfig"} -hypMap = {"complex": "ComplexParams", +hypMap = {"analogy": "ANALOGYParams", + "complex": "ComplexParams", "complexn3": "ComplexParams", "conve": "ConvEParams", + "cp": "CPParams", "hole": "HoLEParams", "distmult": "DistMultParams", "kg2e": "KG2EParams", @@ -111,9 +113,7 @@ "transh": "TransHParams", "transm": "TransMParams", "transr": "TransRParams", - "tucker": "TuckERParams", - "cp": "CPParams", - "analogy": "ANALOGYParams"} + "tucker": "TuckERParams"} class BaysOptimizer(object): From 3ed87e96af412c75cee2da578ec2ccf2eb01bee5 Mon Sep 17 00:00:00 2001 From: baxtree Date: Wed, 15 Apr 2020 09:37:26 +0100 Subject: [PATCH 10/19] fix the hidden size of the complex part in ANALOGY --- pykg2vec/core/ANALOGY.py | 56 ++++++++++++++++----------------- pykg2vec/test/test_model.py | 8 ++--- pykg2vec/utils/visualization.py | 6 ++-- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/pykg2vec/core/ANALOGY.py b/pykg2vec/core/ANALOGY.py index 30a6210..8b1e4e5 100644 --- a/pykg2vec/core/ANALOGY.py +++ b/pykg2vec/core/ANALOGY.py @@ -35,13 +35,13 @@ def def_parameters(self): emb_initializer = tf.initializers.glorot_normal() self.ent_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="ent_embedding") self.rel_embeddings = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="rel_embedding") - 
self.ent_embeddings_real = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="emb_e_real") - self.ent_embeddings_img = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="emb_e_img") - self.rel_embeddings_real = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="emb_rel_real") - self.rel_embeddings_img = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="emb_rel_img") + self.ent_embeddings_real = tf.Variable(emb_initializer(shape=(num_total_ent, k // 2)), name="emb_e_real") + self.ent_embeddings_img = tf.Variable(emb_initializer(shape=(num_total_ent, k // 2)), name="emb_e_img") + self.rel_embeddings_real = tf.Variable(emb_initializer(shape=(num_total_rel, k // 2)), name="emb_rel_real") + self.rel_embeddings_img = tf.Variable(emb_initializer(shape=(num_total_rel, k // 2)), name="emb_rel_img") self.parameter_list = [self.ent_embeddings, self.rel_embeddings, self.ent_embeddings_real, self.ent_embeddings_img, self.rel_embeddings_real, self.rel_embeddings_img] - def embed1(self, h, r, t): + def embed(self, h, r, t): """Function to get the embedding value. Args: @@ -50,18 +50,13 @@ def embed1(self, h, r, t): t (Tensor): Tail entity ids of the triple. Returns: - Tensors: Returns real and imaginary values of head, relation and tail embedding. + Tensors: Returns head, relation and tail embedding Tensors. 
""" - h_emb_real = tf.nn.embedding_lookup(self.ent_embeddings_real, h) - h_emb_img = tf.nn.embedding_lookup(self.ent_embeddings_img, h) - - r_emb_real = tf.nn.embedding_lookup(self.rel_embeddings_real, r) - r_emb_img = tf.nn.embedding_lookup(self.rel_embeddings_img, r) - - t_emb_real = tf.nn.embedding_lookup(self.ent_embeddings_real, t) - t_emb_img = tf.nn.embedding_lookup(self.ent_embeddings_img, t) + h_emb = tf.nn.embedding_lookup(self.ent_embeddings, h) + r_emb = tf.nn.embedding_lookup(self.rel_embeddings, r) + t_emb = tf.nn.embedding_lookup(self.ent_embeddings, t) - return h_emb_real, h_emb_img, r_emb_real, r_emb_img, t_emb_real, t_emb_img + return h_emb, r_emb, t_emb def embed2(self, h, r, t): """Function to get the embedding value. @@ -72,25 +67,30 @@ def embed2(self, h, r, t): t (Tensor): Tail entity ids of the triple. Returns: - Tensors: Returns head, relation and tail embedding Tensors. + Tensors: Returns real and imaginary values of head, relation and tail embedding. """ - h_emb = tf.nn.embedding_lookup(self.ent_embeddings, h) - r_emb = tf.nn.embedding_lookup(self.rel_embeddings, r) - t_emb = tf.nn.embedding_lookup(self.ent_embeddings, t) + h_emb_real = tf.nn.embedding_lookup(self.ent_embeddings_real, h) + h_emb_img = tf.nn.embedding_lookup(self.ent_embeddings_img, h) - return h_emb, r_emb, t_emb + r_emb_real = tf.nn.embedding_lookup(self.rel_embeddings_real, r) + r_emb_img = tf.nn.embedding_lookup(self.rel_embeddings_img, r) + + t_emb_real = tf.nn.embedding_lookup(self.ent_embeddings_real, t) + t_emb_img = tf.nn.embedding_lookup(self.ent_embeddings_img, t) + + return h_emb_real, h_emb_img, r_emb_real, r_emb_img, t_emb_real, t_emb_img def forward(self, h, r, t): - h_e_real, h_e_img, r_e_real, r_e_img, t_e_real, t_e_img = self.embed1(h, r, t) - h_e, r_e, t_e = self.embed2(h, r, t) - return -tf.reduce_sum( - h_e_real * t_e_real * r_e_real + h_e_img * t_e_img * r_e_real + h_e_real * t_e_img * r_e_img - h_e_img * t_e_real * r_e_img, -1) \ - + 
-tf.reduce_sum(h_e * r_e * t_e, -1) + h_e, r_e, t_e = self.embed(h, r, t) + h_e_real, h_e_img, r_e_real, r_e_img, t_e_real, t_e_img = self.embed2(h, r, t) + + complex_loss = -tf.reduce_sum(h_e_real * t_e_real * r_e_real + h_e_img * t_e_img * r_e_real + h_e_real * t_e_img * r_e_img - h_e_img * t_e_real * r_e_img, -1) + distmult_loss = -tf.reduce_sum(h_e * r_e * t_e, -1) + return complex_loss + distmult_loss - # TODO: double check if we need the regularizer here def get_reg(self, h, r, t): - h_e_real, h_e_img, r_e_real, r_e_img, t_e_real, t_e_img = self.embed1(h, r, t) - h_e, r_e, t_e = self.embed2(h, r, t) + h_e, r_e, t_e = self.embed(h, r, t) + h_e_real, h_e_img, r_e_real, r_e_img, t_e_real, t_e_img = self.embed2(h, r, t) regul_term = tf.reduce_mean(tf.reduce_sum(h_e_real**2, -1) + tf.reduce_sum(h_e_img**2, -1) + tf.reduce_sum(r_e_real**2,-1) + tf.reduce_sum(r_e_img**2, -1) + tf.reduce_sum(t_e_real**2, -1) + tf.reduce_sum(t_e_img**2, -1) diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index d8874a8..3f76f26 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -5,9 +5,11 @@ """ import pytest - -from pykg2vec.config.config import * +from pykg2vec.config.config import KGEArgParser, Importer from pykg2vec.utils.trainer import Trainer +from pykg2vec.utils.kgcontroller import KnowledgeGraph + + @pytest.mark.parametrize("model_name", [ 'analogy', 'complex', @@ -30,8 +32,6 @@ def test_KGE_methods(model_name): testing_function(model_name) -from pykg2vec.utils.kgcontroller import KnowledgeGraph - @pytest.mark.skip(reason="This is a functional method.") def testing_function(name, distance_measure=None, bilinear=None, display=False, ent_hidden_size=None, rel_hidden_size=None, channels=None): """Function to test the models with arguments.""" diff --git a/pykg2vec/utils/visualization.py b/pykg2vec/utils/visualization.py index 150b076..ad0b1ff 100644 --- a/pykg2vec/utils/visualization.py +++ b/pykg2vec/utils/visualization.py @@ 
-52,9 +52,9 @@ def __init__(self, model=None, vis_opts=None): self.model = model - self.algo_list = ['Complex', 'ComplexN3', 'ConvE', 'HoLE', 'DistMult', 'DistMult2', 'KG2E_EL', 'KG2E_KL', - 'KGMeta', 'NTN', 'ProjE_pointwise', 'Rescal', 'RotatE', 'SLM', 'SME_Bilinear', 'SME_Linear', - 'TransD', 'TransE', 'TransH', 'TransM', 'TransR', 'TuckER'] + self.algo_list = ['ANALOGY', 'Complex', 'ComplexN3', 'ConvE', 'CP', 'DistMult', 'DistMult2', 'HoLE', + 'KG2E_EL', 'KG2E_KL', 'KGMeta', 'NTN', 'ProjE_pointwise', 'Rescal', 'RotatE', 'SLM', + 'SME_Bilinear', 'SME_Linear', 'TransD', 'TransE', 'TransH', 'TransM', 'TransR', 'TuckER'] self.h_name = [] self.r_name = [] From 0d36edf1e94cc55a74937479d916446a2aa2c9c0 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 17 Apr 2020 09:20:24 +0100 Subject: [PATCH 11/19] add NELL_955 datat set and update README --- README.md | 8 ++-- pykg2vec/utils/kgcontroller.py | 73 ++++++++++++++++++++++++++++------ 2 files changed, 65 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 4360896..1a760e3 100644 --- a/README.md +++ b/README.md @@ -84,22 +84,22 @@ $ python train.py -h $ python train.py -mn TransE # Train using different KGE methods. -$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|RotatE| - distmult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise] +$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|CP|RotatE|Analogy| + DistMult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise] # For KGE using projection-based loss function, use more processes for batch generation. $ python train.py -mn [ConvE|ConvKB|Proje_pointwise] -npg [the number of processes, 4 or 6] # Train TransE model using different benchmark datasets. 
$ python train.py -mn TransE -ds [fb15k|wn18|wn18_rr|yago3_10|fb15k_237| - ks|nations|umls|dl50a] + ks|nations|umls|dl50a|nell_955] ``` Pykg2vec aims to include most of the state-of-the-art KGE methods. You can check [Implemented Algorithms](https://pykg2vec.readthedocs.io/en/latest/algos.html) for more details. Some models are still under development [Conv2D|TuckER]. To ensure the correctness of included KGE methods we also use the hyperparameter settings from original papers to see if the result is consistent. ```bash # train KGE method with the hyperparameters used in original papers. (FB15k supported only) -$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|RotatE| +$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|CP|RotatE|Analogy| distmult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise] -exp true -ds fb15k ``` diff --git a/pykg2vec/utils/kgcontroller.py b/pykg2vec/utils/kgcontroller.py index ef5ecad..9831aa3 100644 --- a/pykg2vec/utils/kgcontroller.py +++ b/pykg2vec/utils/kgcontroller.py @@ -3,9 +3,7 @@ """ This module is for controlling knowledge graph """ - - -import shutil, tarfile, pickle, time +import shutil, tarfile, pickle, time, os, zipfile import urllib.request from pathlib import Path from collections import defaultdict @@ -91,7 +89,7 @@ def __init__(self, tot_entity=None, self.tot_entity = tot_entity -def extract(tar_path, extract_path='.'): +def extract_tar(tar_path, extract_path='.'): """This function extracts the tar file. 
Most of the knowledge graph dataset are donwloaded in a compressed @@ -108,7 +106,23 @@ def extract(tar_path, extract_path='.'): for item in tar: tar.extract(item, extract_path) if item.name.find(".tgz") != -1 or item.name.find(".tar") != -1: - extract(item.name, "./" + item.name[:item.name.rfind('/')]) + extract_tar(item.name, "./" + item.name[:item.name.rfind('/')]) + +def extract_zip(zip_path, extract_path='.'): + """This function extracts the zip file. + + Most of the knowledge graph dataset are donwloaded in a compressed + zip format. This function is used to extract them + + Args: + zip_path (str): Location of the zip folder. + extract_path (str): Path where the files will be decompressed. + + Todo: + * Move this module to utils! + """ + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(extract_path) class KnownDataset: @@ -152,13 +166,14 @@ def __init__(self, name, url, prefix): self.dataset_home_path = self.dataset_home_path.resolve() self.root_path = self.dataset_home_path / self.name self.tar = self.root_path / ('%s.tgz' % self.name) + self.zip = self.root_path / ('%s.zip' % self.name) if not self.root_path.exists(): self.download() self.extract() path_eq_root = ['YAGO3_10', 'WN18RR', 'FB15K_237', 'Kinship', - 'Nations', 'UMLS'] + 'Nations', 'UMLS', 'NELL_995'] if self.name == 'WN18': self.dataset_path = self.root_path / 'wordnet-mlj12' elif self.name in path_eq_root: @@ -194,17 +209,29 @@ def download(self): self._logger.info("Downloading the dataset %s" % self.name) self.root_path.mkdir() - with urllib.request.urlopen(self.url) as response, open(str(self.tar), 'wb') as out_file: - shutil.copyfileobj(response, out_file) + if self.url.endswith('.tar.gz') or self.url.endswith('.tgz'): + with urllib.request.urlopen(self.url) as response, open(str(self.tar), 'wb') as out_file: + shutil.copyfileobj(response, out_file) + elif self.url.endswith('.zip'): + with urllib.request.urlopen(self.url) as response, open(str(self.zip), 'wb') as 
out_file: + shutil.copyfileobj(response, out_file) + else: + raise NotImplementedError("Unknown compression format") def extract(self): - ''' Extract the downloaded tar under the folder with the given dataset name''' - self._logger.info("Extracting the downloaded dataset from %s to %s" % (self.tar, self.root_path)) + ''' Extract the downloaded file under the folder with the given dataset name''' try: - extract(str(self.tar), str(self.root_path)) + if (os.path.exists(self.tar)): + self._logger.info("Extracting the downloaded dataset from %s to %s" % (self.tar, self.root_path)) + extract_tar(str(self.tar), str(self.root_path)) + return + if (os.path.exists(self.zip)): + self._logger.info("Extracting the downloaded dataset from %s to %s" % (self.zip, self.root_path)) + extract_zip(str(self.zip), str(self.root_path)) + return except Exception as e: - self._logger.info("Could not extract the tgz file!") + self._logger.info("Could not extract the target file!") self._logger.info("%s %s" % (type(e), e.args)) def read_metadata(self): @@ -403,6 +430,26 @@ def __init__(self): KnownDataset.__init__(self, name, url, prefix) +class NELL_995(KnownDataset): + """This data structure defines the necessary information for downloading NELL-995 dataset. + + NELL-995 module inherits the KnownDataset class for processing + the knowledge graph dataset. + + Attributes: + name (str): Name of the datasets + url (str): The full url where the dataset resides. + prefix (str): The prefix of the dataset given the website. + + """ + def __init__(self): + name = "NELL_995" + url = "https://github.com/louisccc/KGppler/raw/master/datasets/NELL_995.zip" + prefix = '' + + KnownDataset.__init__(self, name, url, prefix) + + class UserDefinedDataset(object): """The class consists of modules to handle the user defined datasets. 
@@ -533,6 +580,8 @@ def __init__(self, dataset='Freebase15k', custom_dataset_path=None): self.dataset = Nations() elif dataset.lower() == 'umls': self.dataset = UMLS() + elif dataset.lower() == 'nell_995': + self.dataset = NELL_995() else: # if the dataset does not match with existing one, check if it exists in user's local space. # if it still can't find corresponding folder, raise exception in UserDefinedDataset.__init__() From 4c59c5be1a6278a7c7249b79ee936c3f8c5c89f5 Mon Sep 17 00:00:00 2001 From: baxtree Date: Sat, 18 Apr 2020 12:54:18 +0100 Subject: [PATCH 12/19] improve the test on compressed dataset extraction --- pykg2vec/test/test_kg.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pykg2vec/test/test_kg.py b/pykg2vec/test/test_kg.py index 546b475..5db09de 100644 --- a/pykg2vec/test/test_kg.py +++ b/pykg2vec/test/test_kg.py @@ -1,5 +1,6 @@ import os, pytest -from pykg2vec.utils.kgcontroller import KnowledgeGraph +from pathlib import Path +from pykg2vec.utils.kgcontroller import KnowledgeGraph, KnownDataset @pytest.mark.parametrize("dataset_name", ["freebase15k", "wordnet18", "wordnet18_rr", "yago3_10"]) def test_benchmarks(dataset_name): @@ -62,3 +63,21 @@ def test_userdefined_dataset(): assert knowledge_graph.kg_meta.tot_valid_triples == 1 assert knowledge_graph.kg_meta.tot_entity == 6 assert knowledge_graph.kg_meta.tot_relation == 3 + +@pytest.mark.parametrize('file_name, new_ext', [ + ('dataset.tar.gz', 'tgz'), + ('dataset.tgz', 'tgz'), + ('dataset.zip', 'zip'), +]) +def test_extract_compressed_dataset(file_name, new_ext): + url = Path('resource/%s' % file_name).absolute().as_uri() + dataset_name = 'test_dataset_%s' % file_name.replace('.', '_') + dataset = KnownDataset(dataset_name, url, 'userdefineddataset-') + dataset_dir = os.path.join(dataset.dataset_home_path, dataset_name) + dataset_files = os.listdir(dataset_dir) + + assert len(dataset_files) == 4 + assert dataset_name + '.' 
+ new_ext in dataset_files + assert 'userdefineddataset-train.txt' in dataset_files + assert 'userdefineddataset-test.txt' in dataset_files + assert 'userdefineddataset-valid.txt' in dataset_files From 2721da3eb89e3d13f26a5bedb4c23cfcc0fea021 Mon Sep 17 00:00:00 2001 From: baxtree Date: Sat, 18 Apr 2020 12:57:36 +0100 Subject: [PATCH 13/19] add sample files --- pykg2vec/test/resource/dataset.tar.gz | Bin 0 -> 500 bytes pykg2vec/test/resource/dataset.tgz | Bin 0 -> 500 bytes pykg2vec/test/resource/dataset.zip | Bin 0 -> 788 bytes 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pykg2vec/test/resource/dataset.tar.gz create mode 100644 pykg2vec/test/resource/dataset.tgz create mode 100644 pykg2vec/test/resource/dataset.zip diff --git a/pykg2vec/test/resource/dataset.tar.gz b/pykg2vec/test/resource/dataset.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..15a255f9a3dd8b6aa454ef9553ff6fcbaf66ecd7 GIT binary patch literal 500 zcmVgiwFR-{hD3?1MQc)Zrd;vhP_r>-hflLs^=Wu2TfU0phJL8odPyuTcM5u z$aLVbFV?kB(7sP;yROpKbsAq}Bst$gfd`-{iN8MbJj~8wsAKnXI*DB!-#2Zn>yW}E zrWYwrQkiCHv>O0mloBJmpl{(PWR*Z^l+jjONdnMXsYYV9<9+{&=_G|NPvu=`W`Ns| zvwRfst9>A#6(5?qB4@3ZUO|yNrn$pOrMy;37ZSu(YOZ31+F3#^iAqI;1{GpKPF&uu zDlbdtoY%${ zx2m+`ru|f5aqM^Z7&u(&dsg}XoUifA<@b4e^8D+!RsJC_u|M*kzxYVN-e$0YC!YV0 z`R9Lu&$>9yQN3>SWeUysVuu&OJS5IH`o9ZJyZf);ZvWfg^G^U)v;T)+TmNT=8|}=z zy2k(BC@@aWtLt_E7Dxl$cMITn3$T9giwFR-{hD3?1MQc)Zrd;vhP_r>-hflLs^=Wu2TfU0phJL8odPyuTcM5u z$aLVbFV?kB(7sP;yROpKbsAq}Bst$gfd`-{iN8MbJj~8wsAKnXI*DB!-#2Zn>yW}E zrWYwrQkiCHv>O0mloBJmpl{(PWR*Z^l+jjONdnMXsYYV9<9+{&=_G|NPvu=`W`Ns| zvwRfst9>A#6(5?qB4@3ZUO|yNrn$pOrMy;37ZSu(YOZ31+F3#^iAqI;1{GpKPF&uu zDlbdtoY%${ zx2m+`ru|f5aqM^Z7&u(&dsg}XoUifA<@b4e^8D+!RsJC_u|M*kzxYVN-e$0YC!YV0 z`R9Lu&$>9yQN3>SWeUysVuu&OJS5IH`o9ZJyZf);ZvWfg^G^U)v;T)+TmNT=8|}=z zy2k(BC@@aWtLt_E7Dxl$cMITn3$T9 Date: Sat, 18 Apr 2020 13:11:41 +0100 Subject: [PATCH 14/19] fix the file path for the ci build --- pykg2vec/test/test_kg.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/pykg2vec/test/test_kg.py b/pykg2vec/test/test_kg.py index 5db09de..1737406 100644 --- a/pykg2vec/test/test_kg.py +++ b/pykg2vec/test/test_kg.py @@ -40,7 +40,7 @@ def test_fb15k_meta(): def test_userdefined_dataset(): - custom_dataset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resource/custom_dataset") + custom_dataset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resource', 'custom_dataset') knowledge_graph = KnowledgeGraph(dataset="userdefineddataset", custom_dataset_path=custom_dataset_path) knowledge_graph.prepare_data() knowledge_graph.dump() @@ -70,7 +70,7 @@ def test_userdefined_dataset(): ('dataset.zip', 'zip'), ]) def test_extract_compressed_dataset(file_name, new_ext): - url = Path('resource/%s' % file_name).absolute().as_uri() + url = Path(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resource', file_name)).absolute().as_uri() dataset_name = 'test_dataset_%s' % file_name.replace('.', '_') dataset = KnownDataset(dataset_name, url, 'userdefineddataset-') dataset_dir = os.path.join(dataset.dataset_home_path, dataset_name) From 53f6fe4a59d23a13bc4c8cfdcc3bae6345ad7eb1 Mon Sep 17 00:00:00 2001 From: baxtree Date: Sat, 18 Apr 2020 13:15:56 +0100 Subject: [PATCH 15/19] fix the zip archive --- pykg2vec/test/resource/dataset.zip | Bin 788 -> 644 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/pykg2vec/test/resource/dataset.zip b/pykg2vec/test/resource/dataset.zip index 59a882d93a317482a35b85b7ad8ad87995fa0f28..f078da3484611b74dc8e5eda3c9a89506c88b067 100644 GIT binary patch literal 644 zcmWIWW@h1H00Gg$Ndex;`&UT-*&r;>MiASKP;Gn7tWZS2wz9;W%oL(*Ge+2E46)5P zz?+dtju}@_f^24BUr W0OGQNl?~*5W*}4rhK467L>K^)eYBkb literal 788 zcmWIWW@h1H0D;27NdaI6l;B_xU`R Date: Mon, 20 Apr 2020 09:57:23 +0100 Subject: [PATCH 16/19] add the SimplE model --- pykg2vec/config/config.py | 58 +++++++++++ pykg2vec/config/hyperparams.py | 39 +++++++- pykg2vec/core/ANALOGY.py | 
5 +- pykg2vec/core/CP.py | 4 +- pykg2vec/core/SimplE.py | 139 +++++++++++++++++++++++++++ pykg2vec/core/TransR.py | 2 +- pykg2vec/test/test_inference.py | 2 + pykg2vec/test/test_model.py | 2 + pykg2vec/test/test_tune_model.py | 2 + pykg2vec/utils/bayesian_optimizer.py | 8 ++ pykg2vec/utils/generator.py | 6 +- pykg2vec/utils/visualization.py | 5 +- 12 files changed, 262 insertions(+), 10 deletions(-) create mode 100644 pykg2vec/core/SimplE.py diff --git a/pykg2vec/config/config.py b/pykg2vec/config/config.py index 5d5968c..0761d86 100644 --- a/pykg2vec/config/config.py +++ b/pykg2vec/config/config.py @@ -54,6 +54,8 @@ def __init__(self): "proje_pointwise": "ProjE_pointwise.ProjE_pointwise", "rescal": "Rescal.Rescal", "rotate": "RotatE.RotatE", + "simple": "SimplE.SimplE", + "simple_ignr": "SimplE.SimplE_ignr", "slm": "SLM.SLM", "sme": "SME.SME", "sme_bl": "SME.SME_BL", @@ -79,6 +81,8 @@ def __init__(self): "proje_pointwise": "ProjE_pointwiseConfig", "rescal": "RescalConfig", "rotate": "RotatEConfig", + "simple": "SimplEConfig", + "simple_ignr": "SimplEConfig", "slm": "SLMConfig", "sme": "SMEConfig", "sme_bl": "SMEConfig", @@ -1592,3 +1596,57 @@ def __init__(self, args=None): } BasicConfig.__init__(self, args) + + +class SimplEConfig(BasicConfig): + """This class defines the configuration for the SimplE Algorithm. + + SimplEConfig inherits the BasicConfig and defines the local arguements used in the + algorithm. + + Attributes: + hyperparameters (dict): Defines the dictionary of hyperparameters to be used by bayesian optimizer for tuning. + + Args: + lambda (float) : Weigth applied to the regularization in the loss function. + learning_rate (float): Defines the learning rate for the optimization. + L1_flag (bool): If True, perform L1 regularization on the model parameters. + hidden_size (int): Defines the size of the latent dimension for entities and relations. + batch_size (int): Defines the batch size for training the algorithm. 
+ epochs (int): Defines the total number of epochs for training the algorithm. + margin (float): Defines the margin used between the positive and negative triple loss. + data (str): Defines the knowledge base dataset to be used for training the algorithm. + optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc. + sampling (str): Defines the sampling (bern or uniform) for corrupting the triples. + + """ + + def __init__(self, args=None): + self.lmbda = args.lmbda + self.learning_rate = args.learning_rate + self.hidden_size = args.hidden_size + self.batch_size = args.batch_training + self.epochs = args.epochs + self.data = args.dataset_name + self.optimizer = args.optimizer + self.sampling = args.sampling + self.neg_rate = args.negrate + + if args.exp is True: + paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'simple') + for key, value in paper_params.items(): + self.__dict__[key] = value # copy all the setting from the paper. + + self.hyperparameters = { + 'lmbda': self.lmbda, + 'learning_rate': self.learning_rate, + 'hidden_size': self.hidden_size, + 'batch_size': self.batch_size, + 'epochs': self.epochs, + 'data': self.data, + 'optimizer': self.optimizer, + 'sampling': self.sampling, + 'neg_rate': self.neg_rate, + } + + BasicConfig.__init__(self, args) diff --git a/pykg2vec/config/hyperparams.py b/pykg2vec/config/hyperparams.py index 60b7b40..eda7ed6 100644 --- a/pykg2vec/config/hyperparams.py +++ b/pykg2vec/config/hyperparams.py @@ -34,7 +34,8 @@ def __init__(self): 'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0}, 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1}, 'cp': {'learning_rate': 0.01, 'hidden_size': 50, 
'batch_size': 128, 'epochs': 50, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001}, - 'analogy': {'learning_rate': 0.1, 'hidden_size': 200, 'batch_size': 128, 'epochs': 500, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001} + 'analogy': {'learning_rate': 0.1, 'hidden_size': 200, 'batch_size': 128, 'epochs': 500, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001}, + 'simple': {'learning_rate': 0.01, 'hidden_size': 100, 'batch_size': 128, 'epochs': 1000, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001} } } @@ -770,3 +771,39 @@ def __init__(self): 'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']), 'epochs': hp.choice('epochs', [10]) # always choose 10 training epochs. } + +class SimplEParams: + """This class defines the hyperameters and its ranges for tuning SimplE algorithm. + + SimplEParams defines all the possibel values to be tuned for the algorithm. User may + + change these values directly for performing the bayesian optimization of the hyper-parameters + + Args: + lambda (list) : List of floating point values. + feature_map_dropout (list) :List of floating point values. + input_dropout (list) : List of floating point values. + hidden_dropout (list) : List of floating point values. + use_bias (list) :List of boolean values. + label_smoothing (list) : List of floating point values. + lr_decay (float) : List of floating point values. + learning_rate (list): List of floating point values. + L1_flag (list): List of boolean values. + hidden_size (list): List of integer values. + batch_size (list): List of integer values. + epochs (list): List of integer values. + margin (list): List of floating point values. + optimizer (list): List of strings defining the optimization algorithm to be used. + sampling (list): List of string defining the sampling to be used for generating negative examples. 
+ + """ + + def __init__(self): + self.search_space = { + 'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)), + 'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256),1)), + 'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096),1)), + 'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)), + 'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']), + 'epochs': hp.choice('epochs', [10]) # always choose 10 training epochs. + } diff --git a/pykg2vec/core/ANALOGY.py b/pykg2vec/core/ANALOGY.py index 8b1e4e5..c0c2b92 100644 --- a/pykg2vec/core/ANALOGY.py +++ b/pykg2vec/core/ANALOGY.py @@ -25,7 +25,10 @@ def def_parameters(self): k (Tensor): Size of the latent dimesnion for entities and relations. ent_embeddings (Tensor Variable): Lookup variable containing embedding of the entities. rel_embeddings (Tensor Variable): Lookup variable containing embedding of the relations. - b (Tensor Variable): Variable storing the bias values. + ent_embeddings_real (Tensor Variable): Lookup variable containing real values of the entities. + ent_embeddings_img (Tensor Variable): Lookup variable containing imaginary values of the entities. + rel_embeddings_real (Tensor Variable): Lookup variable containing real values of the relations. + rel_embeddings_img (Tensor Variable): Lookup variable containing imaginary values of the relations. parameter_list (list): List of Tensor parameters. """ num_total_ent = self.config.kg_meta.tot_entity diff --git a/pykg2vec/core/CP.py b/pykg2vec/core/CP.py index 385ff7e..da0b647 100644 --- a/pykg2vec/core/CP.py +++ b/pykg2vec/core/CP.py @@ -23,9 +23,9 @@ def def_parameters(self): num_total_ent (int): Total number of entities. num_total_rel (int): Total number of relations. k (Tensor): Size of the latent dimesnion for entities and relations. - ent_embeddings (Tensor Variable): Lookup variable containing embedding of the entities. 
+ sub_embeddings (Tensor Variable): Lookup variable containing embedding of the head entities. rel_embeddings (Tensor Variable): Lookup variable containing embedding of the relations. - b (Tensor Variable): Variable storing the bias values. + obj_embeddings (Tensor Variable): Lookup variable containing embedding of the tail entities. parameter_list (list): List of Tensor parameters. """ num_total_ent = self.config.kg_meta.tot_entity diff --git a/pykg2vec/core/SimplE.py b/pykg2vec/core/SimplE.py new file mode 100644 index 0000000..d07d1ba --- /dev/null +++ b/pykg2vec/core/SimplE.py @@ -0,0 +1,139 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from pykg2vec.core.KGMeta import ModelMeta +from pykg2vec.utils.generator import TrainingStrategy + + +class SimplE(ModelMeta): + + def __init__(self, config): + super(SimplE, self).__init__() + self.config = config + self.model_name = 'SimplE_avg' + self.training_strategy = TrainingStrategy.POINTWISE_BASED + + def def_parameters(self): + """Defines the model parameters. + + Attributes: + num_total_ent (int): Total number of entities. + num_total_rel (int): Total number of relations. + k (Tensor): Size of the latent dimesnion for entities and relations. + ent_head_embeddings (Tensor Variable): Lookup variable containing embedding of the head entities. + ent_tail_embeddings (Tensor Variable): Lookup variable containing embedding of the tail relations. + rel_embeddings (Tensor Variable): Lookup variable containing embedding of the entities. + rel_inv_embeddings (Tensor Variable): Lookup variable containing embedding of the inverse relations. + parameter_list (list): List of Tensor parameters. 
+ """ + num_total_ent = self.config.kg_meta.tot_entity + num_total_rel = self.config.kg_meta.tot_relation + k = self.config.hidden_size + + emb_initializer = tf.initializers.glorot_normal() + self.ent_head_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="ent_head_embedding") + self.ent_tail_embeddings = tf.Variable(emb_initializer(shape=(num_total_ent, k)), name="ent_tail_embedding") + self.rel_embeddings = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="rel_embedding") + self.rel_inv_embeddings = tf.Variable(emb_initializer(shape=(num_total_rel, k)), name="rel_inv_embedding") + self.parameter_list = [self.ent_head_embeddings, self.ent_tail_embeddings, self.rel_embeddings, self.rel_inv_embeddings] + + + def embed(self, h, r, t): + """Function to get the embedding value. + + Args: + h (Tensor): Head entities ids. + r (Tensor): Relation ids of the triple. + t (Tensor): Tail entity ids of the triple. + + Returns: + Tensors: Returns head, relation and tail embedding Tensors. + """ + emb_h1 = tf.nn.embedding_lookup(self.ent_head_embeddings, h) + emb_h2 = tf.nn.embedding_lookup(self.ent_head_embeddings, t) + emb_r1 = tf.nn.embedding_lookup(self.rel_embeddings, r) + emb_r2 = tf.nn.embedding_lookup(self.rel_inv_embeddings, r) + emb_t1 = tf.nn.embedding_lookup(self.ent_tail_embeddings, t) + emb_t2 = tf.nn.embedding_lookup(self.ent_tail_embeddings, h) + return emb_h1, emb_h2, emb_r1, emb_r2, emb_t1, emb_t2 + + def embed2(self, h, r, t): + """Function to get the embedding value. + + Args: + h (Tensor): Head entities ids. + r (Tensor): Relation ids of the triple. + t (Tensor): Tail entity ids of the triple. + + Returns: + Tensors: Returns head, relation and tail embedding Tensors. 
+ """ + + emb_h = tf.nn.embedding_lookup(self.ent_head_embeddings, h) + emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) + emb_r_rev = tf.nn.embedding_lookup(self.rel_inv_embeddings, r) + emb_t = tf.nn.embedding_lookup(self.ent_tail_embeddings, t) + + return emb_h, emb_r, emb_r_rev, emb_t + + def forward(self, h, r, t): + h1_e, h2_e, r1_e, r2_e, t1_e, t2_e = self.embed(h, r, t) + + norm_h1_e = tf.nn.l2_normalize(h1_e, -1) + norm_h2_e = tf.nn.l2_normalize(h2_e, -1) + norm_r1_e = tf.nn.l2_normalize(r1_e, -1) + norm_r2_e = tf.nn.l2_normalize(r2_e, -1) + norm_t1_e = tf.nn.l2_normalize(t1_e, -1) + norm_t2_e = tf.nn.l2_normalize(t2_e, -1) + + init = (tf.reduce_sum(tf.multiply(tf.multiply(norm_h1_e, norm_r1_e), norm_t1_e), 1) + + tf.reduce_sum(tf.multiply(tf.multiply(norm_h2_e, norm_r2_e), norm_t2_e), 1)) / 2.0 + return tf.clip_by_value(init, -20, 20) + + def get_reg(self, h, r, t): + h_e, r_e, r_rev_e, t_e = self.embed2(h, r, t) + regul_term = tf.nn.l2_loss(h_e) + tf.nn.l2_loss(t_e) + tf.nn.l2_loss(r_e) + tf.nn.l2_loss(r_rev_e) + return self.config.lmbda * regul_term + + +class SimplE_ignr(SimplE): + + def __init__(self, config): + super(SimplE, self).__init__() + self.config = config + self.model_name = 'SimplE_ignr' + self.training_strategy = TrainingStrategy.POINTWISE_BASED + + def embed(self, h, r, t): + """Function to get the embedding value. + + Args: + h (Tensor): Head entities ids. + r (Tensor): Relation ids of the triple. + t (Tensor): Tail entity ids of the triple. + + Returns: + Tensors: Returns head, relation and tail embedding Tensors. 
+ """ + + emb_h = tf.concat([tf.gather(self.ent_head_embeddings, h), tf.gather(self.ent_head_embeddings, t)], 1) + emb_r = tf.concat([tf.gather(self.rel_embeddings, r), tf.gather(self.rel_inv_embeddings, r)], 1) + emb_t = tf.concat([tf.gather(self.ent_tail_embeddings, t), tf.gather(self.ent_tail_embeddings, h)], 1) + + return emb_h, emb_r, emb_t + + def forward(self, h, r, t): + h_e, r_e, t_e = self.embed(h, r, t) + + init = tf.reduce_sum(tf.multiply(tf.multiply(h_e, r_e), t_e), 1) + return tf.clip_by_value(init, -20, 20) + + def get_reg(self, h, r, t): + h_e, r_e, r_rev_e, t_e = self.embed2(h, r, t) + regul_term = 2.0 * (tf.nn.l2_loss(h_e) + tf.nn.l2_loss(t_e) + tf.nn.l2_loss(r_e) + tf.nn.l2_loss(r_rev_e)) + return self.config.lmbda * regul_term + + diff --git a/pykg2vec/core/TransR.py b/pykg2vec/core/TransR.py index 492b062..d0031a2 100644 --- a/pykg2vec/core/TransR.py +++ b/pykg2vec/core/TransR.py @@ -101,7 +101,7 @@ def embed(self, h, r, t): transform_h_e = tf.matmul(h_e, matrix) transform_t_e = tf.matmul(t_e, matrix) - # [b, d, 1] = [b, 1, k] * [b, k, d] + # [b, 1, d] = [b, 1, k] * [b, k, d] h_e = tf.squeeze(transform_h_e, axis=1) t_e = tf.squeeze(transform_t_e, axis=1) diff --git a/pykg2vec/test/test_inference.py b/pykg2vec/test/test_inference.py index eb12d20..bda9a54 100644 --- a/pykg2vec/test/test_inference.py +++ b/pykg2vec/test/test_inference.py @@ -71,6 +71,8 @@ def testing_function_with_args(name, l1_flag, distance_measure=None, bilinear=No # 'proje_pointwise', 'rotate', 'rescal', + 'simple', + 'simple_ignr', 'slm', 'sme', 'transd', diff --git a/pykg2vec/test/test_model.py b/pykg2vec/test/test_model.py index 3f76f26..7eb30ae 100644 --- a/pykg2vec/test/test_model.py +++ b/pykg2vec/test/test_model.py @@ -20,6 +20,8 @@ 'proje_pointwise', 'rescal', 'rotate', + 'simple', + 'simple_ignr', 'slm', 'transe', 'transh', diff --git a/pykg2vec/test/test_tune_model.py b/pykg2vec/test/test_tune_model.py index 1acffdf..f7a5b73 100644 --- 
a/pykg2vec/test/test_tune_model.py +++ b/pykg2vec/test/test_tune_model.py @@ -42,6 +42,8 @@ def tunning_function(name): 'ntn', 'rescal', 'rotate', + 'simple', + 'simple_ignr', 'slm', 'sme', 'sme_bl', diff --git a/pykg2vec/utils/bayesian_optimizer.py b/pykg2vec/utils/bayesian_optimizer.py index df195e0..30ae717 100644 --- a/pykg2vec/utils/bayesian_optimizer.py +++ b/pykg2vec/utils/bayesian_optimizer.py @@ -32,6 +32,8 @@ "proje_pointwise": "ProjE_pointwise", "rescal": "Rescal", "rotate": "RotatE", + "simple": "SimplE", + "simple_ignr": "SimplE", "slm": "SLM", "sme": "SME", "sme_bl": "SME", @@ -56,6 +58,8 @@ "proje_pointwise": "ProjE_pointwise", "rescal": "Rescal", "rotate": "RotatE", + "simple": "SimplE", + "simple_ignr": "SimplE_ignr", "slm": "SLM", "sme": "SME", "sme_bl": "SME_BL", @@ -80,6 +84,8 @@ "proje_pointwise": "ProjE_pointwiseConfig", "rescal": "RescalConfig", "rotate": "RotatEConfig", + "simple": "SimplEConfig", + "simple_ignr": "SimplEConfig", "slm": "SLMConfig", "sme": "SMEConfig", "sme_bl": "SMEConfig", @@ -104,6 +110,8 @@ "proje_pointwise": "ProjE_pointwiseParams", "rescal": "RescalParams", "rotate": "RotatEParams", + "simple": "SimplEParams", + "simple_ignr": "SimplEParams", "slm": "SLMParams", "sme": "SMEParams", "sme_bl": "SMEParams", diff --git a/pykg2vec/utils/generator.py b/pykg2vec/utils/generator.py index 9df5b3f..d09db62 100644 --- a/pykg2vec/utils/generator.py +++ b/pykg2vec/utils/generator.py @@ -325,6 +325,6 @@ def start_one_epoch(self, num_batch): class TrainingStrategy(Enum): - PROJECTION_BASED = "projection_based" - PAIRWISE_BASED = "pairwise_based" - POINTWISE_BASED = "pointwise_based" + PROJECTION_BASED = "projection_based" # matching models with neural network + PAIRWISE_BASED = "pairwise_based" # translational distance models + POINTWISE_BASED = "pointwise_based" # semantic matching models diff --git a/pykg2vec/utils/visualization.py b/pykg2vec/utils/visualization.py index ad0b1ff..d51c2ee 100644 --- a/pykg2vec/utils/visualization.py 
+++ b/pykg2vec/utils/visualization.py @@ -53,8 +53,9 @@ def __init__(self, model=None, vis_opts=None): self.model = model self.algo_list = ['ANALOGY', 'Complex', 'ComplexN3', 'ConvE', 'CP', 'DistMult', 'DistMult2', 'HoLE', - 'KG2E_EL', 'KG2E_KL', 'KGMeta', 'NTN', 'ProjE_pointwise', 'Rescal', 'RotatE', 'SLM', - 'SME_Bilinear', 'SME_Linear', 'TransD', 'TransE', 'TransH', 'TransM', 'TransR', 'TuckER'] + 'KG2E_EL', 'KG2E_KL', 'KGMeta', 'NTN', 'ProjE_pointwise', 'Rescal', 'RotatE', 'SimplE_avg', + 'SimplE_ignr', 'SLM', 'SME_Bilinear', 'SME_Linear', 'TransD', 'TransE', 'TransH', 'TransM', + 'TransR', 'TuckER'] self.h_name = [] self.r_name = [] From 18af48aa8ac86b51baaaea338ff5087356b712a3 Mon Sep 17 00:00:00 2001 From: baxtree Date: Mon, 20 Apr 2020 21:18:26 +0100 Subject: [PATCH 17/19] fix the SimplE model --- pykg2vec/core/SimplE.py | 45 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/pykg2vec/core/SimplE.py b/pykg2vec/core/SimplE.py index d07d1ba..f6456bf 100644 --- a/pykg2vec/core/SimplE.py +++ b/pykg2vec/core/SimplE.py @@ -2,6 +2,7 @@ from __future__ import division from __future__ import print_function +import math import tensorflow as tf from pykg2vec.core.KGMeta import ModelMeta @@ -60,42 +61,16 @@ def embed(self, h, r, t): emb_t2 = tf.nn.embedding_lookup(self.ent_tail_embeddings, h) return emb_h1, emb_h2, emb_r1, emb_r2, emb_t1, emb_t2 - def embed2(self, h, r, t): - """Function to get the embedding value. - - Args: - h (Tensor): Head entities ids. - r (Tensor): Relation ids of the triple. - t (Tensor): Tail entity ids of the triple. - - Returns: - Tensors: Returns head, relation and tail embedding Tensors. 
- """ - - emb_h = tf.nn.embedding_lookup(self.ent_head_embeddings, h) - emb_r = tf.nn.embedding_lookup(self.rel_embeddings, r) - emb_r_rev = tf.nn.embedding_lookup(self.rel_inv_embeddings, r) - emb_t = tf.nn.embedding_lookup(self.ent_tail_embeddings, t) - - return emb_h, emb_r, emb_r_rev, emb_t - def forward(self, h, r, t): h1_e, h2_e, r1_e, r2_e, t1_e, t2_e = self.embed(h, r, t) - norm_h1_e = tf.nn.l2_normalize(h1_e, -1) - norm_h2_e = tf.nn.l2_normalize(h2_e, -1) - norm_r1_e = tf.nn.l2_normalize(r1_e, -1) - norm_r2_e = tf.nn.l2_normalize(r2_e, -1) - norm_t1_e = tf.nn.l2_normalize(t1_e, -1) - norm_t2_e = tf.nn.l2_normalize(t2_e, -1) - - init = (tf.reduce_sum(tf.multiply(tf.multiply(norm_h1_e, norm_r1_e), norm_t1_e), 1) + - tf.reduce_sum(tf.multiply(tf.multiply(norm_h2_e, norm_r2_e), norm_t2_e), 1)) / 2.0 - return tf.clip_by_value(init, -20, 20) + init = tf.reduce_sum(h1_e*r1_e*t1_e, 1) + tf.reduce_sum(h2_e*r2_e*t2_e, 1) / 2.0 + return -tf.clip_by_value(init, -20, 20) def get_reg(self, h, r, t): - h_e, r_e, r_rev_e, t_e = self.embed2(h, r, t) - regul_term = tf.nn.l2_loss(h_e) + tf.nn.l2_loss(t_e) + tf.nn.l2_loss(r_e) + tf.nn.l2_loss(r_rev_e) + num_batch = math.ceil(self.config.kg_meta.tot_train_triples / self.config.batch_size) + regul_term = (tf.nn.l2_loss(self.ent_head_embeddings) + tf.nn.l2_loss(self.ent_tail_embeddings) + + tf.nn.l2_loss(self.rel_embeddings) + tf.nn.l2_loss(self.rel_inv_embeddings)) / num_batch**2 return self.config.lmbda * regul_term @@ -128,12 +103,10 @@ def embed(self, h, r, t): def forward(self, h, r, t): h_e, r_e, t_e = self.embed(h, r, t) - init = tf.reduce_sum(tf.multiply(tf.multiply(h_e, r_e), t_e), 1) - return tf.clip_by_value(init, -20, 20) + init = tf.reduce_sum(h_e*r_e*t_e, 1) + return -tf.clip_by_value(init, -20, 20) def get_reg(self, h, r, t): - h_e, r_e, r_rev_e, t_e = self.embed2(h, r, t) - regul_term = 2.0 * (tf.nn.l2_loss(h_e) + tf.nn.l2_loss(t_e) + tf.nn.l2_loss(r_e) + tf.nn.l2_loss(r_rev_e)) - return self.config.lmbda * 
regul_term + return 2.0 * super().get_reg(h, r, t) From f76ba581bc25d032eaa879e2d940a2a28c2e92a9 Mon Sep 17 00:00:00 2001 From: Louis Yu Date: Mon, 20 Apr 2020 13:18:54 -0700 Subject: [PATCH 18/19] submit new hyperparameters. --- pykg2vec/config/hyperparams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykg2vec/config/hyperparams.py b/pykg2vec/config/hyperparams.py index eda7ed6..fd932f5 100644 --- a/pykg2vec/config/hyperparams.py +++ b/pykg2vec/config/hyperparams.py @@ -35,7 +35,7 @@ def __init__(self): 'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1}, 'cp': {'learning_rate': 0.01, 'hidden_size': 50, 'batch_size': 128, 'epochs': 50, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001}, 'analogy': {'learning_rate': 0.1, 'hidden_size': 200, 'batch_size': 128, 'epochs': 500, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001}, - 'simple': {'learning_rate': 0.01, 'hidden_size': 100, 'batch_size': 128, 'epochs': 1000, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001} + 'simple': {'learning_rate': 0.05, 'hidden_size': 100, 'batch_size': 128, 'epochs': 1000, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.1} } } From 72f5ec5ec31697ef9d7a29a1a9d648c64f35686f Mon Sep 17 00:00:00 2001 From: Louis Yu Date: Mon, 20 Apr 2020 17:12:59 -0700 Subject: [PATCH 19/19] adding version. --- pykg2vec/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pykg2vec/_version.py b/pykg2vec/_version.py index bf41adb..a3ebe30 100644 --- a/pykg2vec/_version.py +++ b/pykg2vec/_version.py @@ -1 +1 @@ -__version__ = "0.0.50" +__version__ = "0.0.51"