Merge pull request #142 from Sujit-O/development

Development

louisccc committed Apr 21, 2020
2 parents 561d4ea + 19c789b commit eb5ea60
Showing 24 changed files with 786 additions and 80 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -86,22 +86,22 @@ $ python train.py -h
$ python train.py -mn TransE

# Train using different KGE methods.
-$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|RotatE|
-distmult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise]
+$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|CP|RotatE|Analogy|
+DistMult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise]

# For KGE using projection-based loss function, use more processes for batch generation.
$ python train.py -mn [ConvE|ConvKB|Proje_pointwise] -npg [the number of processes, 4 or 6]

# Train TransE model using different benchmark datasets.
$ python train.py -mn TransE -ds [fb15k|wn18|wn18_rr|yago3_10|fb15k_237|
-ks|nations|umls|dl50a]
+ks|nations|umls|dl50a|nell_955]

```
Pykg2vec aims to include most of the state-of-the-art KGE methods. You can check [Implemented Algorithms](https://pykg2vec.readthedocs.io/en/latest/algos.html) for more details. Some models are still under development [Conv2D|TuckER].
To verify the correctness of the included KGE methods, we also train them with the hyperparameter settings reported in the original papers and check that the results are consistent.
```bash
# train KGE method with the hyperparameters used in original papers. (FB15k supported only)
-$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|RotatE|
+$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|CP|RotatE|Analogy|
distmult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise] -exp true -ds fb15k

```
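For instance, with the flags shown above, the newly added CP model can be trained with its reported paper settings on FB15k:

```bash
# Train the newly added CP model with the hyperparameters reported in its paper.
$ python train.py -mn CP -exp true -ds fb15k
```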
2 changes: 1 addition & 1 deletion pykg2vec/_version.py
@@ -1 +1 @@
__version__ = "0.0.50"
__version__ = "0.0.51"
175 changes: 172 additions & 3 deletions pykg2vec/config/config.py
@@ -40,10 +40,12 @@ def __init__(self):
self.model_path = "pykg2vec.core"
self.config_path = "pykg2vec.config.config"

self.modelMap = {"complex": "Complex.Complex",
self.modelMap = {"analogy": "ANALOGY.ANALOGY",
"complex": "Complex.Complex",
"complexn3": "Complex.ComplexN3",
"conve": "ConvE.ConvE",
"convkb": "ConvKB.ConvKB",
"cp": "CP.CP",
"hole": "HoLE.HoLE",
"distmult": "DistMult.DistMult",
"kg2e": "KG2E.KG2E",
@@ -52,6 +54,8 @@ def __init__(self):
"proje_pointwise": "ProjE_pointwise.ProjE_pointwise",
"rescal": "Rescal.Rescal",
"rotate": "RotatE.RotatE",
"simple": "SimplE.SimplE",
"simple_ignr": "SimplE.SimplE_ignr",
"slm": "SLM.SLM",
"sme": "SME.SME",
"sme_bl": "SME.SME_BL",
@@ -63,10 +67,12 @@ def __init__(self):
"transr": "TransR.TransR",
"tucker": "TuckER.TuckER"}

self.configMap = {"complex": "ComplexConfig",
self.configMap = {"analogy": "ANALOGYConfig",
"complex": "ComplexConfig",
"complexn3": "ComplexConfig",
"conve": "ConvEConfig",
"convkb": "ConvKBConfig",
"cp": "CPConfig",
"hole": "HoLEConfig",
"distmult": "DistMultConfig",
"kg2e": "KG2EConfig",
@@ -75,6 +81,8 @@ def __init__(self):
"proje_pointwise": "ProjE_pointwiseConfig",
"rescal": "RescalConfig",
"rotate": "RotatEConfig",
"simple": "SimplEConfig",
"simple_ignr": "SimplEConfig",
"slm": "SLMConfig",
"sme": "SMEConfig",
"sme_bl": "SMEConfig",
@@ -1318,6 +1326,7 @@ def __init__(self, args=None):

BasicConfig.__init__(self, args)


class TuckERConfig(BasicConfig):
"""This class defines the configuration for the TuckER Algorithm.
@@ -1480,4 +1489,164 @@ def __init__(self, args=None):
'neg_rate': self.neg_rate,

}
-BasicConfig.__init__(self, args)
+BasicConfig.__init__(self, args)


class CPConfig(BasicConfig):
    """This class defines the configuration for the Canonical Tensor Decomposition Algorithm.
    CPConfig inherits the BasicConfig and defines the local arguments used in the
    algorithm.
    Attributes:
        hyperparameters (dict): Defines the dictionary of hyperparameters to be used by the bayesian optimizer for tuning.
    Args:
        lambda (float): Weight applied to the regularization in the loss function.
        learning_rate (float): Defines the learning rate for the optimization.
        L1_flag (bool): If True, perform L1 regularization on the model parameters.
        hidden_size (int): Defines the size of the latent dimension for entities and relations.
        batch_size (int): Defines the batch size for training the algorithm.
        epochs (int): Defines the total number of epochs for training the algorithm.
        margin (float): Defines the margin used between the positive and negative triple loss.
        data (str): Defines the knowledge base dataset to be used for training the algorithm.
        optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc.
        sampling (str): Defines the sampling (bern or uniform) for corrupting the triples.
    """

    def __init__(self, args=None):
        self.lmbda = args.lmbda
        self.learning_rate = args.learning_rate
        self.hidden_size = args.hidden_size
        self.batch_size = args.batch_training
        self.epochs = args.epochs
        self.data = args.dataset_name
        self.optimizer = args.optimizer
        self.sampling = args.sampling
        self.neg_rate = args.negrate

        if args.exp is True:
            paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'cp')
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.

        self.hyperparameters = {
            'lmbda': self.lmbda,
            'learning_rate': self.learning_rate,
            'hidden_size': self.hidden_size,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'data': self.data,
            'optimizer': self.optimizer,
            'sampling': self.sampling,
            'neg_rate': self.neg_rate,
        }

        BasicConfig.__init__(self, args)
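To make the `-exp true` branch above concrete, here is a minimal sketch that mimics the `__dict__` copy loop with plain dictionaries; the CLI flag values are hypothetical, while the paper settings match the `cp` entry added to hyperparams.py below. The same pattern applies to the ANALOGYConfig and SimplEConfig classes that follow.

```python
# Hypothetical CLI flag values, standing in for the config object's __dict__.
cli_flags = {"learning_rate": 0.001, "hidden_size": 100, "batch_size": 256}

# Paper settings for 'cp', as listed in the hyperparams.py change below.
paper_params = {"learning_rate": 0.01, "hidden_size": 50, "batch_size": 128,
                "epochs": 50, "optimizer": "adagrad", "sampling": "uniform",
                "neg_rate": 1, "lmbda": 0.0001}

cli_flags.update(paper_params)          # same effect as the per-key __dict__ copy loop
print(cli_flags["learning_rate"])       # 0.01 -- the paper value overrides the CLI flag
```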


class ANALOGYConfig(BasicConfig):
    """This class defines the configuration for the ANALOGY Algorithm.
    ANALOGYConfig inherits the BasicConfig and defines the local arguments used in the
    algorithm.
    Attributes:
        hyperparameters (dict): Defines the dictionary of hyperparameters to be used by the bayesian optimizer for tuning.
    Args:
        lambda (float): Weight applied to the regularization in the loss function.
        learning_rate (float): Defines the learning rate for the optimization.
        L1_flag (bool): If True, perform L1 regularization on the model parameters.
        hidden_size (int): Defines the size of the latent dimension for entities and relations.
        batch_size (int): Defines the batch size for training the algorithm.
        epochs (int): Defines the total number of epochs for training the algorithm.
        margin (float): Defines the margin used between the positive and negative triple loss.
        data (str): Defines the knowledge base dataset to be used for training the algorithm.
        optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc.
        sampling (str): Defines the sampling (bern or uniform) for corrupting the triples.
    """

    def __init__(self, args=None):
        self.lmbda = args.lmbda
        self.learning_rate = args.learning_rate
        self.hidden_size = args.hidden_size
        self.batch_size = args.batch_training
        self.epochs = args.epochs
        self.data = args.dataset_name
        self.optimizer = args.optimizer
        self.sampling = args.sampling
        self.neg_rate = args.negrate

        if args.exp is True:
            paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'analogy')
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.

        self.hyperparameters = {
            'lmbda': self.lmbda,
            'learning_rate': self.learning_rate,
            'hidden_size': self.hidden_size,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'data': self.data,
            'optimizer': self.optimizer,
            'sampling': self.sampling,
            'neg_rate': self.neg_rate,
        }

        BasicConfig.__init__(self, args)


class SimplEConfig(BasicConfig):
    """This class defines the configuration for the SimplE Algorithm.
    SimplEConfig inherits the BasicConfig and defines the local arguments used in the
    algorithm.
    Attributes:
        hyperparameters (dict): Defines the dictionary of hyperparameters to be used by the bayesian optimizer for tuning.
    Args:
        lambda (float): Weight applied to the regularization in the loss function.
        learning_rate (float): Defines the learning rate for the optimization.
        L1_flag (bool): If True, perform L1 regularization on the model parameters.
        hidden_size (int): Defines the size of the latent dimension for entities and relations.
        batch_size (int): Defines the batch size for training the algorithm.
        epochs (int): Defines the total number of epochs for training the algorithm.
        margin (float): Defines the margin used between the positive and negative triple loss.
        data (str): Defines the knowledge base dataset to be used for training the algorithm.
        optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc.
        sampling (str): Defines the sampling (bern or uniform) for corrupting the triples.
    """

    def __init__(self, args=None):
        self.lmbda = args.lmbda
        self.learning_rate = args.learning_rate
        self.hidden_size = args.hidden_size
        self.batch_size = args.batch_training
        self.epochs = args.epochs
        self.data = args.dataset_name
        self.optimizer = args.optimizer
        self.sampling = args.sampling
        self.neg_rate = args.negrate

        if args.exp is True:
            paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'simple')
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.

        self.hyperparameters = {
            'lmbda': self.lmbda,
            'learning_rate': self.learning_rate,
            'hidden_size': self.hidden_size,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'data': self.data,
            'optimizer': self.optimizer,
            'sampling': self.sampling,
            'neg_rate': self.neg_rate,
        }

        BasicConfig.__init__(self, args)
114 changes: 111 additions & 3 deletions pykg2vec/config/hyperparams.py
@@ -31,8 +31,11 @@ def __init__(self):
'complex' : {'learning_rate': 0.05,'hidden_size':200,'batch_size':5000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001},
'distmult': {'learning_rate': 0.1,'hidden_size':100,'batch_size':50000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001},
'proje_po': {'learning_rate': 0.01,'hidden_dropout': 0.5, 'hidden_size':200,'batch_size':200,'epochs':100, 'optimizer':'adam','lmbda':0.00001},
-'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0, 'epochs':100},
-'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1}
+'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0},
+'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1},
+'cp': {'learning_rate': 0.01, 'hidden_size': 50, 'batch_size': 128, 'epochs': 50, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001},
+'analogy': {'learning_rate': 0.1, 'hidden_size': 200, 'batch_size': 128, 'epochs': 500, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001},
+'simple': {'learning_rate': 0.05, 'hidden_size': 100, 'batch_size': 128, 'epochs': 1000, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.1}
}
}
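config.py calls `HyperparamterLoader().load_hyperparameter(args.dataset_name, 'cp')` to fetch these values, which suggests the table is indexed first by dataset and then by model name. A small stand-in, assuming an FB15k-keyed nested dict (the README notes that the `-exp` settings are FB15k only):

```python
# Stand-in for the loader's lookup; the real internal layout may differ.
paper_settings = {
    "fb15k": {
        "cp": {"learning_rate": 0.01, "hidden_size": 50, "batch_size": 128, "epochs": 50,
               "optimizer": "adagrad", "sampling": "uniform", "neg_rate": 1, "lmbda": 0.0001},
    }
}

def load_hyperparameter(dataset_name, algorithm):
    # Mirrors the call signature used in config.py.
    return paper_settings[dataset_name][algorithm]

print(load_hyperparameter("fb15k", "cp")["epochs"])  # 50
```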

@@ -698,4 +701,109 @@ def __init__(self):
self.training_threshold = [1.0, 2.0, 3.0]
self.ncluster = [3, 4, 5, 6, 7]
self.CRP_factor = [0.01, 0.05, 0.1]
-self.weight_norm = [True, False]
+self.weight_norm = [True, False]


class CPParams:
    """This class defines the hyperparameters and their ranges for tuning the Canonical Tensor Decomposition algorithm.
    CPParams defines all the possible values to be tuned for the algorithm. Users may
    change these values directly to perform bayesian optimization of the hyper-parameters.
    Args:
        lambda (list): List of floating point values.
        feature_map_dropout (list): List of floating point values.
        input_dropout (list): List of floating point values.
        hidden_dropout (list): List of floating point values.
        use_bias (list): List of boolean values.
        label_smoothing (list): List of floating point values.
        lr_decay (float): List of floating point values.
        learning_rate (list): List of floating point values.
        L1_flag (list): List of boolean values.
        hidden_size (list): List of integer values.
        batch_size (list): List of integer values.
        epochs (list): List of integer values.
        margin (list): List of floating point values.
        optimizer (list): List of strings defining the optimization algorithm to be used.
        sampling (list): List of strings defining the sampling to be used for generating negative examples.
    """

    def __init__(self):
        self.search_space = {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
            'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
            'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096), 1)),
            'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
            'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
            'epochs': hp.choice('epochs', [10])  # always choose 10 training epochs.
        }

class ANALOGYParams:
    """This class defines the hyperparameters and their ranges for tuning the ANALOGY algorithm.
    ANALOGYParams defines all the possible values to be tuned for the algorithm. Users may
    change these values directly to perform bayesian optimization of the hyper-parameters.
    Args:
        lambda (list): List of floating point values.
        feature_map_dropout (list): List of floating point values.
        input_dropout (list): List of floating point values.
        hidden_dropout (list): List of floating point values.
        use_bias (list): List of boolean values.
        label_smoothing (list): List of floating point values.
        lr_decay (float): List of floating point values.
        learning_rate (list): List of floating point values.
        L1_flag (list): List of boolean values.
        hidden_size (list): List of integer values.
        batch_size (list): List of integer values.
        epochs (list): List of integer values.
        margin (list): List of floating point values.
        optimizer (list): List of strings defining the optimization algorithm to be used.
        sampling (list): List of strings defining the sampling to be used for generating negative examples.
    """

    def __init__(self):
        self.search_space = {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
            'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
            'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096), 1)),
            'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
            'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
            'epochs': hp.choice('epochs', [10])  # always choose 10 training epochs.
        }

class SimplEParams:
    """This class defines the hyperparameters and their ranges for tuning the SimplE algorithm.
    SimplEParams defines all the possible values to be tuned for the algorithm. Users may
    change these values directly to perform bayesian optimization of the hyper-parameters.
    Args:
        lambda (list): List of floating point values.
        feature_map_dropout (list): List of floating point values.
        input_dropout (list): List of floating point values.
        hidden_dropout (list): List of floating point values.
        use_bias (list): List of boolean values.
        label_smoothing (list): List of floating point values.
        lr_decay (float): List of floating point values.
        learning_rate (list): List of floating point values.
        L1_flag (list): List of boolean values.
        hidden_size (list): List of integer values.
        batch_size (list): List of integer values.
        epochs (list): List of integer values.
        margin (list): List of floating point values.
        optimizer (list): List of strings defining the optimization algorithm to be used.
        sampling (list): List of strings defining the sampling to be used for generating negative examples.
    """

    def __init__(self):
        self.search_space = {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
            'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
            'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096), 1)),
            'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
            'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
            'epochs': hp.choice('epochs', [10])  # always choose 10 training epochs.
        }
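The search spaces above are built from hyperopt's `hp` and `scope` primitives. As a rough illustration of how such a space is consumed, here is a minimal sketch driving hyperopt's `fmin` with a toy objective; in practice the objective would instead train the model with the sampled hyperparameters and return a validation metric:

```python
import numpy as np
from hyperopt import Trials, fmin, hp, tpe

def objective(params):
    # Toy objective standing in for a full KGE training run; minimized near lr = 1e-3.
    return (np.log10(params["learning_rate"]) + 3.0) ** 2

search_space = {
    "learning_rate": hp.loguniform("learning_rate", np.log(0.00001), np.log(0.1)),
    "lmbda": hp.loguniform("lmbda", np.log(0.00001), np.log(0.001)),
}

best = fmin(fn=objective, space=search_space, algo=tpe.suggest, max_evals=20, trials=Trials())
print(best)  # best-found values for learning_rate and lmbda
```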
