Merge pull request #142 from Sujit-O/development

Development

louisccc committed Apr 21, 2020
2 parents 561d4ea + 19c789b commit eb5ea60
Showing 24 changed files with 786 additions and 80 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -86,22 +86,22 @@ $ python train.py -h
$ python train.py -mn TransE

# Train using different KGE methods.
-$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|RotatE|
-distmult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise]
+$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|CP|RotatE|Analogy|
+DistMult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise]

# For KGE using projection-based loss function, use more processes for batch generation.
$ python train.py -mn [ConvE|ConvKB|Proje_pointwise] -npg [the number of processes, 4 or 6]

# Train TransE model using different benchmark datasets.
$ python train.py -mn TransE -ds [fb15k|wn18|wn18_rr|yago3_10|fb15k_237|
-ks|nations|umls|dl50a]
+ks|nations|umls|dl50a|nell_955]

```
Pykg2vec aims to include most of the state-of-the-art KGE methods. You can check [Implemented Algorithms](https://pykg2vec.readthedocs.io/en/latest/algos.html) for more details. Some models are still under development [Conv2D|TuckER].
To verify the correctness of the included KGE methods, we also train them with the hyperparameter settings reported in the original papers and check that the results are consistent.
```bash
# train KGE method with the hyperparameters used in original papers. (FB15k supported only)
-$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|RotatE|
+$ python train.py -mn [TransE|TransD|TransH|TransG|TransM|TransR|Complex|Complexn3|CP|RotatE|Analogy|
distmult|KG2E|KG2E_EL|NTN|Rescal|SLM|SME|SME_BL|HoLE|ConvE|ConvKB|Proje_pointwise] -exp true -ds fb15k

```
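For instance, with the flags shown above, the newly added CP model can be trained with its reported paper settings on FB15k:

```bash
# Train the newly added CP model with the hyperparameters reported in its paper.
$ python train.py -mn CP -exp true -ds fb15k
```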
2 changes: 1 addition & 1 deletion pykg2vec/_version.py
@@ -1 +1 @@
__version__ = "0.0.50"
__version__ = "0.0.51"
175 changes: 172 additions & 3 deletions pykg2vec/config/config.py
@@ -40,10 +40,12 @@ def __init__(self):
self.model_path = "pykg2vec.core"
self.config_path = "pykg2vec.config.config"

self.modelMap = {"complex": "Complex.Complex",
self.modelMap = {"analogy": "ANALOGY.ANALOGY",
"complex": "Complex.Complex",
"complexn3": "Complex.ComplexN3",
"conve": "ConvE.ConvE",
"convkb": "ConvKB.ConvKB",
"cp": "CP.CP",
"hole": "HoLE.HoLE",
"distmult": "DistMult.DistMult",
"kg2e": "KG2E.KG2E",
@@ -52,6 +54,8 @@ def __init__(self):
"proje_pointwise": "ProjE_pointwise.ProjE_pointwise",
"rescal": "Rescal.Rescal",
"rotate": "RotatE.RotatE",
"simple": "SimplE.SimplE",
"simple_ignr": "SimplE.SimplE_ignr",
"slm": "SLM.SLM",
"sme": "SME.SME",
"sme_bl": "SME.SME_BL",
@@ -63,10 +67,12 @@ def __init__(self):
"transr": "TransR.TransR",
"tucker": "TuckER.TuckER"}

self.configMap = {"complex": "ComplexConfig",
self.configMap = {"analogy": "ANALOGYConfig",
"complex": "ComplexConfig",
"complexn3": "ComplexConfig",
"conve": "ConvEConfig",
"convkb": "ConvKBConfig",
"cp": "CPConfig",
"hole": "HoLEConfig",
"distmult": "DistMultConfig",
"kg2e": "KG2EConfig",
@@ -75,6 +81,8 @@ def __init__(self):
"proje_pointwise": "ProjE_pointwiseConfig",
"rescal": "RescalConfig",
"rotate": "RotatEConfig",
"simple": "SimplEConfig",
"simple_ignr": "SimplEConfig",
"slm": "SLMConfig",
"sme": "SMEConfig",
"sme_bl": "SMEConfig",
@@ -1318,6 +1326,7 @@ def __init__(self, args=None):

BasicConfig.__init__(self, args)


class TuckERConfig(BasicConfig):
"""This class defines the configuration for the TuckER Algorithm.
@@ -1480,4 +1489,164 @@ def __init__(self, args=None):
'neg_rate': self.neg_rate,

}
-BasicConfig.__init__(self, args)
+BasicConfig.__init__(self, args)


class CPConfig(BasicConfig):
    """This class defines the configuration for the Canonical Tensor Decomposition Algorithm.
    CPConfig inherits the BasicConfig and defines the local arguments used in the
    algorithm.
    Attributes:
        hyperparameters (dict): Defines the dictionary of hyperparameters to be used by the bayesian optimizer for tuning.
    Args:
        lambda (float): Weight applied to the regularization in the loss function.
        learning_rate (float): Defines the learning rate for the optimization.
        L1_flag (bool): If True, perform L1 regularization on the model parameters.
        hidden_size (int): Defines the size of the latent dimension for entities and relations.
        batch_size (int): Defines the batch size for training the algorithm.
        epochs (int): Defines the total number of epochs for training the algorithm.
        margin (float): Defines the margin used between the positive and negative triple loss.
        data (str): Defines the knowledge base dataset to be used for training the algorithm.
        optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc.
        sampling (str): Defines the sampling (bern or uniform) for corrupting the triples.
    """

    def __init__(self, args=None):
        self.lmbda = args.lmbda
        self.learning_rate = args.learning_rate
        self.hidden_size = args.hidden_size
        self.batch_size = args.batch_training
        self.epochs = args.epochs
        self.data = args.dataset_name
        self.optimizer = args.optimizer
        self.sampling = args.sampling
        self.neg_rate = args.negrate

        if args.exp is True:
            paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'cp')
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.

        self.hyperparameters = {
            'lmbda': self.lmbda,
            'learning_rate': self.learning_rate,
            'hidden_size': self.hidden_size,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'data': self.data,
            'optimizer': self.optimizer,
            'sampling': self.sampling,
            'neg_rate': self.neg_rate,
        }

        BasicConfig.__init__(self, args)
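To make the `-exp true` branch above concrete, here is a minimal sketch that mimics the `__dict__` copy loop with plain dictionaries; the CLI flag values are hypothetical, while the paper settings match the `cp` entry added to hyperparams.py below. The same pattern applies to the ANALOGYConfig and SimplEConfig classes that follow.

```python
# Hypothetical CLI flag values, standing in for the config object's __dict__.
cli_flags = {"learning_rate": 0.001, "hidden_size": 100, "batch_size": 256}

# Paper settings for 'cp', as listed in the hyperparams.py change below.
paper_params = {"learning_rate": 0.01, "hidden_size": 50, "batch_size": 128,
                "epochs": 50, "optimizer": "adagrad", "sampling": "uniform",
                "neg_rate": 1, "lmbda": 0.0001}

cli_flags.update(paper_params)          # same effect as the per-key __dict__ copy loop
print(cli_flags["learning_rate"])       # 0.01 -- the paper value overrides the CLI flag
```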


class ANALOGYConfig(BasicConfig):
    """This class defines the configuration for the ANALOGY Algorithm.
    ANALOGYConfig inherits the BasicConfig and defines the local arguments used in the
    algorithm.
    Attributes:
        hyperparameters (dict): Defines the dictionary of hyperparameters to be used by the bayesian optimizer for tuning.
    Args:
        lambda (float): Weight applied to the regularization in the loss function.
        learning_rate (float): Defines the learning rate for the optimization.
        L1_flag (bool): If True, perform L1 regularization on the model parameters.
        hidden_size (int): Defines the size of the latent dimension for entities and relations.
        batch_size (int): Defines the batch size for training the algorithm.
        epochs (int): Defines the total number of epochs for training the algorithm.
        margin (float): Defines the margin used between the positive and negative triple loss.
        data (str): Defines the knowledge base dataset to be used for training the algorithm.
        optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc.
        sampling (str): Defines the sampling (bern or uniform) for corrupting the triples.
    """

    def __init__(self, args=None):
        self.lmbda = args.lmbda
        self.learning_rate = args.learning_rate
        self.hidden_size = args.hidden_size
        self.batch_size = args.batch_training
        self.epochs = args.epochs
        self.data = args.dataset_name
        self.optimizer = args.optimizer
        self.sampling = args.sampling
        self.neg_rate = args.negrate

        if args.exp is True:
            paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'analogy')
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.

        self.hyperparameters = {
            'lmbda': self.lmbda,
            'learning_rate': self.learning_rate,
            'hidden_size': self.hidden_size,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'data': self.data,
            'optimizer': self.optimizer,
            'sampling': self.sampling,
            'neg_rate': self.neg_rate,
        }

        BasicConfig.__init__(self, args)


class SimplEConfig(BasicConfig):
    """This class defines the configuration for the SimplE Algorithm.
    SimplEConfig inherits the BasicConfig and defines the local arguments used in the
    algorithm.
    Attributes:
        hyperparameters (dict): Defines the dictionary of hyperparameters to be used by the bayesian optimizer for tuning.
    Args:
        lambda (float): Weight applied to the regularization in the loss function.
        learning_rate (float): Defines the learning rate for the optimization.
        L1_flag (bool): If True, perform L1 regularization on the model parameters.
        hidden_size (int): Defines the size of the latent dimension for entities and relations.
        batch_size (int): Defines the batch size for training the algorithm.
        epochs (int): Defines the total number of epochs for training the algorithm.
        margin (float): Defines the margin used between the positive and negative triple loss.
        data (str): Defines the knowledge base dataset to be used for training the algorithm.
        optimizer (str): Defines the optimization algorithm such as adam, sgd, adagrad, etc.
        sampling (str): Defines the sampling (bern or uniform) for corrupting the triples.
    """

    def __init__(self, args=None):
        self.lmbda = args.lmbda
        self.learning_rate = args.learning_rate
        self.hidden_size = args.hidden_size
        self.batch_size = args.batch_training
        self.epochs = args.epochs
        self.data = args.dataset_name
        self.optimizer = args.optimizer
        self.sampling = args.sampling
        self.neg_rate = args.negrate

        if args.exp is True:
            paper_params = HyperparamterLoader().load_hyperparameter(args.dataset_name, 'simple')
            for key, value in paper_params.items():
                self.__dict__[key] = value  # copy all the settings from the paper.

        self.hyperparameters = {
            'lmbda': self.lmbda,
            'learning_rate': self.learning_rate,
            'hidden_size': self.hidden_size,
            'batch_size': self.batch_size,
            'epochs': self.epochs,
            'data': self.data,
            'optimizer': self.optimizer,
            'sampling': self.sampling,
            'neg_rate': self.neg_rate,
        }

        BasicConfig.__init__(self, args)
114 changes: 111 additions & 3 deletions pykg2vec/config/hyperparams.py
@@ -31,8 +31,11 @@ def __init__(self):
'complex' : {'learning_rate': 0.05,'hidden_size':200,'batch_size':5000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001},
'distmult': {'learning_rate': 0.1,'hidden_size':100,'batch_size':50000,'epochs':1000,'optimizer':'adagrad','sampling':"uniform",'neg_rate':1,'lmbda':0.0001},
'proje_po': {'learning_rate': 0.01,'hidden_dropout': 0.5, 'hidden_size':200,'batch_size':200,'epochs':100, 'optimizer':'adam','lmbda':0.00001},
-'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0, 'epochs':100},
-'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1}
+'conve' : {'learning_rate': 0.003,'optimizer':'adam', 'label_smoothing':0.1, 'batch_size':128, 'hidden_size':200, 'hidden_size_1':20, 'input_dropout':0.2, 'feature_map_dropout':0.2, 'hidden_dropout':0.3,'neg_rate':0},
+'convkb' : {'lmbda': 0.001,'filter_sizes':[1,2],'num_filters':50,'learning_rate': 0.0001,'optimizer':'adam','hidden_size': 100,'batch_size': 128,'epochs':200,'neg_rate':1},
+'cp': {'learning_rate': 0.01, 'hidden_size': 50, 'batch_size': 128, 'epochs': 50, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001},
+'analogy': {'learning_rate': 0.1, 'hidden_size': 200, 'batch_size': 128, 'epochs': 500, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.0001},
+'simple': {'learning_rate': 0.05, 'hidden_size': 100, 'batch_size': 128, 'epochs': 1000, 'optimizer': 'adagrad', 'sampling': "uniform", 'neg_rate': 1, 'lmbda': 0.1}
}
}
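config.py calls `HyperparamterLoader().load_hyperparameter(args.dataset_name, 'cp')` to fetch these values, which suggests the table is indexed first by dataset and then by model name. A small stand-in, assuming an FB15k-keyed nested dict (the README notes that the `-exp` settings are FB15k only):

```python
# Stand-in for the loader's lookup; the real internal layout may differ.
paper_settings = {
    "fb15k": {
        "cp": {"learning_rate": 0.01, "hidden_size": 50, "batch_size": 128, "epochs": 50,
               "optimizer": "adagrad", "sampling": "uniform", "neg_rate": 1, "lmbda": 0.0001},
    }
}

def load_hyperparameter(dataset_name, algorithm):
    # Mirrors the call signature used in config.py.
    return paper_settings[dataset_name][algorithm]

print(load_hyperparameter("fb15k", "cp")["epochs"])  # 50
```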

@@ -698,4 +701,109 @@ def __init__(self):
self.training_threshold = [1.0, 2.0, 3.0]
self.ncluster = [3, 4, 5, 6, 7]
self.CRP_factor = [0.01, 0.05, 0.1]
-self.weight_norm = [True, False]
+self.weight_norm = [True, False]


class CPParams:
    """This class defines the hyperparameters and their ranges for tuning the Canonical Tensor Decomposition algorithm.
    CPParams defines all the possible values to be tuned for the algorithm. Users may
    change these values directly to perform bayesian optimization of the hyper-parameters.
    Args:
        lambda (list): List of floating point values.
        feature_map_dropout (list): List of floating point values.
        input_dropout (list): List of floating point values.
        hidden_dropout (list): List of floating point values.
        use_bias (list): List of boolean values.
        label_smoothing (list): List of floating point values.
        lr_decay (float): List of floating point values.
        learning_rate (list): List of floating point values.
        L1_flag (list): List of boolean values.
        hidden_size (list): List of integer values.
        batch_size (list): List of integer values.
        epochs (list): List of integer values.
        margin (list): List of floating point values.
        optimizer (list): List of strings defining the optimization algorithm to be used.
        sampling (list): List of strings defining the sampling to be used for generating negative examples.
    """

    def __init__(self):
        self.search_space = {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
            'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
            'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096), 1)),
            'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
            'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
            'epochs': hp.choice('epochs', [10])  # always choose 10 training epochs.
        }

class ANALOGYParams:
    """This class defines the hyperparameters and their ranges for tuning the ANALOGY algorithm.
    ANALOGYParams defines all the possible values to be tuned for the algorithm. Users may
    change these values directly to perform bayesian optimization of the hyper-parameters.
    Args:
        lambda (list): List of floating point values.
        feature_map_dropout (list): List of floating point values.
        input_dropout (list): List of floating point values.
        hidden_dropout (list): List of floating point values.
        use_bias (list): List of boolean values.
        label_smoothing (list): List of floating point values.
        lr_decay (float): List of floating point values.
        learning_rate (list): List of floating point values.
        L1_flag (list): List of boolean values.
        hidden_size (list): List of integer values.
        batch_size (list): List of integer values.
        epochs (list): List of integer values.
        margin (list): List of floating point values.
        optimizer (list): List of strings defining the optimization algorithm to be used.
        sampling (list): List of strings defining the sampling to be used for generating negative examples.
    """

    def __init__(self):
        self.search_space = {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
            'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
            'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096), 1)),
            'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
            'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
            'epochs': hp.choice('epochs', [10])  # always choose 10 training epochs.
        }

class SimplEParams:
    """This class defines the hyperparameters and their ranges for tuning the SimplE algorithm.
    SimplEParams defines all the possible values to be tuned for the algorithm. Users may
    change these values directly to perform bayesian optimization of the hyper-parameters.
    Args:
        lambda (list): List of floating point values.
        feature_map_dropout (list): List of floating point values.
        input_dropout (list): List of floating point values.
        hidden_dropout (list): List of floating point values.
        use_bias (list): List of boolean values.
        label_smoothing (list): List of floating point values.
        lr_decay (float): List of floating point values.
        learning_rate (list): List of floating point values.
        L1_flag (list): List of boolean values.
        hidden_size (list): List of integer values.
        batch_size (list): List of integer values.
        epochs (list): List of integer values.
        margin (list): List of floating point values.
        optimizer (list): List of strings defining the optimization algorithm to be used.
        sampling (list): List of strings defining the sampling to be used for generating negative examples.
    """

    def __init__(self):
        self.search_space = {
            'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.1)),
            'hidden_size': scope.int(hp.qloguniform('hidden_size', np.log(8), np.log(256), 1)),
            'batch_size': scope.int(hp.qloguniform('batch_size', np.log(8), np.log(4096), 1)),
            'lmbda': hp.loguniform('lmbda', np.log(0.00001), np.log(0.001)),
            'optimizer': hp.choice('optimizer', ["adam", "sgd", 'rms']),
            'epochs': hp.choice('epochs', [10])  # always choose 10 training epochs.
        }
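The search spaces above are built from hyperopt's `hp` and `scope` primitives. As a rough illustration of how such a space is consumed, here is a minimal sketch driving hyperopt's `fmin` with a toy objective; in practice the objective would instead train the model with the sampled hyperparameters and return a validation metric:

```python
import numpy as np
from hyperopt import Trials, fmin, hp, tpe

def objective(params):
    # Toy objective standing in for a full KGE training run; minimized near lr = 1e-3.
    return (np.log10(params["learning_rate"]) + 3.0) ** 2

search_space = {
    "learning_rate": hp.loguniform("learning_rate", np.log(0.00001), np.log(0.1)),
    "lmbda": hp.loguniform("lmbda", np.log(0.00001), np.log(0.001)),
}

best = fmin(fn=objective, space=search_space, algo=tpe.suggest, max_evals=20, trials=Trials())
print(best)  # best-found values for learning_rate and lmbda
```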
