Merge pull request #237 from Accenture/feature/218
Fixes #218
lukostaz committed May 19, 2021
2 parents 4f036db + a531caf commit 207eb19
Showing 4 changed files with 74 additions and 44 deletions.
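Summary of the change: `_initialize_parameters()` in ComplEx, ConvE, ConvKB and the base EmbeddingModel previously registered TensorFlow variables under fixed names (`ent_emb`, `rel_emb`, `conv2d_weights`, ...). Under TensorFlow 1.x graph mode, `tf.get_variable` raises a `ValueError` when a variable with that name already exists in the current graph, so initializing the parameters a second time in the same graph could fail. The diff appends a microsecond timestamp to every variable name so each initialization gets unique names, and drops explicit `name=` arguments from auxiliary ops and `tf.Variable`s. A minimal sketch of the collision and of the naming pattern follows (assumes TensorFlow 1.x; the shapes and initializers are placeholders, not the library's real ones):

import time
import tensorflow as tf

# Sketch only (TensorFlow 1.x graph mode, which AmpliGraph 1.x targets).
# Registering the same fixed variable name twice in one graph fails:
#   tf.get_variable('ent_emb', shape=[10, 4])
#   tf.get_variable('ent_emb', shape=[10, 4])  # ValueError: Variable ent_emb already exists

# Pattern adopted by this commit: suffix every name with a microsecond
# timestamp taken at initialization time, so each model instance owns
# uniquely named variables.
timestamp = int(time.time() * 1e6)
ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
                          shape=[10, 4],                      # placeholder shape
                          initializer=tf.zeros_initializer(),
                          dtype=tf.float32)
rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
                          shape=[5, 4],                       # placeholder shape
                          initializer=tf.zeros_initializer(),
                          dtype=tf.float32)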
14 changes: 10 additions & 4 deletions ampligraph/latent_features/models/ComplEx.py
@@ -2,6 +2,7 @@
from ampligraph.latent_features import constants as constants
from ampligraph.latent_features.initializers import DEFAULT_XAVIER_IS_UNIFORM
import tensorflow as tf
import time


@register_model("ComplEx", ["negative_corruption_entities"])
@@ -185,21 +186,26 @@ def __init__(self,
def _initialize_parameters(self):
"""Initialize the complex embeddings.
"""
timestamp = int(time.time() * 1e6)
if not self.dealing_with_large_graphs:
self.ent_emb = tf.get_variable('ent_emb', shape=[len(self.ent_to_idx), self.internal_k],
self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[len(self.ent_to_idx), self.internal_k],
initializer=self.initializer.get_entity_initializer(
len(self.ent_to_idx), self.internal_k),
dtype=tf.float32)
self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.internal_k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.internal_k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.internal_k),
dtype=tf.float32)
else:
# initialize entity embeddings to zero (these are reinitialized every batch by batch embeddings)
self.ent_emb = tf.get_variable('ent_emb', shape=[self.batch_size * 2, self.internal_k],
self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[self.batch_size * 2, self.internal_k],
initializer=tf.zeros_initializer(),
dtype=tf.float32)
self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.internal_k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.internal_k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.internal_k),
dtype=tf.float32)
54 changes: 31 additions & 23 deletions ampligraph/latent_features/models/ConvE.py
@@ -4,6 +4,7 @@
from sklearn.utils import check_random_state
from tqdm import tqdm
from functools import partial
import time

from .EmbeddingModel import EmbeddingModel, register_model, ENTITY_THRESHOLD
from ..initializers import DEFAULT_XAVIER_IS_UNIFORM
@@ -241,38 +242,44 @@ def _initialize_parameters(self):
and all relation embeddings.
Overload this function if the parameters needs to be initialized differently.
"""

timestamp = int(time.time() * 1e6)
if not self.dealing_with_large_graphs:

with tf.variable_scope('meta'):
self.tf_is_training = tf.Variable(False, trainable=False, name='is_training')
self.tf_is_training = tf.Variable(False, trainable=False)
self.set_training_true = tf.assign(self.tf_is_training, True)
self.set_training_false = tf.assign(self.tf_is_training, False)

nfilters = self.embedding_model_params['conv_filters']
ninput = self.embedding_model_params['embed_image_depth']
ksize = self.embedding_model_params['conv_kernel_size']
dense_dim = self.embedding_model_params['dense_dim']

self.ent_emb = tf.get_variable('ent_emb', shape=[len(self.ent_to_idx), self.k],

self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[len(self.ent_to_idx), self.k],
initializer=self.initializer.get_entity_initializer(
len(self.ent_to_idx), self.k),
dtype=tf.float32)
self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.k),
dtype=tf.float32)

self.conv2d_W = tf.get_variable('conv2d_weights', shape=[ksize, ksize, ninput, nfilters],
self.conv2d_W = tf.get_variable('conv2d_weights_{}'.format(timestamp),
shape=[ksize, ksize, ninput, nfilters],
initializer=tf.initializers.he_normal(seed=self.seed),
dtype=tf.float32)
self.conv2d_B = tf.get_variable('conv2d_bias', shape=[nfilters],
self.conv2d_B = tf.get_variable('conv2d_bias_{}'.format(timestamp),
shape=[nfilters],
initializer=tf.zeros_initializer(), dtype=tf.float32)

self.dense_W = tf.get_variable('dense_weights', shape=[dense_dim, self.k],
self.dense_W = tf.get_variable('dense_weights_{}'.format(timestamp),
shape=[dense_dim, self.k],
initializer=tf.initializers.he_normal(seed=self.seed),
dtype=tf.float32)
self.dense_B = tf.get_variable('dense_bias', shape=[self.k],
self.dense_B = tf.get_variable('dense_bias_{}'.format(timestamp),
shape=[self.k],
initializer=tf.zeros_initializer(), dtype=tf.float32)

if self.embedding_model_params['use_batchnorm']:
@@ -293,7 +300,8 @@ def _initialize_parameters(self):
'moving_variance': np.ones(shape=[1])}}

if self.embedding_model_params['use_bias']:
self.bias = tf.get_variable('activation_bias', shape=[1, len(self.ent_to_idx)],
self.bias = tf.get_variable('activation_bias_{}'.format(timestamp),
shape=[1, len(self.ent_to_idx)],
initializer=tf.zeros_initializer(), dtype=tf.float32)

else:
@@ -388,12 +396,12 @@ def _load_model_from_trained_params(self):
self.batch_size = int(np.ceil(len(self.ent_to_idx) / self.batches_count))

with tf.variable_scope('meta'):
self.tf_is_training = tf.Variable(False, trainable=False, name='is_training')
self.tf_is_training = tf.Variable(False, trainable=False)
self.set_training_true = tf.assign(self.tf_is_training, True)
self.set_training_false = tf.assign(self.tf_is_training, False)

self.ent_emb = tf.Variable(self.trained_model_params['ent_emb'], dtype=tf.float32, name='ent_emb')
self.rel_emb = tf.Variable(self.trained_model_params['rel_emb'], dtype=tf.float32, name='rel_emb')
self.ent_emb = tf.Variable(self.trained_model_params['ent_emb'], dtype=tf.float32)
self.rel_emb = tf.Variable(self.trained_model_params['rel_emb'], dtype=tf.float32)

self.conv2d_W = tf.Variable(self.trained_model_params['conv2d_W'], dtype=tf.float32)
self.conv2d_B = tf.Variable(self.trained_model_params['conv2d_B'], dtype=tf.float32)
@@ -456,8 +464,8 @@ def _batchnorm(X, key, axis):
return x

# Inputs
stacked_emb = tf.stack([e_s, e_p], axis=2, name='stacked_embeddings')
self.inputs = tf.reshape(stacked_emb, name='embed_image',
stacked_emb = tf.stack([e_s, e_p], axis=2)
self.inputs = tf.reshape(stacked_emb,
shape=[tf.shape(stacked_emb)[0], self.embedding_model_params['embed_image_height'],
self.embedding_model_params['embed_image_width'], 1])

@@ -478,7 +486,7 @@ def _batchnorm(X, key, axis):
# Batch normalization will cancel out bias, so only add bias term if not using batchnorm
x = tf.nn.bias_add(x, self.conv2d_B)

x = tf.nn.relu(x, name='conv_relu')
x = tf.nn.relu(x)

if not self.embedding_model_params['dropout_conv'] is None:
x = _dropout(x, rate=self.embedding_model_params['dropout_conv'])
@@ -504,11 +512,11 @@ def _batchnorm(X, key, axis):
if not self.embedding_model_params['dropout_dense'] is None:
x = _dropout(x, rate=self.embedding_model_params['dropout_dense'])

x = tf.nn.relu(x, name='dense_relu')
x = tf.matmul(x, tf.transpose(self.ent_emb), name='matmul')
x = tf.nn.relu(x)
x = tf.matmul(x, tf.transpose(self.ent_emb))

if self.embedding_model_params['use_bias']:
x = tf.add(x, self.bias, name='add_bias')
x = tf.add(x, self.bias)

self.scores = x

@@ -771,10 +779,10 @@ def _initialize_eval_graph(self, mode='test'):
e_s, e_p, e_o = self._lookup_embeddings(self.X_test_tf)

# Scores for all triples
scores = tf.sigmoid(tf.squeeze(self._fn(e_s, e_p, e_o)), name='sigmoid_scores')
scores = tf.sigmoid(tf.squeeze(self._fn(e_s, e_p, e_o)))

# Score of positive triple
self.score_positive = tf.gather(scores, indices=self.X_test_tf[:, 2], name='score_positive')
self.score_positive = tf.gather(scores, indices=self.X_test_tf[:, 2])

# Scores for positive triples
self.scores_filtered = tf.boolean_mask(scores, tf.cast(self.X_test_filter_tf, tf.bool))
@@ -786,7 +794,7 @@ def _initialize_eval_graph(self, mode='test'):
self.filter_rank = self.perform_comparision(self.scores_filtered, self.score_positive)

# Rank of triple, with other positives filtered out.
self.rank = tf.subtract(self.total_rank, self.filter_rank, name='rank') + 1
self.rank = tf.subtract(self.total_rank, self.filter_rank) + 1

# NOTE: if having trouble with the above rank calculation, consider when test triple
# has the highest score (total_rank=1, filter_rank=1)
@@ -1042,7 +1050,7 @@ def _initialize_eval_graph_subject(self, mode='test'):
e_s, e_p, e_o = self._lookup_embeddings(self.subject_corr)

# Scores for all triples
self.sigmoid_scores = tf.sigmoid(tf.squeeze(self._fn(e_s, e_p, e_o)), name='sigmoid_scores')
self.sigmoid_scores = tf.sigmoid(tf.squeeze(self._fn(e_s, e_p, e_o)))

def _get_subject_ranks(self, dataset_handle, corruption_batch_size=None):
""" Internal function for obtaining subject ranks.
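Aside on the dropped `name=` arguments in ConvE.py: unlike `tf.get_variable`, `tf.Variable` and ordinary ops never raise on a duplicate name, because TensorFlow uniquifies them automatically ('is_training_1', ...). Removing the explicit names therefore reads as a consistency clean-up alongside the timestamped variable names rather than a collision fix; that reading is an inference from the diff, not something the commit states. A small sketch of the auto-uniquification (TensorFlow 1.x):

import tensorflow as tf

a = tf.Variable(False, trainable=False, name='is_training')
b = tf.Variable(False, trainable=False, name='is_training')
print(a.name)  # 'is_training:0'
print(b.name)  # 'is_training_1:0' -- uniquified automatically, no error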
34 changes: 22 additions & 12 deletions ampligraph/latent_features/models/ConvKB.py
@@ -12,6 +12,7 @@
from .EmbeddingModel import EmbeddingModel, register_model, ENTITY_THRESHOLD
from ..initializers import DEFAULT_XAVIER_IS_UNIFORM
from ampligraph.latent_features import constants as constants
import time

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -193,25 +194,30 @@ def _initialize_parameters(self):
"""

with tf.variable_scope('meta'):
self.tf_is_training = tf.Variable(False, trainable=False, name='is_training')
self.tf_is_training = tf.Variable(False, trainable=False)
self.set_training_true = tf.assign(self.tf_is_training, True)
self.set_training_false = tf.assign(self.tf_is_training, False)

timestamp = int(time.time() * 1e6)
if not self.dealing_with_large_graphs:

self.ent_emb = tf.get_variable('ent_emb', shape=[len(self.ent_to_idx), self.k],
self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[len(self.ent_to_idx), self.k],
initializer=self.initializer.get_entity_initializer(
len(self.ent_to_idx), self.k), dtype=tf.float32)
self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.k), dtype=tf.float32)

else:

self.ent_emb = tf.get_variable('ent_emb', shape=[self.batch_size * 2, self.internal_k],
self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[self.batch_size * 2, self.internal_k],
initializer=tf.zeros_initializer(), dtype=tf.float32)

self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.internal_k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.internal_k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.internal_k), dtype=tf.float32)

@@ -225,17 +231,21 @@ def _initialize_parameters(self):
conv_shape = [3, filter_size, 1, num_filters]
conv_name = 'conv-maxpool-{}'.format(filter_size)
weights_init = tf.initializers.truncated_normal(seed=self.seed)
self.conv_weights[conv_name] = {'weights': tf.get_variable('{}_W'.format(conv_name), shape=conv_shape,
self.conv_weights[conv_name] = {'weights': tf.get_variable('{}_W_{}'.format(conv_name, timestamp),
shape=conv_shape,
trainable=True, dtype=tf.float32,
initializer=weights_init),
'biases': tf.get_variable('{}_B'.format(conv_name), shape=[num_filters],
'biases': tf.get_variable('{}_B_{}'.format(conv_name, timestamp),
shape=[num_filters],
trainable=True, dtype=tf.float32,
initializer=tf.zeros_initializer())}

self.dense_W = tf.get_variable('dense_weights', shape=[dense_dim, num_outputs], trainable=True,
self.dense_W = tf.get_variable('dense_weights_{}'.format(timestamp),
shape=[dense_dim, num_outputs], trainable=True,
initializer=tf.keras.initializers.he_normal(seed=self.seed),
dtype=tf.float32)
self.dense_B = tf.get_variable('dense_bias', shape=[num_outputs], trainable=False,
self.dense_B = tf.get_variable('dense_bias_{}'.format(timestamp),
shape=[num_outputs], trainable=False,
initializer=tf.zeros_initializer(), dtype=tf.float32)

def get_embeddings(self, entities, embedding_type='entity'):
@@ -332,7 +342,7 @@ def _load_model_from_trained_params(self):
self.rel_emb = tf.Variable(self.trained_model_params['rel_emb'], dtype=tf.float32)

with tf.variable_scope('meta'):
self.tf_is_training = tf.Variable(False, trainable=False, name='is_training')
self.tf_is_training = tf.Variable(False, trainable=False)
self.set_training_true = tf.assign(self.tf_is_training, True)
self.set_training_false = tf.assign(self.tf_is_training, False)

@@ -394,9 +404,9 @@ def _fn(self, e_s, e_p, e_o):
dropout_rate = tf.cond(self.tf_is_training,
true_fn=lambda: tf.constant(self.embedding_model_params['dropout']),
false_fn=lambda: tf.constant(0, dtype=tf.float32))
x = tf.nn.dropout(x, rate=dropout_rate, name='dropout_dense')
x = tf.nn.dropout(x, rate=dropout_rate)

self.scores = tf.nn.xw_plus_b(x, self.dense_W, self.dense_B, name="scores")
self.scores = tf.nn.xw_plus_b(x, self.dense_W, self.dense_B)

return tf.squeeze(self.scores)

16 changes: 11 additions & 5 deletions ampligraph/latent_features/models/EmbeddingModel.py
@@ -20,6 +20,7 @@
from ampligraph.datasets import AmpligraphDatasetAdapter, NumpyDatasetAdapter
from functools import partial
from ampligraph.latent_features import constants as constants
import time

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -462,7 +463,7 @@ def _lookup_embeddings(self, x):
A Tensor that includes the embeddings of the objects.
"""
e_s = self._entity_lookup(x[:, 0])
e_p = tf.nn.embedding_lookup(self.rel_emb, x[:, 1], name='embedding_lookup_predicate')
e_p = tf.nn.embedding_lookup(self.rel_emb, x[:, 1])
e_o = self._entity_lookup(x[:, 2])
return e_s, e_p, e_o

@@ -495,21 +496,26 @@ def _initialize_parameters(self):
and all relation embeddings.
Overload this function if the parameters needs to be initialized differently.
"""
timestamp = int(time.time() * 1e6)
if not self.dealing_with_large_graphs:
self.ent_emb = tf.get_variable('ent_emb', shape=[len(self.ent_to_idx), self.internal_k],
self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[len(self.ent_to_idx), self.internal_k],
initializer=self.initializer.get_entity_initializer(
len(self.ent_to_idx), self.internal_k),
dtype=tf.float32)
self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.internal_k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.internal_k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.internal_k),
dtype=tf.float32)
else:
# initialize entity embeddings to zero (these are reinitialized every batch by batch embeddings)
self.ent_emb = tf.get_variable('ent_emb', shape=[self.batch_size * 2, self.internal_k],
self.ent_emb = tf.get_variable('ent_emb_{}'.format(timestamp),
shape=[self.batch_size * 2, self.internal_k],
initializer=tf.zeros_initializer(),
dtype=tf.float32)
self.rel_emb = tf.get_variable('rel_emb', shape=[len(self.rel_to_idx), self.internal_k],
self.rel_emb = tf.get_variable('rel_emb_{}'.format(timestamp),
shape=[len(self.rel_to_idx), self.internal_k],
initializer=self.initializer.get_relation_initializer(
len(self.rel_to_idx), self.internal_k),
dtype=tf.float32)
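For context, a hypothetical workflow that the unique variable names are meant to make safe: training two model instances back to back in the same interpreter session without a name clash between their graphs. The constructor arguments and toy triples below are illustrative only, assuming the AmpliGraph 1.x API:

import numpy as np
from ampligraph.latent_features import ComplEx

# Toy knowledge graph: each row is a (subject, predicate, object) triple.
X = np.array([['a', 'likes', 'b'],
              ['b', 'likes', 'c'],
              ['c', 'likes', 'a']])

model_1 = ComplEx(k=10, eta=1, epochs=5, batches_count=1, seed=0)
model_1.fit(X)

# Before this change, re-initializing parameters under the fixed names could
# collide with variables registered by model_1; the timestamp suffix avoids that.
model_2 = ComplEx(k=10, eta=1, epochs=5, batches_count=1, seed=0)
model_2.fit(X)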
