In [1]:
# Notebook setup: process-level configuration, TensorFlow logging/device
# options and every import used by the cells below.
# The environment variables are assigned before `import tensorflow` so that
# they are already set when TensorFlow is loaded; keep this ordering.
import os
# Make only the CUDA device with index 1 visible to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# '2' filters TensorFlow's native INFO and WARNING log messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import tensorflow as tf
# Python-side TensorFlow logger: report errors only.
tf.get_logger().setLevel('ERROR')
import pandas as pd
# Turn soft device placement off and switch device-placement logging on.
tf.config.set_soft_device_placement(False)
tf.debugging.set_log_device_placement(True)
import numpy as np
from ampligraph.datasets import load_fb15k_237, load_yago3_10
from ampligraph.evaluation.protocol import create_mappings, to_idx

# Ranking metrics applied to the ranks returned by `model.evaluate`.
from ampligraph.evaluation.metrics import mrr_score, hits_at_n_score, mr_score
import time
# The notebook was written against TensorFlow 2.3.x; stop early on any other version.
print(tf.__version__)
assert(tf.__version__.startswith('2.3'))

# NOTE(review): load_fb15k_237 and load_yago3_10 are imported a second time
# here, and pandas, numpy, create_mappings, to_idx and the dataset loaders are
# not referenced by any cell visible in this notebook - confirm and prune.
from ampligraph.datasets import load_fb15k_237, load_fb13, load_fb15k, load_wn11, load_wn18, load_wn18rr, load_yago3_10
from ampligraph.latent_features import ScoringBasedEmbeddingModel

2.3.1


In [2]:
#loss function
def nll(scores_pos, scores_neg, eta):
    """Negative log-likelihood (softmax) loss of positives against their corruptions.

    For every positive triple the per-triple term is
    ``-log(exp(p) / (exp(p) + sum_j exp(n_j)))`` where ``p`` is the positive
    score and ``n_j`` are the scores of its ``eta`` corruptions. The terms are
    summed over the batch.

    The value is computed as ``logsumexp([p, n_1, ..., n_eta]) - p``. This is
    algebraically identical to the expression above but never exponentiates
    the raw scores directly, so large-magnitude scores cannot overflow to
    ``inf`` or underflow to ``0`` and turn the loss into NaN/inf.

    Parameters
    ----------
    scores_pos : tf.Tensor
        Scores of the positive triples. Assumed to be rank 1 with one entry
        per triple (its first dimension is used as the batch size).
    scores_neg : tf.Tensor
        Scores of the corruptions; must hold ``eta * batch`` values so that it
        can be reshaped to ``[eta, batch]``, column ``i`` belonging to
        positive triple ``i``.
    eta : int
        Number of corruptions per positive triple.

    Returns
    -------
    tf.Tensor
        Scalar loss summed over the batch.
    """
    scores_neg_reshaped = tf.reshape(scores_neg, [eta, tf.shape(scores_pos)[0]])

    # Put each positive score on top of its corruptions: shape [eta + 1, batch].
    all_scores = tf.concat(
        [tf.expand_dims(scores_pos, axis=0), scores_neg_reshaped], axis=0)

    # log(softmax) = p - logsumexp(all scores), evaluated in log space.
    log_softmax = scores_pos - tf.reduce_logsumexp(all_scores, axis=0)

    loss = -tf.reduce_sum(log_softmax)
    return loss

# Training/eval without partitioning

In [3]:
# Baseline run without partitioning: TransE trained with the self-adversarial
# loss and Adagrad. Alternatives tried earlier: optimizer 'adam', and the
# losses nll / 'self_adversarial'.
from tensorflow.keras.callbacks import ModelCheckpoint
from ampligraph.latent_features.loss_functions import SelfAdversarialLoss

optim = tf.optimizers.Adagrad(learning_rate=0.1)
loss = SelfAdversarialLoss({'margin': 0.1, 'alpha': 5})

model = ScoringBasedEmbeddingModel(eta=50, k=300, scoring_type='TransE')
model.compile(optimizer=optim, loss=loss)

# Write a checkpoint to ./chkpt1 every time the training loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    './chkpt1',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

train_file = '/home/spai/code/ampligraph_projects/dataset/fb15k-237/train.txt'
valid_file = '/home/spai/code/ampligraph_projects/dataset/fb15k-237/valid.txt'

# Validation is requested every 5 epochs, in batches of 100.
model.fit(
    train_file,
    batch_size=10000,
    epochs=10,
    validation_freq=5,
    validation_batch_size=100,
    validation_data=valid_file,
    callbacks=[checkpoint],
)




Epoch 1/10
     29/Unknown - 1s 51ms/step - loss: 13395.2031
Epoch 00001: loss improved from inf to 13395.20312, saving model to ./chkpt1
Epoch 2/10
Epoch 00002: loss improved from 13395.20312 to 12786.13379, saving model to ./chkpt1
Epoch 3/10
Epoch 00003: loss improved from 12786.13379 to 12509.62207, saving model to ./chkpt1
Epoch 4/10
Epoch 00004: loss improved from 12509.62207 to 12343.49121, saving model to ./chkpt1
Epoch 5/10
9 triples containing invalid keys skipped!

Epoch 00005: loss improved from 12343.49121 to 12230.34668, saving model to ./chkpt1
Epoch 6/10
Epoch 00006: loss improved from 12230.34668 to 12145.55566, saving model to ./chkpt1
Epoch 7/10
Epoch 00007: loss improved from 12145.55566 to 12080.31348, saving model to ./chkpt1
Epoch 8/10
Epoch 00008: loss improved from 12080.31348 to 12026.89160, saving model to ./chkpt1
Epoch 9/10
Epoch 00009: loss improved from 12026.89160 to 11982.40527, saving model to ./chkpt1
Epoch 10/10
9 triples containing invalid keys skip

<tensorflow.python.keras.callbacks.History at 0x7fdc6c09b7d0>

In [4]:

# Filtered evaluation of the baseline model on the test split, corrupting
# both subject and object; train, valid and test files are passed as filter.
start = time.time()
eval_filter = {
    'train': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/train.txt',
    'valid': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/valid.txt',
    'test': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
}
ranks = model.evaluate(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
    batch_size=100,
    corrupt_side='s,o',
    use_filter=eval_filter,
)
end = time.time()
print('Time taken:', end-start)

# Displayed as (MR, MRR, Hits@1, Hits@10).
(mr_score(ranks),
 mrr_score(ranks),
 hits_at_n_score(ranks, 1),
 hits_at_n_score(ranks, 10))


28 triples containing invalid keys skipped!

9 triples containing invalid keys skipped!

28 triples containing invalid keys skipped!
Time taken: 99.60667252540588


(329.3209218123104,
 0.2040090857039047,
 0.12819258244446619,
 0.3548047754183384)

## Load Checkpoint and evaluate

In [5]:
# Build a fresh model with the same hyper-parameters, restore the weights
# checkpointed at ./chkpt1 and repeat the filtered evaluation. The timer
# covers model construction and weight loading as well as the evaluation.
start = time.time()

loaded_model = ScoringBasedEmbeddingModel(eta=50, k=300, scoring_type='TransE')
loaded_model.load_weights('./chkpt1')

eval_filter = {
    'train': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/train.txt',
    'valid': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/valid.txt',
    'test': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
}
ranks = loaded_model.evaluate(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
    batch_size=100,
    corrupt_side='s,o',
    use_filter=eval_filter,
)

end = time.time()
print('Time taken:', end-start)

# Displayed as (MR, MRR, Hits@1, Hits@10).
(mr_score(ranks),
 mrr_score(ranks),
 hits_at_n_score(ranks, 1),
 hits_at_n_score(ranks, 10))


28 triples containing invalid keys skipped!

9 triples containing invalid keys skipped!

28 triples containing invalid keys skipped!
Time taken: 99.79198861122131


(329.3209218123104,
 0.2040090857039047,
 0.12819258244446619,
 0.3548047754183384)

# Training/eval with partitioning

## Training with RandomEdges partitioner

In [6]:
# Wrap the training file in a SQLite-backed data loader and split it with the
# 'RandomEdges' partitioner looked up from the registry.
from ampligraph.datasets import DummyBackend, SQLiteAdapter
from ampligraph.datasets import GraphDataLoader
from ampligraph.datasets.graph_partitioner import PARTITION_ALGO_REGISTRY

dataset_loader = GraphDataLoader(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/train.txt',
    backend=SQLiteAdapter,
    batch_size=1000,
    dataset_type='train',
    epochs=10,
    use_indexer=True,
)

# Choose the partitioner class by name, then instantiate it with k=3.
partitioner_cls = PARTITION_ALGO_REGISTRY.get('RandomEdges')
partitioner = partitioner_cls(dataset_loader, k=3)

_split: memory before: 0.0Bytes, after: 13.058MB, consumed: 13.058MB; exec time: 33.213s


In [7]:
# TransE model for the partitioned run: Adam with AMSGrad and the custom
# `nll` loss defined earlier in the notebook.
optim = tf.optimizers.Adam(learning_rate=0.001, amsgrad=True)

partitioned_model = ScoringBasedEmbeddingModel(
    eta=5,
    k=300,
    scoring_type='TransE',
)
partitioned_model.compile(optimizer=optim, loss=nll)

In [8]:

# Train on the pre-built partitioner and print the elapsed wall-clock seconds.
start = time.time()
partitioned_model.fit(
    partitioner,
    batch_size=1000,
    use_partitioning=True,
    epochs=10,
)
elapsed = time.time() - start
print(elapsed)

Epoch 1/10
    274/Unknown - 12s 44ms/step - loss: 1579.7837
Epoch 00001: loss improved from inf to 1579.78369, saving model to ./chkpt1
Epoch 2/10
Epoch 00002: loss improved from 1579.78369 to 1403.83862, saving model to ./chkpt1
Epoch 3/10
Epoch 00003: loss improved from 1403.83862 to 1248.08704, saving model to ./chkpt1
Epoch 4/10
Epoch 00004: loss improved from 1248.08704 to 1118.72729, saving model to ./chkpt1
Epoch 5/10
Epoch 00005: loss improved from 1118.72729 to 1014.01208, saving model to ./chkpt1
Epoch 6/10
Epoch 00006: loss improved from 1014.01208 to 929.17847, saving model to ./chkpt1
Epoch 7/10
Epoch 00007: loss improved from 929.17847 to 859.32758, saving model to ./chkpt1
Epoch 8/10
Epoch 00008: loss improved from 859.32758 to 801.14594, saving model to ./chkpt1
Epoch 9/10
Epoch 00009: loss improved from 801.14594 to 751.79913, saving model to ./chkpt1
Epoch 10/10
Epoch 00010: loss improved from 751.79913 to 709.63336, saving model to ./chkpt1
180.3483395576477


In [9]:

# Evaluate the partition-trained model on the test split. No `use_filter` is
# passed, so this is the unfiltered counterpart of the evaluations above.
# The test loader is handed the mapper taken from the trained model's data handler.
trained_mapper = partitioned_model.data_handler.get_mapper()
dataset_loader_test = GraphDataLoader(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
    backend=SQLiteAdapter,
    batch_size=400,
    dataset_type='test',
    epochs=1,
    use_indexer=trained_mapper,
)

start = time.time()
ranks = partitioned_model.evaluate(dataset_loader_test, batch_size=400)
end = time.time()
print('Time taken:', end-start)

# Displayed as (MR, MRR, Hits@1, Hits@10).
(mr_score(ranks),
 mrr_score(ranks),
 hits_at_n_score(ranks, 1),
 hits_at_n_score(ranks, 10))


28 triples containing invalid keys skipped!
Time taken: 132.15588426589966


(504.75254901960784, 0.09983318770378669, 0.0, 0.27379901960784314)

## Save and Load

In [10]:
# Persist the weights of the partition-trained model under ./best_model.
best_model_path = './best_model'
partitioned_model.save_weights(best_model_path)

In [11]:
# Recreate the model with the hyper-parameters used for training, then
# restore the weights saved under ./best_model.
loaded_part_model = ScoringBasedEmbeddingModel(
    eta=5,
    k=300,
    scoring_type='TransE',
)
loaded_part_model.load_weights('./best_model')

In [12]:

# Same unfiltered test evaluation as for the in-memory model, this time on the
# model restored from disk. The loader is given the restored model's
# `data_indexer` as its indexer.
restored_indexer = loaded_part_model.data_indexer
dataset_loader_test = GraphDataLoader(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
    backend=SQLiteAdapter,
    batch_size=400,
    dataset_type='test',
    epochs=1,
    use_indexer=restored_indexer,
)

start = time.time()
ranks = loaded_part_model.evaluate(dataset_loader_test, batch_size=400)
end = time.time()
print('Time taken:', end-start)

# Displayed as (MR, MRR, Hits@1, Hits@10).
(mr_score(ranks),
 mrr_score(ranks),
 hits_at_n_score(ranks, 1),
 hits_at_n_score(ranks, 10))


28 triples containing invalid keys skipped!
Time taken: 135.7653512954712


(504.75254901960784, 0.09983318770378669, 0.0, 0.27379901960784314)

# Training/eval with partitioning (default partitioning approach)

In [3]:
from ampligraph.datasets import DummyBackend, SQLiteAdapter
from ampligraph.datasets import GraphDataLoader
from ampligraph.datasets.graph_partitioner import PARTITION_ALGO_REGISTRY


In [4]:
# Partitioned training straight from the file path: no partitioner object is
# built here, only `use_partitioning=True` is passed to `fit`.
optim = tf.optimizers.Adam(learning_rate=0.001, amsgrad=True)

partitioned_model = ScoringBasedEmbeddingModel(
    eta=5,
    k=300,
    scoring_type='TransE',
)
partitioned_model.compile(optimizer=optim, loss=nll)

# Time the whole fit call and print the elapsed wall-clock seconds.
start = time.time()
partitioned_model.fit(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/train.txt',
    batch_size=1000,
    use_partitioning=True,
    epochs=10,
)
elapsed = time.time() - start
print(elapsed)

_split: memory before: 896.0Bytes, after: 12.904MB, consumed: 12.903MB; exec time: 146.49s
Epoch 1/10
    277/Unknown - 5s 20ms/step - loss: 1689.7427
Epoch 00001: loss improved from inf to 1689.05029, saving model to ./chkpt1
Epoch 2/10
Epoch 00002: loss improved from 1689.05029 to 1590.34705, saving model to ./chkpt1
Epoch 3/10
Epoch 00003: loss improved from 1590.34705 to 1481.84058, saving model to ./chkpt1
Epoch 4/10
Epoch 00004: loss improved from 1481.84058 to 1380.22937, saving model to ./chkpt1
Epoch 5/10
Epoch 00005: loss improved from 1380.22937 to 1290.48267, saving model to ./chkpt1
Epoch 6/10
Epoch 00006: loss improved from 1290.48267 to 1212.24597, saving model to ./chkpt1
Epoch 7/10
Epoch 00007: loss improved from 1212.24597 to 1144.09448, saving model to ./chkpt1
Epoch 8/10
Epoch 00008: loss improved from 1144.09448 to 1084.55872, saving model to ./chkpt1
Epoch 9/10
Epoch 00009: loss improved from 1084.55872 to 1032.42200, saving model to ./chkpt1
Epoch 10/10
Epoch 000

In [5]:


# Unfiltered evaluation on the test split (no `use_filter` argument).
start = time.time()
ranks = partitioned_model.evaluate(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
    batch_size=400,
)
end = time.time()
print('Time taken:', end-start)

# Displayed as (MR, MRR, Hits@1, Hits@10).
(mr_score(ranks),
 mrr_score(ranks),
 hits_at_n_score(ranks, 1),
 hits_at_n_score(ranks, 10))


28 triples containing invalid keys skipped!
Time taken: 173.1760048866272


(703.0901996281436, 0.0938227619140394, 0.0, 0.2562383794891868)

## Filtered evaluation

In [6]:
# Filtered evaluation of the partition-trained model: subject and object are
# corrupted and the train, valid and test files are passed as filter.
start = time.time()
eval_filter = {
    'train': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/train.txt',
    'valid': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/valid.txt',
    'test': '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
}
ranks = partitioned_model.evaluate(
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237/test.txt',
    batch_size=200,
    corrupt_side='s,o',
    use_filter=eval_filter,
)
end = time.time()
print('Time taken:', end-start)

# Displayed as (MR, MRR, Hits@1, Hits@10).
(mr_score(ranks),
 mrr_score(ranks),
 hits_at_n_score(ranks, 1),
 hits_at_n_score(ranks, 10))


28 triples containing invalid keys skipped!

9 triples containing invalid keys skipped!

28 triples containing invalid keys skipped!
Time taken: 1193.5984427928925


(577.7298659360016,
 0.21288052721907064,
 0.14925628730795576,
 0.33736177708190623)