In [1]:
import sys
# Add the directory two levels up to the module search path
# (presumably so a local ampligraph checkout is importable — TODO confirm).
sys.path.append('../..')
import os
# These environment variables are set before TensorFlow is imported below,
# which is required for TensorFlow to honour them.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'   # expose only GPU 0 to CUDA
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'  # switch off oneDNN custom operations
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'   # hide TensorFlow C++ INFO/WARNING/ERROR logs
import tensorflow as tf
# Restrict TensorFlow's Python-side logger to errors only.
tf.get_logger().setLevel('ERROR')
import numpy as np

In [2]:
import ampligraph
# Benchmark datasets are under ampligraph.datasets module
from ampligraph.datasets import load_fb15k_237
# Load the FB15k-237 benchmark. The returned object is indexed by split name;
# this notebook uses the 'train' and 'test' entries.
dataset = load_fb15k_237()

## Train and predict scores

In [3]:
# Import the KGE model
from ampligraph.latent_features import ScoringBasedEmbeddingModel

# Create the model with the ComplEx scoring function.
#   eta: number of negatives (corruptions) generated per positive triple
#   k:   embedding size
model = ScoringBasedEmbeddingModel(eta=1,
                                   k=100,
                                   scoring_type='ComplEx')

# Compile the model with the optimizer and the loss used for training.
model.compile(optimizer='adam', loss='multiclass_nll')

# Train on the training split. `dataset` is the FB15k-237 object loaded
# earlier in the notebook, so it is reused here instead of being loaded again.
model.fit(dataset['train'],
          batch_size=10000,
          epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f6d700838e0>

In [4]:
# Raw model scores are unbounded, so a single score in isolation does not
# tell us whether a triple is plausible or not.
test_triples = dataset['test']
pred_out = model.predict(test_triples, batch_size=10000)

# Scores in ascending order.
scores_ascending = np.sort(pred_out)
print(scores_ascending)

# Indices of the test triples ordered from lowest to highest score.
order_ascending = np.argsort(pred_out)
print(order_ascending)


[-2.555357  -1.4917737 -1.3967279 ...  6.119666   6.14651    6.2213273]
[ 3834 18634  4569 ... 10495  9727 15757]


## Model calibration

In [5]:
# Calibrate the model's scores on the test set.
# NOTE(review): this is the same split that is scored with predict_proba
# afterwards; if the calibrated probabilities are meant to be evaluated,
# a separate held-out split may be more appropriate — confirm intent.
# NOTE(review): no negative triples are passed here, so the call presumably
# relies on positive_base_rate as the prior — verify against the AmpliGraph docs.
model.calibrate(dataset['test'],        # Dataset to calibrate on
                batch_size=500,         # Batch size to be used for calibration
                positive_base_rate=0.8, # prior which indicates what percentage of the dataset might be correct
                epochs=100,             # Number of epochs
                verbose=True)             

In [6]:
# predict_proba returns the calibrated scores, which are bounded in [0, 1].
test_triples = dataset['test']
out = model.predict_proba(test_triples, batch_size=10000)

# The values are rescaled to probabilities, but the ordering of the triples
# is the same as with the raw scores: the argsort indices do not change.
probabilities_ascending = np.sort(out)
print(probabilities_ascending)
order_ascending = np.argsort(out)
print(order_ascending)


[0.00847833 0.04739374 0.05502658 ... 0.9999324  0.9999354  0.9999429 ]
[ 3834 18634  4569 ... 10495  9727 15757]
