In [1]:
# Notebook setup: extend the import path, configure the process environment,
# then import TensorFlow, NumPy and AmpliGraph.
import sys
# Add the directory two levels above the notebook to the module search path.
# NOTE(review): presumably so the in-repository ampligraph package is importable — confirm.
sys.path.append('../..')
import os
# The environment variables below are set before TensorFlow is imported further
# down; keep this ordering when editing the cell.
# Expose only the CUDA device with index 1 to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# NOTE(review): presumably switches off TensorFlow's oneDNN optimizations — confirm against the TF docs.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
# NOTE(review): presumably silences TensorFlow's native (C++) log output — confirm against the TF docs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
# Restrict the TensorFlow Python logger to messages of level ERROR and above.
tf.get_logger().setLevel('ERROR')
import numpy as np
import ampligraph

In [2]:
# Import the KGE model
from ampligraph.latent_features import ScoringBasedEmbeddingModel
from ampligraph.datasets.sqlite_adapter import SQLiteAdapter
from ampligraph.datasets.graph_data_loader import GraphDataLoader

In [3]:
# Graph loader - loads the data from a file, numpy array, etc. and generates
# batches for iterating.
# Internally it first maps the raw data to indices and stores them in a db,
# then maps the raw triples to those indices and stores them in another db.
#
# The dataset directory can be overridden with the FB15K237_DIR environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original location is used, so existing runs behave as before.
fb15k237_dir = os.environ.get(
    'FB15K237_DIR',
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237',
)
dataset_loader = GraphDataLoader(
    os.path.join(fb15k237_dir, 'train.txt'),
    backend=SQLiteAdapter,   # type of backend to use
    batch_size=1000,         # batch size to use while iterating over this dataset
    dataset_type='train',    # dataset type
    use_filter=False,        # whether to use filter or not
    use_indexer=True,        # indicates that the data needs to be mapped to index
)




In [4]:
from ampligraph.datasets import RandomEdgesGraphPartitioner

# Partitioning strategy for this run: RandomEdges.
# The partitioner is created by handing it the graph data loader object; it
# partitions the data and internally creates one graph data loader per
# partition.
partitioner = RandomEdgesGraphPartitioner(
    dataset_loader,
    k=3,
)


_split: memory before: 864.0Bytes, after: 12.827MB, consumed: 12.826MB; exec time: 19.218s


In [5]:
# Model creation and compilation are the same as in the usual workflow.
partitioned_model = ScoringBasedEmbeddingModel(
    eta=2,
    k=50,
    scoring_type='DistMult',
)
partitioned_model.compile(optimizer='adam', loss='multiclass_nll')

# The partitioner object is the input to fit(); it generates the data for the
# model during training. partitioning_k does not need to be passed here, since
# it would be overridden by the partitioner_k of the input partitioner.
partitioned_model.fit(
    partitioner,
    epochs=10,   # number of epochs
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f255998f040>

In [6]:
# Create a graph (triple) loader for the test set, using the SQL backend.
# The data is indexed with the data indexer of the trained model, so concepts
# are mapped to the same indices that were used during training; the indexed
# triples are stored in a database in chunks.
#
# The dataset directory can be overridden with the FB15K237_DIR environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original location is used, so existing runs behave as before.
fb15k237_dir = os.environ.get(
    'FB15K237_DIR',
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237',
)
dataset_loader_test = GraphDataLoader(
    os.path.join(fb15k237_dir, 'test.txt'),
    backend=SQLiteAdapter,                       # type of backend to use
    batch_size=400,                              # batch size to use while iterating over this dataset
    dataset_type='test',                         # dataset type
    use_indexer=partitioned_model.data_indexer,  # data_indexer taken from the trained model
)



28 triples containing invalid keys skipped!


In [7]:
from ampligraph.evaluation.metrics import mrr_score, hits_at_n_score, mr_score

# The test data loader object is the input to evaluate(); it generates the
# data that the model is evaluated on.
ranks = partitioned_model.evaluate(
    dataset_loader_test,
    batch_size=400,
)

# Cell output: (mr_score, mrr_score, hits@1, hits@10, number of ranks)
(
    mr_score(ranks),
    mrr_score(ranks),
    hits_at_n_score(ranks, 1),
    hits_at_n_score(ranks, 10),
    len(ranks),
)



(1171.3521381739897, 0.0739156438083333, 0.0, 0.20427634797925434, 20438)

In [8]:
from ampligraph.utils import save_model

# Write the trained partitioned model to the given path.
save_model(
    model=partitioned_model,
    model_name_path='./partitioned_model_random_edges',
)

The path ./partitioned_model_random_edges already exists. This save operation will overwrite the model                 at the specified path.


In [9]:
from ampligraph.utils import restore_model

# Load a model back from the './partitioned_model_random_edges' path and bind
# it to a separate name, `model`.
model = restore_model(
    './partitioned_model_random_edges'
)

In [10]:
# Rebuild the test-set loader, this time indexing the data with the data
# indexer of the restored model so that concepts are mapped to the same
# indices that were used during training.
#
# The dataset directory can be overridden with the FB15K237_DIR environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original location is used, so existing runs behave as before.
fb15k237_dir = os.environ.get(
    'FB15K237_DIR',
    '/home/spai/code/ampligraph_projects/dataset/fb15k-237',
)
dataset_loader_test = GraphDataLoader(
    os.path.join(fb15k237_dir, 'test.txt'),
    backend=SQLiteAdapter,           # type of backend to use
    batch_size=400,                  # batch size to use while iterating over this dataset
    dataset_type='test',             # dataset type
    use_indexer=model.data_indexer,  # data_indexer taken from the restored model
)



28 triples containing invalid keys skipped!


In [11]:
from ampligraph.evaluation.metrics import mrr_score, hits_at_n_score, mr_score

# The test data loader object is the input to evaluate(); it generates the
# data that the restored model is evaluated on.
ranks = model.evaluate(
    dataset_loader_test,
    batch_size=400,
)

# Cell output: (mr_score, mrr_score, hits@1, hits@10, number of ranks)
(
    mr_score(ranks),
    mrr_score(ranks),
    hits_at_n_score(ranks, 1),
    hits_at_n_score(ranks, 10),
    len(ranks),
)



(1171.3521381739897, 0.0739156438083333, 0.0, 0.20427634797925434, 20438)