In [1]:
import numpy as np
import itertools
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_from_csv
from ampligraph.latent_features.models import TransE, ComplEx, DistMult

In [2]:
# Make only the GPU with index 1 visible to CUDA for everything run in this kernel.
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [3]:
# Load the three splits from the local freebase13 folder.
X = dict(
    train=load_from_csv("~/freebase13", "train.txt"),
    valid=load_from_csv("~/freebase13", "dev.txt"),
    test=load_from_csv("~/freebase13", "test.txt"),
)

# The last column of the valid/test files is split off as the label vector ...
X.update(valid_labels=X['valid'][:, -1], test_labels=X['test'][:, -1])

# ... and only the first three columns are kept as the triples themselves.
X.update(valid=X['valid'][:, 0:3], test=X['test'][:, 0:3])

In [4]:
# Split the validation and test triples by their string label: "1" -> positive, "-1" -> negative.
X_valid_pos, X_valid_neg = (X['valid'][X['valid_labels'] == label] for label in ("1", "-1"))

X_test_pos, X_test_neg = (X['test'][X['test_labels'] == label] for label in ("1", "-1"))

In [5]:
import types
from functools import partial
import tensorflow as tf
from sklearn.utils import check_random_state
from tqdm import tqdm
from ampligraph.datasets import AmpligraphDatasetAdapter, NumpyDatasetAdapter
from ampligraph.evaluation import generate_corruptions_for_fit, to_idx, generate_corruptions_for_eval, \
    hits_at_n_score, mrr_score


def generate_corruptions(self, X_pos, batches_count, epochs):
    """Score randomly corrupted versions of ``X_pos`` with an already-trained model.

    Written to be bound onto a model instance (``types.MethodType``). It resets
    the default TF graph, reloads the trained parameters, streams ``X_pos`` in
    batches, corrupts every batch with ``generate_corruptions_for_fit``
    (``eta=1``, ``corrupt_side='s+o'``) and evaluates the model's scoring
    function on the corrupted triples.

    Parameters
    ----------
    self : object
        Model exposing ``seed``, ``rel_to_idx``, ``ent_to_idx``, ``tf_config``,
        ``verbose``, ``_load_model_from_trained_params``, ``_lookup_embeddings``
        and ``_fn``. Its ``rnd`` attribute is overwritten.
    X_pos : array-like
        Positive triples handed to ``NumpyDatasetAdapter.set_data``
        (presumably an (n, 3) array of subject/predicate/object labels -- TODO confirm).
    batches_count : int
        Number of batches each pass over ``X_pos`` is divided into.
    epochs : int
        Number of passes; ``batches_count * epochs`` batches are scored in total.

    Returns
    -------
    numpy.ndarray
        Concatenation of the per-batch score arrays of the corrupted triples.

    Raises
    ------
    Exception
        Any error raised while building or running the graph is propagated
        unchanged. The dataset adapter is cleaned up first if it was created.
    """
    # Must exist before the try block: the original handler called
    # dataset_handle.cleanup() unconditionally, so a failure before the adapter
    # was created raised UnboundLocalError and hid the real error.
    dataset_handle = None
    try:
        tf.reset_default_graph()
        self.rnd = check_random_state(self.seed)
        tf.random.set_random_seed(self.seed)

        self._load_model_from_trained_params()

        dataset_handle = NumpyDatasetAdapter()
        dataset_handle.use_mappings(self.rel_to_idx, self.ent_to_idx)

        dataset_handle.set_data(X_pos, "pos")

        batch_size_pos = int(np.ceil(dataset_handle.get_size("pos") / batches_count))

        gen_fn = partial(dataset_handle.get_next_train_batch, batch_size=batch_size_pos, dataset_type="pos")
        dataset = tf.data.Dataset.from_generator(gen_fn,
                                                 output_types=tf.int32,
                                                 output_shapes=(None, 3))
        # repeat() lets the session loop below pull batches_count * epochs batches.
        dataset = dataset.repeat().prefetch(1)
        dataset_iter = tf.data.make_one_shot_iterator(dataset)

        x_pos_tf = dataset_iter.get_next()

        # The scores of the positives are never fetched below. The ops are kept
        # so the graph is built exactly as before (NOTE(review): dropping them
        # may shift op-level random seeds -- confirm before removing).
        e_s, e_p, e_o = self._lookup_embeddings(x_pos_tf)
        scores_pos = self._fn(e_s, e_p, e_o)

        x_neg_tf = generate_corruptions_for_fit(x_pos_tf,
                                                entities_list=None,
                                                eta=1,
                                                corrupt_side='s+o',
                                                entities_size=0,
                                                rnd=self.seed)

        e_s_neg, e_p_neg, e_o_neg = self._lookup_embeddings(x_neg_tf)
        scores = self._fn(e_s_neg, e_p_neg, e_o_neg)

        epoch_iterator_with_progress = tqdm(range(1, epochs + 1), disable=(not self.verbose), unit='epoch')

        scores_list = []
        with tf.Session(config=self.tf_config) as sess:
            sess.run(tf.global_variables_initializer())
            for _ in epoch_iterator_with_progress:
                for _ in range(batches_count):
                    # Each run pulls the next positive batch, corrupts it and scores it.
                    scores_list.append(sess.run(scores))

        return np.concatenate(scores_list)
    finally:
        # Runs exactly once on both the success and the failure path.
        if dataset_handle is not None:
            dataset_handle.cleanup()

In [6]:
def pos_iso(cal_model, pos_scores, neg_scores, positive_base_rate):
    """Fit ``cal_model`` on positive vs. negative scores with per-class sample weights.

    Positives get target 1 and weight ``len(neg_scores) / len(pos_scores)``;
    negatives get target 0 and weight
    ``(1 - positive_base_rate) / positive_base_rate``.

    Parameters
    ----------
    cal_model : object
        Estimator exposing ``fit(x, y, sample_weight=...)``.
    pos_scores, neg_scores : numpy.ndarray
        Scores of the positive and the negative examples.
    positive_base_rate : float
        Assumed share of positives; must be non-zero.

    Returns
    -------
    object
        The same ``cal_model`` instance, after fitting.
    """
    pos_weight = len(neg_scores) / len(pos_scores)
    neg_weight = (1.0 - positive_base_rate) / positive_base_rate

    per_sample_weight = np.concatenate([
        np.full(pos_scores.shape, pos_weight),
        np.full(neg_scores.shape, neg_weight),
    ]).astype(float)
    labels = np.concatenate([
        np.ones(pos_scores.shape),
        np.zeros(neg_scores.shape),
    ]).astype(float)
    all_scores = np.concatenate([pos_scores, neg_scores]).astype(float)

    cal_model.fit(all_scores, labels, sample_weight=per_sample_weight)
    return cal_model

In [None]:
# Experiment grid: every embedding model class is trained with every loss.
losses =  ['self_adversarial', 'pairwise', 'nll', 'multiclass_nll']
models = [TransE, DistMult, ComplEx]

# One dict of metrics per (model, loss) combination that completed without error.
results = []

for m, l in itertools.product(models, losses):
    model = m(batches_count=32, seed=0, epochs=1000, k=100, eta=20,
               optimizer='adam', optimizer_params={'lr':0.0001},
               loss=l, verbose=False)
    
    try:
        model.fit(X['train'])

        # Raw (uncalibrated) model scores on the test triples.
        scores = model.predict(X['test'])

        # probas1 ("pos"): model.calibrate with validation positives only and a
        # positive base rate of 0.5.
        model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
        print("pos", model.calibration_parameters)
        probas1 = model.predict_proba(X['test'])

        # probas2 ("pos neg"): model.calibrate with validation positives and negatives.
        # This replaces the calibration fitted above, so it must come after probas1.
        model.calibrate(X_valid_pos, X_valid_neg)
        print("pos neg", model.calibration_parameters)
        probas2 = model.predict_proba(X['test'])

        # probas3 ("pos_neg_iso"): isotonic regression fitted on all validation
        # scores against their true labels.
        val_scores = model.predict(X['valid'])
        ir = IsotonicRegression(out_of_bounds='clip')
        ir.fit(np.squeeze(val_scores).astype(float), (X['valid_labels'] == "1").astype(float))
        probas3 = ir.predict(np.squeeze(scores).astype(float))

        # probas4 ("pos_iso"): isotonic regression on validation positives vs. scores of
        # corruptions produced by the generate_corruptions helper bound onto the model.
        model.generate_corruptions = types.MethodType(generate_corruptions, model)
        corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
        val_pos_scores = np.squeeze(model.predict(X_valid_pos))
        iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions, positive_base_rate=0.5)
        probas4 = iso_pos.predict(np.squeeze(scores).astype(float))

        # probas5 ("pos_sc"): same data as probas4, fitted with scikit-learn's
        # private _SigmoidCalibration class instead of isotonic regression.
        sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
        print("pos sc", sc_pos.a_, sc_pos.b_)
        probas5 = sc_pos.predict(np.squeeze(scores).astype(float))

        # probas6 ("pos_neg_sc"): _SigmoidCalibration on validation positives vs.
        # the labelled validation negatives.
        val_neg_scores = np.squeeze(model.predict(X_valid_neg))
        sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
        print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
        probas6 = sc_pos_neg.predict(np.squeeze(scores).astype(float))

        # Per-relation baseline: the threshold is the median validation score of each
        # relation (column 1); a test triple counts as positive when its raw score
        # exceeds the threshold of its relation. The np.sort is redundant for a median.
        # NOTE(review): a test relation absent from the validation set would map to
        # None through thresholds.get -- confirm that cannot happen for this dataset.
        thresholds = {r: np.median(np.sort(val_scores[X['valid'][:, 1] == r])) for r in np.unique(X['valid'][:, 1])}
        thresholds_test = np.vectorize(thresholds.get)(X['test'][:, 1])
        per_relation_acc = accuracy_score(X['test_labels'] == "1", scores > thresholds_test)
        
        # Uncalibrated baseline: logistic sigmoid of the raw score, thresholded at 0.5.
        acc_uncalib = accuracy_score(X['test_labels'] == "1", expit(scores) > 0.5)

        # Accuracy of each calibrated probability at the 0.5 decision threshold.
        acc1 = accuracy_score(X['test_labels'] == "1", probas1 > 0.5)
        acc2 = accuracy_score(X['test_labels'] == "1", probas2 > 0.5)
        acc3 = accuracy_score(X['test_labels'] == "1", probas3 > 0.5)
        acc4 = accuracy_score(X['test_labels'] == "1", probas4 > 0.5)
        acc5 = accuracy_score(X['test_labels'] == "1", probas5 > 0.5)
        acc6 = accuracy_score(X['test_labels'] == "1", probas6 > 0.5)
        
        # Ranking evaluation on the test positives; train triples plus validation and
        # test positives are passed as the filter set.
        filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))
        ranks = evaluate_performance(X_test_pos, 
                                     model=model, 
                                     filter_triples=filter_triples,
                                     use_default_protocol=True, 
                                     verbose=False)
    except Exception as e:
        # Best effort: a failing combination is reported and skipped, so `results`
        # can end up with fewer rows than the model x loss grid.
        print("Exception: {}".format(e))
        continue
        
    # Key suffixes name the calibration variant: probas_pos=probas1, probas_pos_neg=probas2,
    # probas_pos_neg_iso=probas3, probas_pos_iso=probas4, probas_pos_sc=probas5,
    # probas_pos_neg_sc=probas6; "scores" is the sigmoid of the raw score.
    results.append({
        'model': m.__name__,
        'loss': l,
        'brier_score_scores': brier_score_loss(X['test_labels'] == "1", expit(scores)),
        'log_loss_scores': log_loss(X['test_labels'] == "1", expit(scores), eps=1e-7),
        'brier_score_probas_pos': brier_score_loss(X['test_labels'] == "1", probas1),
        'log_loss_probas_pos': log_loss(X['test_labels'] == "1", probas1, eps=1e-7),
        'brier_score_probas_pos_neg': brier_score_loss(X['test_labels'] == "1", probas2),
        'log_loss_probas_pos_neg': log_loss(X['test_labels'] == "1", probas2, eps=1e-7),
        'brier_score_probas_pos_neg_iso': brier_score_loss(X['test_labels'] == "1", probas3),
        'log_loss_probas_pos_neg_iso': log_loss(X['test_labels'] == "1", probas3, eps=1e-7),
        'brier_score_probas_pos_iso': brier_score_loss(X['test_labels'] == "1", probas4),
        'log_loss_probas_pos_iso': log_loss(X['test_labels'] == "1", probas4, eps=1e-7),
        'brier_score_probas_pos_sc': brier_score_loss(X['test_labels'] == "1", probas5),
        'log_loss_probas_pos_sc': log_loss(X['test_labels'] == "1", probas5, eps=1e-7),
        'brier_score_probas_pos_neg_sc': brier_score_loss(X['test_labels'] == "1", probas6),
        'log_loss_probas_pos_neg_sc': log_loss(X['test_labels'] == "1", probas6, eps=1e-7),
        'metrics_mrr': mrr_score(ranks), 
        'metrics_hits@10': hits_at_n_score(ranks, n=10),
        'metrics_mr': mr_score(ranks),
        'accuracy_per_relation': per_relation_acc,
        'accuracy_uncalib': acc_uncalib,
        'accuracy_pos': acc1,
        'accuracy_pos_neg': acc2,
        'accuracy_pos_neg_iso': acc3,
        'accuracy_pos_iso': acc4,
        'accuracy_pos_sc': acc5,
        'accuracy_pos_neg_sc': acc6
    })
        
    # Echo the metrics of the run that just finished.
    print(json.dumps(results[-1], indent=2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.random.categorical instead.
Instructions for updating:
Use tf.cast instead.
pos [-1.408169, -5.5876026]
pos neg [-2.7453291, -9.864215]
pos sc -1.5003550603079776 -5.920942824625633
pos neg sc -2.745570659210269 -9.864598727586014
{
  "model": "TransE",
  "loss": "self_adversarial",
  "brier_score_scores": 0.4455475739367166,
  "log_loss_scores": 1.5336591390251544,
  "brier_score_probas_pos": 0.1478029386795

In [None]:
import pandas as pd

In [15]:
# Shallow snapshot of the result list: the per-run dicts are shared, not duplicated.
results_backup = list(results)

In [None]:
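# NOTE: commented-out, manually recorded metrics for the DistMult / nll run
# (presumably pasted from an earlier run's output -- verify before reuse).
# This dict has no "accuracy_uncalib" key, unlike the dicts built by the
# experiment loop, so appending it as-is would leave that column NaN for this row.
# results.append({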
#   "model": "DistMult",
#   "loss": "nll",
#   "brier_score_scores": 0.4382576683465266,
#   "log_loss_scores": 3.087698336544197,
#   "brier_score_probas_pos": 0.23832094695816006,
#   "log_loss_probas_pos": 0.6697501021942958,
#   "brier_score_probas_pos_neg": 0.23767353922895937,
#   "log_loss_probas_pos_neg": 0.6687839223472528,
#   "brier_score_probas_pos_neg_iso": 0.22622558183032773,
#   "log_loss_probas_pos_neg_iso": 0.6445887874353222,
#   "brier_score_probas_pos_iso": 0.23646780486189833,
#   "log_loss_probas_pos_iso": 0.6700627657820418,
#   "brier_score_probas_pos_sc": 0.2383986963438918,
#   "log_loss_probas_pos_sc": 0.6698704088360236,
#   "brier_score_probas_pos_neg_sc": 0.237673619733912,
#   "log_loss_probas_pos_neg_sc": 0.6687839672209751,
#   "metrics_mrr": 0.028533441075687185,
#   "metrics_hits@10": 0.05494459191842582,
#   "metrics_mr": 16951.14256520457,
#   "accuracy_per_relation": 0.6097041968649924,
#   "accuracy_pos": 0.5882142255182875,
#   "accuracy_pos_neg": 0.6101045002528231,
#   "accuracy_pos_neg_iso": 0.6282234957020058,
#   "accuracy_pos_iso": 0.626432664756447,
#   "accuracy_pos_sc": 0.5865076689701668,
#   "accuracy_pos_neg_sc": 0.6101045002528231
# })

In [16]:
def highlight_min(s):
    """Return one CSS string per element of ``s``, bolding every entry equal to the minimum.

    Intended as a callback for ``DataFrame.style.apply``; all other entries
    get an empty style string.
    """
    smallest = s.min()
    return ['font-weight: bold' if value == smallest else '' for value in s]

In [21]:
# One row per completed run, indexed by (model, loss); the remaining columns are the metrics.
df = pd.DataFrame(results).set_index(['model', 'loss'])

In [22]:
# Brier scores only, with the common "brier_score_" prefix stripped from the column names.
bs = (
    df[[col for col in df.columns if col.startswith('brier')]]
    .rename(columns=lambda col: col[len("brier_score_"):])
)
# Lower is better: bold the smallest value of every row.
bs.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.147803,0.141008,0.12417,0.124279,0.12417,0.14521,0.445548
TransE,pairwise,0.227406,0.208031,0.224698,0.202994,0.224697,0.228499,0.49996
TransE,nll,0.253707,0.243704,0.209477,0.20315,0.209479,0.253236,0.235682
TransE,multiclass_nll,0.162581,0.158707,0.146117,0.145664,0.146117,0.156674,0.500021
DistMult,self_adversarial,0.184887,0.191851,0.178077,0.170496,0.178077,0.185138,0.472947
DistMult,pairwise,0.215361,0.219627,0.214035,0.204016,0.214035,0.215306,0.237538
DistMult,nll,0.238321,0.236468,0.237673,0.226226,0.237674,0.238399,0.438258
DistMult,multiclass_nll,0.182919,0.195011,0.17989,0.173913,0.179891,0.182901,0.314046
ComplEx,self_adversarial,0.181526,0.189419,0.177498,0.16957,0.177498,0.181581,0.481122
ComplEx,pairwise,0.224829,0.234694,0.224049,0.210799,0.224048,0.224813,0.235555


In [23]:
# Log losses only, with the common "log_loss_" prefix stripped from the column names.
ll = (
    df[[col for col in df.columns if col.startswith('log_loss')]]
    .rename(columns=lambda col: col[len("log_loss_"):])
)
# Lower is better: bold the smallest value of every row.
ll.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.458933,0.442277,0.390337,0.390529,0.390343,0.451575,1.53366
TransE,pairwise,0.641794,0.593625,0.636255,0.582087,0.636255,0.643769,5.23359
TransE,nll,0.704512,0.684656,0.614706,0.592849,0.614712,0.703936,0.663464
TransE,multiclass_nll,0.50143,0.490505,0.454885,0.454219,0.454885,0.485309,7.90357
DistMult,self_adversarial,0.549499,0.566858,0.533494,0.517608,0.533495,0.550056,2.17742
DistMult,pairwise,0.620659,0.628691,0.618804,0.594134,0.618804,0.620544,0.667003
DistMult,nll,0.66975,0.670063,0.668784,0.644589,0.668784,0.66987,3.0877
DistMult,multiclass_nll,0.556157,0.575322,0.553508,0.525326,0.553508,0.55611,1.27324
ComplEx,self_adversarial,0.543875,0.565044,0.534285,0.515995,0.534285,0.543993,2.39253
ComplEx,pairwise,0.642209,0.663042,0.641281,0.60857,0.641281,0.642167,0.661735


In [24]:
# Brier scores of the self-adversarial runs only, rounded to 3 decimals and
# printed as a LaTeX table for a subset of the calibration variants.
print((bs.reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
 .reset_index(drop=True)
 .round(3)
 .to_latex()))

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   0.446 &           0.124 &               0.124 &       0.148 &           0.141 \\
1 &  DistMult &   0.473 &           0.178 &               0.170 &       0.185 &           0.192 \\
2 &   ComplEx &   0.481 &           0.177 &               0.170 &       0.182 &           0.189 \\
\bottomrule
\end{tabular}



In [25]:
# Log losses of the self-adversarial runs only, rounded to 3 decimals and
# printed as a LaTeX table for a subset of the calibration variants.
print(ll.reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
 .reset_index(drop=True)
  .round(3)
 .to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   1.534 &           0.390 &               0.391 &       0.459 &           0.442 \\
1 &  DistMult &   2.177 &           0.533 &               0.518 &       0.549 &           0.567 \\
2 &   ComplEx &   2.393 &           0.534 &               0.516 &       0.544 &           0.565 \\
\bottomrule
\end{tabular}



In [33]:
# Accuracies of the self-adversarial runs as percentages (x100, 1 decimal), printed as LaTeX.
# NOTE: `acc` is defined in a cell that appears further down in this notebook (In [28]),
# so this cell fails on a fresh top-to-bottom run unless that cell is executed first.
print((acc*100).reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'pos_neg', 'pos_neg_iso', 'pos', 'pos_iso',  'uncalib', 'per_relation']]
 .reset_index(drop=True)
  .round(1)
 .to_latex())

\begin{tabular}{llrrrrrr}
\toprule
{} &     model &  pos\_neg &  pos\_neg\_iso &   pos &  pos\_iso &  uncalib &  per\_relation \\
\midrule
0 &    TransE &     82.4 &         82.3 &  79.3 &     80.3 &     50.0 &          82.0 \\
1 &  DistMult &     72.5 &         73.2 &  72.3 &     70.2 &     50.1 &          80.8 \\
2 &   ComplEx &     73.8 &         74.2 &  74.2 &     72.4 &     50.1 &          83.6 \\
\bottomrule
\end{tabular}



In [27]:
# Ranking metrics only, with the common "metrics_" prefix stripped from the column names.
metrics = (
    df[[col for col in df.columns if col.startswith('metrics')]]
    .rename(columns=lambda col: col[len("metrics_"):])
)
metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,hits@10,mr,mrr
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TransE,self_adversarial,0.393713,3437.758417,0.296085
TransE,pairwise,0.374099,7631.636561,0.282413
TransE,nll,0.272153,6790.529242,0.202153
TransE,multiclass_nll,0.402035,5049.899317,0.308465
DistMult,self_adversarial,0.320103,5998.059601,0.181442
DistMult,pairwise,0.162032,11409.450027,0.095053
DistMult,nll,0.054945,16951.139258,0.028533
DistMult,multiclass_nll,0.25829,7125.716892,0.171373
ComplEx,self_adversarial,0.337273,6667.318902,0.183393
ComplEx,pairwise,0.156786,12745.941769,0.092511


In [28]:
def highlight_max(s):
    """Return one CSS string per element of ``s``, bolding every entry equal to the maximum.

    Intended as a callback for ``DataFrame.style.apply``; all other entries
    get an empty style string.
    """
    largest = s.max()
    return ['font-weight: bold' if value == largest else '' for value in s]

# Accuracies only, with the common "accuracy_" prefix stripped from the column names.
acc = (
    df[[col for col in df.columns if col.startswith('accuracy')]]
    .rename(columns=lambda col: col[len("accuracy_"):])
)
# Higher is better: bold the largest value of every row.
acc.style.apply(highlight_max, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,per_relation,pos,pos_iso,pos_neg,pos_neg_iso,pos_neg_sc,pos_sc,uncalib
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
TransE,self_adversarial,0.820032,0.792622,0.802608,0.823508,0.823424,0.823466,0.795993,0.499979
TransE,pairwise,0.804526,0.610252,0.640022,0.554462,0.642234,0.554547,0.610189,0.499979
TransE,nll,0.663893,0.54496,0.621777,0.67883,0.67942,0.67883,0.550185,0.499979
TransE,multiclass_nll,0.825678,0.784658,0.778021,0.796857,0.796667,0.796857,0.787355,0.499979
DistMult,self_adversarial,0.80817,0.722927,0.702216,0.724907,0.73167,0.724907,0.722864,0.500864
DistMult,pairwise,0.682875,0.666337,0.647354,0.685846,0.692609,0.685846,0.6672,0.57018
DistMult,nll,0.609704,0.588214,0.626433,0.610147,0.628223,0.610105,0.586508,0.540473
DistMult,multiclass_nll,0.764917,0.742795,0.708305,0.748167,0.749178,0.748167,0.743237,0.60252
ComplEx,self_adversarial,0.83577,0.741762,0.724001,0.737696,0.74172,0.737675,0.741783,0.500611
ComplEx,pairwise,0.637136,0.616446,0.608061,0.648639,0.671899,0.64866,0.61691,0.553746


In [29]:
# Spearman rank correlation across runs between every accuracy column (rows)
# and the log loss of four calibration variants (columns).
(
    df.corr(method='spearman')
    .reset_index()
    .query("index.str.startswith('accuracy')")
    [['index', 'log_loss_probas_pos_neg', 'log_loss_probas_pos_neg_iso', 'log_loss_probas_pos', 'log_loss_probas_pos_iso']]
)

Unnamed: 0,index,log_loss_probas_pos_neg,log_loss_probas_pos_neg_iso,log_loss_probas_pos,log_loss_probas_pos_iso
0,accuracy_per_relation,-0.867133,-0.937063,-0.881119,-0.923077
1,accuracy_pos,-0.839161,-0.839161,-0.951049,-0.909091
2,accuracy_pos_iso,-0.839161,-0.86014,-0.916084,-0.923077
3,accuracy_pos_neg,-0.923077,-0.867133,-0.867133,-0.783217
4,accuracy_pos_neg_iso,-0.944056,-0.902098,-0.888112,-0.818182
5,accuracy_pos_neg_sc,-0.923077,-0.867133,-0.867133,-0.783217
6,accuracy_pos_sc,-0.839161,-0.839161,-0.951049,-0.909091
7,accuracy_uncalib,0.34525,0.405758,0.181523,0.316776


In [30]:
# Spearman rank correlation across runs between every accuracy column (rows)
# and the Brier score of four calibration variants (columns).
(
    df.corr(method='spearman')
    .reset_index()
    .query("index.str.startswith('accuracy')")
    [['index', 'brier_score_probas_pos_neg', 'brier_score_probas_pos_neg_iso', 'brier_score_probas_pos', 'brier_score_probas_pos_iso']]
)

Unnamed: 0,index,brier_score_probas_pos_neg,brier_score_probas_pos_neg_iso,brier_score_probas_pos,brier_score_probas_pos_iso
0,accuracy_per_relation,-0.846154,-0.937063,-0.825175,-0.923077
1,accuracy_pos,-0.853147,-0.839161,-0.972028,-0.909091
2,accuracy_pos_iso,-0.832168,-0.86014,-0.902098,-0.923077
3,accuracy_pos_neg,-0.951049,-0.867133,-0.902098,-0.783217
4,accuracy_pos_neg_iso,-0.965035,-0.902098,-0.916084,-0.818182
5,accuracy_pos_neg_sc,-0.951049,-0.867133,-0.902098,-0.783217
6,accuracy_pos_sc,-0.853147,-0.839161,-0.972028,-0.909091
7,accuracy_uncalib,0.306098,0.405758,0.09966,0.316776


In [34]:
# Number of distinct relations (column 1) occurring in the validation triples.
np.unique(X['valid'][:, 1]).size

7