In [1]:
import numpy as np
import itertools
import pandas as pd
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_yago39k
from ampligraph.latent_features.models import TransE, ComplEx, DistMult
import types

In [2]:
from generate_corruptions import generate_corruptions, calibration_loss, pos_iso

In [3]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [4]:
X = load_yago39k()

In [5]:
X_valid_pos = X['valid'][X['valid_labels']]
X_valid_neg = X['valid'][~X['valid_labels']]

X_test_pos = X['test'][X['test_labels']]
X_test_neg = X['test'][~X['test_labels']]

In [6]:
losses =  ['self_adversarial', 'pairwise', 'nll', 'multiclass_nll']
models = [TransE, DistMult, ComplEx]

results = []

for m, l in itertools.product(models, losses):
    model = m(batches_count=64, seed=0, epochs=1000, k=100, eta=20,
                   optimizer='adam', optimizer_params={'lr':0.0001},
                   loss=l, verbose=False)

    model.fit(X['train'])
    
    scores = model.predict(X['test'])

    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas1 = model.predict_proba(X['test'])

    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas2 = model.predict_proba(X['test'])
    
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), (X['valid_labels']).astype(float))
    probas3 = ir.predict(np.squeeze(scores).astype(float))
    
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions, positive_base_rate=0.5)
    probas4 = iso_pos.predict(np.squeeze(scores).astype(float))

    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas5 = sc_pos.predict(np.squeeze(scores).astype(float))
    
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas6 = sc_pos_neg.predict(np.squeeze(scores).astype(float))
    
    thresholds = {r: np.median(np.sort(val_scores[X['valid'][:, 1] == r])) for r in np.unique(X['valid'][:, 1])}
    thresholds_test = np.vectorize(thresholds.get)(X['test'][:, 1])
    per_relation_acc = accuracy_score(X['test_labels'], scores > thresholds_test)

    acc_uncalib = accuracy_score(X['test_labels'], expit(scores) > 0.5)

    acc1 = accuracy_score(X['test_labels'], probas1 > 0.5)
    acc2 = accuracy_score(X['test_labels'], probas2 > 0.5)
    acc3 = accuracy_score(X['test_labels'], probas3 > 0.5)
    acc4 = accuracy_score(X['test_labels'], probas4 > 0.5)
    acc5 = accuracy_score(X['test_labels'], probas5 > 0.5)
    acc6 = accuracy_score(X['test_labels'], probas6 > 0.5)
    
    filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))
    ranks = evaluate_performance(X_test_pos, 
                                 model=model, 
                                 filter_triples=filter_triples,
                                 use_default_protocol=True, 
                                 verbose=False)

    results.append({
        'model': m.__name__,
        'loss': l,
        'brier_score_scores': brier_score_loss(X['test_labels'], expit(scores)),
        'log_loss_scores': log_loss(X['test_labels'], expit(scores), eps=1e-7),
        'brier_score_probas_pos': brier_score_loss(X['test_labels'], probas1),
        'log_loss_probas_pos': log_loss(X['test_labels'], probas1, eps=1e-7),
        'brier_score_probas_pos_neg': brier_score_loss(X['test_labels'], probas2),
        'log_loss_probas_pos_neg': log_loss(X['test_labels'], probas2, eps=1e-7),
        'brier_score_probas_pos_neg_iso': brier_score_loss(X['test_labels'], probas3),
        'log_loss_probas_pos_neg_iso': log_loss(X['test_labels'], probas3, eps=1e-7),
        'brier_score_probas_pos_iso': brier_score_loss(X['test_labels'], probas4),
        'log_loss_probas_pos_iso': log_loss(X['test_labels'], probas4, eps=1e-7),
        'brier_score_probas_pos_sc': brier_score_loss(X['test_labels'], probas5),
        'log_loss_probas_pos_sc': log_loss(X['test_labels'], probas5, eps=1e-7),
        'brier_score_probas_pos_neg_sc': brier_score_loss(X['test_labels'], probas6),
        'log_loss_probas_pos_neg_sc': log_loss(X['test_labels'], probas6, eps=1e-7),
        'ece_scores': calibration_loss(X['test_labels'], expit(scores)),
        'ece_probas_pos': calibration_loss(X['test_labels'], probas1),
        'ece_probas_pos_neg': calibration_loss(X['test_labels'], probas2),
        'ece_probas_pos_neg_iso': calibration_loss(X['test_labels'], probas3),
        'ece_probas_pos_iso': calibration_loss(X['test_labels'], probas4),
        'ece_probas_pos_sc': calibration_loss(X['test_labels'], probas5),
        'ece_probas_pos_neg_sc': calibration_loss(X['test_labels'], probas6),
        'metrics_mrr': mrr_score(ranks), 
        'metrics_hits@10': hits_at_n_score(ranks, n=10),
        'metrics_mr': mr_score(ranks),
        'accuracy_per_relation': per_relation_acc,
        'accuracy_uncalib': acc_uncalib,
        'accuracy_pos': acc1,
        'accuracy_pos_neg': acc2,
        'accuracy_pos_neg_iso': acc3,
        'accuracy_pos_iso': acc4,
        'accuracy_pos_sc': acc5,
        'accuracy_pos_neg_sc': acc6
    })
        
    print(json.dumps(results[-1], indent=2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
pos [-2.0010226, -7.369637]
pos neg [-1.8528956, -5.5881095]
Instructions for updating:
Use tf.random.categorical instead.
pos sc -2.32434270509557 -8.266740859753332
pos neg sc -1.8536496942780376 -5.599349552076064


  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "self_adversarial",
  "brier_score_scores": 0.36302252768719817,
  "log_loss_scores": 1.0622060798888528,
  "brier_score_probas_pos": 0.11324827346184063,
  "log_loss_probas_pos": 0.3990180135640079,
  "brier_score_probas_pos_neg": 0.09484856633983454,
  "log_loss_probas_pos_neg": 0.31857378505872125,
  "brier_score_probas_pos_neg_iso": 0.09250361000856487,
  "log_loss_probas_pos_neg_iso": 0.3081948776976417,
  "brier_score_probas_pos_iso": 0.10927081592075297,
  "log_loss_probas_pos_iso": 0.37590272087934945,
  "brier_score_probas_pos_sc": 0.10616221881001453,
  "log_loss_probas_pos_sc": 0.38805080291314337,
  "brier_score_probas_pos_neg_sc": 0.09480421413974688,
  "log_loss_probas_pos_neg_sc": 0.3185774106611587,
  "ece_scores": 0.40830181367935675,
  "ece_probas_pos": 0.11868441263834635,
  "ece_probas_pos_neg": 0.028639741302818367,
  "ece_probas_pos_neg_iso": 1.1555869761689801e-17,
  "ece_probas_pos_iso": 0.09606850604195921,
  "ece_probas_pos_sc"

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "pairwise",
  "brier_score_scores": 0.49772074388347165,
  "log_loss_scores": 4.921945735433863,
  "brier_score_probas_pos": 0.1470969047874042,
  "log_loss_probas_pos": 0.47739234029725996,
  "brier_score_probas_pos_neg": 0.123208328346179,
  "log_loss_probas_pos_neg": 0.44454980222833146,
  "brier_score_probas_pos_neg_iso": 0.10320889810531554,
  "log_loss_probas_pos_neg_iso": 0.3517519332316571,
  "brier_score_probas_pos_iso": 0.11293827956899179,
  "log_loss_probas_pos_iso": 0.39293455449606485,
  "brier_score_probas_pos_sc": 0.13953547628203666,
  "log_loss_probas_pos_sc": 0.4784827552125384,
  "brier_score_probas_pos_neg_sc": 0.12332184709502418,
  "log_loss_probas_pos_neg_sc": 0.4445549083746815,
  "ece_scores": 0.49774829798488207,
  "ece_probas_pos": 0.19326106799546108,
  "ece_probas_pos_neg": 0.14001669688891338,
  "ece_probas_pos_neg_iso": 9.060852426779504e-17,
  "ece_probas_pos_iso": 0.06509857470870574,
  "ece_probas_pos_sc": 0.1328616564

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "nll",
  "brier_score_scores": 0.21784579412106042,
  "log_loss_scores": 0.6263009653959714,
  "brier_score_probas_pos": 0.2602587073858255,
  "log_loss_probas_pos": 0.7561972348480968,
  "brier_score_probas_pos_neg": 0.18755191875709093,
  "log_loss_probas_pos_neg": 0.5773911825392319,
  "brier_score_probas_pos_neg_iso": 0.17024512059350866,
  "log_loss_probas_pos_neg_iso": 0.5183417435281257,
  "brier_score_probas_pos_iso": 0.20012937065398956,
  "log_loss_probas_pos_iso": 0.621679194380186,
  "brier_score_probas_pos_sc": 0.23503884402118982,
  "log_loss_probas_pos_sc": 0.679265388467313,
  "brier_score_probas_pos_neg_sc": 0.18761304103056478,
  "log_loss_probas_pos_neg_sc": 0.5773792040089649,
  "ece_scores": 0.15027275751995783,
  "ece_probas_pos": 0.2475832591518279,
  "ece_probas_pos_neg": 0.0970551919424406,
  "ece_probas_pos_neg_iso": 1.055547525132864e-16,
  "ece_probas_pos_iso": 0.16716382999702506,
  "ece_probas_pos_sc": 0.1981722519558999,
 

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "multiclass_nll",
  "brier_score_scores": 0.4977946275853237,
  "log_loss_scores": 7.859535452552008,
  "brier_score_probas_pos": 0.12828468669263407,
  "log_loss_probas_pos": 0.4325858789011811,
  "brier_score_probas_pos_neg": 0.11177965364703041,
  "log_loss_probas_pos_neg": 0.3947554223886153,
  "brier_score_probas_pos_neg_iso": 0.10420754263728874,
  "log_loss_probas_pos_neg_iso": 0.3517703548796889,
  "brier_score_probas_pos_iso": 0.11652963893872059,
  "log_loss_probas_pos_iso": 0.44243625898842465,
  "brier_score_probas_pos_sc": 0.12315189059065008,
  "log_loss_probas_pos_sc": 0.4768157164448518,
  "brier_score_probas_pos_neg_sc": 0.11174939021187671,
  "log_loss_probas_pos_neg_sc": 0.3947600928854345,
  "ece_scores": 0.4977949906549647,
  "ece_probas_pos": 0.1396986916244671,
  "ece_probas_pos_neg": 0.07281328837076821,
  "ece_probas_pos_neg_iso": 7.82528164130957e-18,
  "ece_probas_pos_iso": 0.09125392193509929,
  "ece_probas_pos_sc": 0.0954662

  E = np.exp(AB[0] * F + AB[1])
  TEP_minus_T1P = P * (T * E - T1)


pos sc -0.238725597936733 2.7487726910510495
pos neg sc -0.22040129037500822 3.315367014924353
{
  "model": "DistMult",
  "loss": "multiclass_nll",
  "brier_score_scores": 0.4563010479896885,
  "log_loss_scores": 4.224559624967882,
  "brier_score_probas_pos": 0.17167909333101242,
  "log_loss_probas_pos": 0.5161328252404302,
  "brier_score_probas_pos_neg": 0.12655919205327312,
  "log_loss_probas_pos_neg": 0.40707941447404056,
  "brier_score_probas_pos_neg_iso": 0.11258924225310064,
  "log_loss_probas_pos_neg_iso": 0.3518024669721689,
  "brier_score_probas_pos_iso": 0.13458584661739978,
  "log_loss_probas_pos_iso": 0.40586321438012934,
  "brier_score_probas_pos_sc": 0.14929025823321715,
  "log_loss_probas_pos_sc": 0.45087411237855207,
  "brier_score_probas_pos_neg_sc": 0.12665846179368587,
  "log_loss_probas_pos_neg_sc": 0.4070846198609235,
  "ece_scores": 0.46467972370886035,
  "ece_probas_pos": 0.15811050456057316,
  "ece_probas_pos_neg": 0.07275065104166667,
  "ece_probas_pos_neg_iso"

  E = np.exp(AB[0] * F + AB[1])
  TEP_minus_T1P = P * (T * E - T1)


pos sc -0.29024916512395377 2.8758542316570543
pos neg sc -0.2646602843774886 3.4546923104479426
{
  "model": "ComplEx",
  "loss": "multiclass_nll",
  "brier_score_scores": 0.42882663951865674,
  "log_loss_scores": 3.3327317538162293,
  "brier_score_probas_pos": 0.1512251734073152,
  "log_loss_probas_pos": 0.4631039519249512,
  "brier_score_probas_pos_neg": 0.10791405838081022,
  "log_loss_probas_pos_neg": 0.353527897720351,
  "brier_score_probas_pos_neg_iso": 0.10117277542640077,
  "log_loss_probas_pos_neg_iso": 0.3167186081062865,
  "brier_score_probas_pos_iso": 0.12303165845386482,
  "log_loss_probas_pos_iso": 0.3718940172463859,
  "brier_score_probas_pos_sc": 0.12903198374236924,
  "log_loss_probas_pos_sc": 0.3963750608602761,
  "brier_score_probas_pos_neg_sc": 0.10799402360280319,
  "log_loss_probas_pos_neg_sc": 0.3535323817013859,
  "ece_scores": 0.4444651114043369,
  "ece_probas_pos": 0.14252598957348894,
  "ece_probas_pos_neg": 0.07074522203014741,
  "ece_probas_pos_neg_iso": 1

In [7]:
import pandas as pd

In [8]:
def highlight_min(s):
    is_min = s == s.min()
    return ['font-weight: bold' if v else '' for v in is_min]

In [9]:
df = pd.DataFrame(results).set_index(['model', 'loss'])

In [10]:
bs = df[(c for c in df.columns if c.startswith('brier'))]
bs.columns = [c[len("brier_score_"):] for c in bs.columns]
bs.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.113248,0.109271,0.0948486,0.0925036,0.0948042,0.106162,0.363023
TransE,pairwise,0.147097,0.112938,0.123208,0.103209,0.123322,0.139535,0.497721
TransE,nll,0.260259,0.200129,0.187552,0.170245,0.187613,0.235039,0.217846
TransE,multiclass_nll,0.128285,0.11653,0.11178,0.104208,0.111749,0.123152,0.497795
DistMult,self_adversarial,0.0908349,0.0886698,0.0813233,0.0785058,0.0813693,0.0929009,0.284488
DistMult,pairwise,0.167731,0.133479,0.129223,0.115373,0.129328,0.148937,0.371687
DistMult,nll,0.0785158,0.0761127,0.0717517,0.0690451,0.071781,0.0768545,0.104989
DistMult,multiclass_nll,0.171679,0.134586,0.126559,0.112589,0.126658,0.14929,0.456301
ComplEx,self_adversarial,0.0947329,0.0948809,0.0893608,0.0843688,0.0894297,0.097268,0.264185
ComplEx,pairwise,0.150501,0.123009,0.114799,0.106576,0.114883,0.131116,0.321751


In [11]:
ll = df[(c for c in df.columns if c.startswith('log_loss'))]
ll.columns = [c[len("log_loss_"):] for c in ll.columns]
ll.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.399018,0.375903,0.318574,0.308195,0.318577,0.388051,1.06221
TransE,pairwise,0.477392,0.392935,0.44455,0.351752,0.444555,0.478483,4.92195
TransE,nll,0.756197,0.621679,0.577391,0.518342,0.577379,0.679265,0.626301
TransE,multiclass_nll,0.432586,0.442436,0.394755,0.35177,0.39476,0.476816,7.85954
DistMult,self_adversarial,0.308823,0.307952,0.279444,0.265872,0.279448,0.310798,1.04257
DistMult,pairwise,0.503579,0.398257,0.415324,0.356179,0.41533,0.45039,1.4669
DistMult,nll,0.270794,0.261522,0.247208,0.231766,0.247211,0.263137,0.71899
DistMult,multiclass_nll,0.516133,0.405863,0.407079,0.351802,0.407085,0.450874,4.22456
ComplEx,self_adversarial,0.319064,0.312896,0.305282,0.277733,0.305286,0.322939,1.19863
ComplEx,pairwise,0.462606,0.368846,0.373295,0.329402,0.3733,0.404539,1.06682


In [12]:
print((bs.reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
 .reset_index(drop=True)
 .round(3)
 .to_latex()))

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   0.363 &           0.095 &               0.093 &       0.113 &           0.109 \\
1 &  DistMult &   0.284 &           0.081 &               0.079 &       0.091 &           0.089 \\
2 &   ComplEx &   0.264 &           0.089 &               0.084 &       0.095 &           0.095 \\
\bottomrule
\end{tabular}



In [13]:
print(ll.reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
 .reset_index(drop=True)
  .round(3)
 .to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   1.062 &           0.319 &               0.308 &       0.399 &           0.376 \\
1 &  DistMult &   1.043 &           0.279 &               0.266 &       0.309 &           0.308 \\
2 &   ComplEx &   1.199 &           0.305 &               0.278 &       0.319 &           0.313 \\
\bottomrule
\end{tabular}



In [14]:
print((acc*100).reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'pos_neg', 'pos_neg_iso', 'pos', 'pos_iso',  'uncalib', 'per_relation']]
 .reset_index(drop=True)
  .round(1)
 .to_latex())

NameError: name 'acc' is not defined

In [None]:
metrics = df[(c for c in df.columns if c.startswith('metrics'))]
metrics.columns = [c[len("metrics_"):] for c in metrics.columns]
metrics

In [None]:
def highlight_max(s):
    is_min = s == s.max()
    return ['font-weight: bold' if v else '' for v in is_min]

acc = df[(c for c in df.columns if c.startswith('accuracy'))]
acc.columns = [c[len("accuracy_"):] for c in acc.columns]
acc.style.apply(highlight_max, axis=1)

In [None]:
df.corr(method='spearman')

In [None]:
len(np.unique(X['valid'][:, 1]))