In [1]:
import numpy as np
import itertools
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration, _sigmoid_calibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_yago39k
from ampligraph.latent_features.models import TransE, ComplEx, DistMult

In [2]:
from generate_corruptions import generate_corruptions, calibration_loss, pos_iso
import types

In [3]:
# Set the CUDA_VISIBLE_DEVICES environment variable of this kernel process to "1".
# The value is machine-specific: change it to the GPU index you want to use.
# (Keep this comment on its own line: %env treats the rest of its line as the value.)
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [4]:
# Load the YAGO39K dataset. The cells below read the keys 'train', 'valid',
# 'valid_labels', 'test' and 'test_labels' from the returned mapping.
X = load_yago39k()

In [5]:
# Split the labelled validation and test triples into positives and negatives.
# The label arrays are used directly as selection masks (and inverted with `~`
# for the negative half).
X_valid_pos, X_valid_neg = (X['valid'][keep] for keep in (X['valid_labels'], ~X['valid_labels']))
X_test_pos, X_test_neg = (X['test'][keep] for keep in (X['test_labels'], ~X['test_labels']))

In [None]:
# Sweep over the `eta` argument of TransE. For every trained model, several
# calibration strategies are fitted on the validation split and scored on the
# test split; one record (dict) per eta value is appended to `results`.
results = []

# Loop-invariant inputs: build them once rather than on every iteration.
y_test = X['test_labels']
filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))

for eta in [1, 5, 10, 20, 50, 100]:
    model = TransE(batches_count=64, seed=0, epochs=500, k=100, eta=eta,
                   optimizer='adam', optimizer_params={'lr': 0.0001},
                   loss='self_adversarial', verbose=False)
    model.fit(X['train'])

    # Raw test scores; the squeezed float copy is what every scikit-learn
    # style calibrator below consumes.
    scores = model.predict(X['test'])
    scores_flat = np.squeeze(scores).astype(float)

    # probas1: model.calibrate() given only positive validation triples
    # (no negatives supplied) and a positive base rate of 0.5.
    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas1 = model.predict_proba(X['test'])

    # probas2: model.calibrate() given positive and negative validation triples.
    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas2 = model.predict_proba(X['test'])

    # probas3: isotonic regression fitted on labelled validation scores.
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), X['valid_labels'].astype(float))
    probas3 = ir.predict(scores_flat)

    # probas4 / probas5: isotonic and sigmoid calibrators fitted through the
    # project-local pos_iso() helper from positive validation scores and the
    # output of generate_corruptions().
    # NOTE(review): `corruptions` is presumably an array of scores for
    # synthetic negatives (it is passed where `val_neg_scores` goes below) -
    # confirm against generate_corruptions.py.
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions,
                      positive_base_rate=0.5)
    probas4 = iso_pos.predict(scores_flat)

    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas5 = sc_pos.predict(scores_flat)

    # probas6: sigmoid calibrator fitted on positive vs. negative validation scores.
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas6 = sc_pos_neg.predict(scores_flat)

    # Per-relation decision threshold: the median validation score of each
    # relation (np.median does not need pre-sorted input). The explicit dict
    # lookup raises a KeyError naming the relation if the test split contains
    # a relation that never occurs in the validation split.
    valid_relations = X['valid'][:, 1]
    thresholds = {r: np.median(val_scores[valid_relations == r]) for r in np.unique(valid_relations)}
    thresholds_test = np.array([thresholds[r] for r in X['test'][:, 1]])
    per_relation_acc = accuracy_score(y_test, scores > thresholds_test)

    ranks = evaluate_performance(X_test_pos,
                                 model=model,
                                 filter_triples=filter_triples,
                                 use_default_protocol=True,
                                 verbose=False)

    # (metric-key suffix, accuracy-key suffix, probabilities) for every variant,
    # in the order the keys must appear in the record.
    variants = [
        ('scores', 'uncalib', expit(scores)),
        ('probas_pos', 'pos', probas1),
        ('probas_pos_neg', 'pos_neg', probas2),
        ('probas_pos_neg_iso', 'pos_neg_iso', probas3),
        ('probas_pos_iso', 'pos_iso', probas4),
        ('probas_pos_sc', 'pos_sc', probas5),
        ('probas_pos_neg_sc', 'pos_neg_sc', probas6),
    ]

    # Insertion order defines the column order of the DataFrame/CSV built
    # from `results`, so the groups are filled in a fixed sequence.
    row = {'eta': eta}
    for metric_name, _, proba in variants:
        row['brier_score_' + metric_name] = brier_score_loss(y_test, proba)
        row['log_loss_' + metric_name] = log_loss(y_test, proba, eps=1e-7)
    for metric_name, _, proba in variants:
        row['ece_' + metric_name] = calibration_loss(y_test, proba)
    row['metrics_mrr'] = mrr_score(ranks)
    row['metrics_hits@10'] = hits_at_n_score(ranks, n=10)
    row['metrics_mr'] = mr_score(ranks)
    row['accuracy_per_relation'] = per_relation_acc
    for _, accuracy_name, proba in variants:
        # Classify as positive when the (calibrated) probability exceeds 0.5.
        row['accuracy_' + accuracy_name] = accuracy_score(y_test, proba > 0.5)
    results.append(row)

    print(json.dumps(results[-1], indent=2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
pos [-1.6653656, -5.738102]
pos neg [-1.2534279, -2.9045715]
Instructions for updating:
Use tf.random.categorical instead.
pos sc -1.4891602397921406 -4.630190091298186
pos neg sc -1.25362726605965 -2.913793178911476


  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 1,
  "brier_score_scores": 0.2990940109872152,
  "log_loss_scores": 0.8534317038209447,
  "brier_score_probas_pos": 0.17924375195366551,
  "log_loss_probas_pos": 0.621570183537097,
  "brier_score_probas_pos_neg": 0.132167364349101,
  "log_loss_probas_pos_neg": 0.4212182213972455,
  "brier_score_probas_pos_neg_iso": 0.13048509178910545,
  "log_loss_probas_pos_neg_iso": 0.4143905676133821,
  "brier_score_probas_pos_iso": 0.15279711329855034,
  "log_loss_probas_pos_iso": 0.5033897956900774,
  "brier_score_probas_pos_sc": 0.1542455858541544,
  "log_loss_probas_pos_sc": 0.5083528134544549,
  "brier_score_probas_pos_neg_sc": 0.13214717564302206,
  "log_loss_probas_pos_neg_sc": 0.42122334042488857,
  "ece_scores": 0.34360445740402384,
  "ece_probas_pos": 0.18768277404128858,
  "ece_probas_pos_neg": 0.023940846432921714,
  "ece_probas_pos_neg_iso": 3.328505332765649e-17,
  "ece_probas_pos_iso": 0.13161309109594155,
  "ece_probas_pos_sc": 0.13344239473566674,
  "ece_probas_pos_neg_sc

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 5,
  "brier_score_scores": 0.34525664756514074,
  "log_loss_scores": 1.010852467025244,
  "brier_score_probas_pos": 0.12594672125241357,
  "log_loss_probas_pos": 0.45203168805284766,
  "brier_score_probas_pos_neg": 0.10476622173901565,
  "log_loss_probas_pos_neg": 0.34780318962740875,
  "brier_score_probas_pos_neg_iso": 0.10235210805914366,
  "log_loss_probas_pos_neg_iso": 0.33594316125298135,
  "brier_score_probas_pos_iso": 0.11996567514702204,
  "log_loss_probas_pos_iso": 0.41044275111363593,
  "brier_score_probas_pos_sc": 0.11744910057957787,
  "log_loss_probas_pos_sc": 0.42587330558503766,
  "brier_score_probas_pos_neg_sc": 0.10471888370027199,
  "log_loss_probas_pos_neg_sc": 0.3478071910930869,
  "ece_scores": 0.39329002667498847,
  "ece_probas_pos": 0.13058655031265756,
  "ece_probas_pos_neg": 0.03210463329028058,
  "ece_probas_pos_neg_iso": 3.360215331788881e-17,
  "ece_probas_pos_iso": 0.10545353061571812,
  "ece_probas_pos_sc": 0.10523954245991826,
  "ece_probas_pos

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 10,
  "brier_score_scores": 0.3566023481546557,
  "log_loss_scores": 1.044927650847396,
  "brier_score_probas_pos": 0.11881619257490644,
  "log_loss_probas_pos": 0.4215312425052542,
  "brier_score_probas_pos_neg": 0.09943089439349351,
  "log_loss_probas_pos_neg": 0.3323445493324662,
  "brier_score_probas_pos_neg_iso": 0.09715326167836309,
  "log_loss_probas_pos_neg_iso": 0.32134427636787377,
  "brier_score_probas_pos_iso": 0.11449918993104018,
  "log_loss_probas_pos_iso": 0.3925178473532493,
  "brier_score_probas_pos_sc": 0.11141419305762142,
  "log_loss_probas_pos_sc": 0.4059176938852968,
  "brier_score_probas_pos_neg_sc": 0.09938551689584318,
  "log_loss_probas_pos_neg_sc": 0.332348349834375,
  "ece_scores": 0.40293670818369876,
  "ece_probas_pos": 0.12360714081794985,
  "ece_probas_pos_neg": 0.02849201407483829,
  "ece_probas_pos_neg_iso": 1.1928928573727825e-17,
  "ece_probas_pos_iso": 0.10110610379050722,
  "ece_probas_pos_sc": 0.10039169355659808,
  "ece_probas_pos_neg

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 20,
  "brier_score_scores": 0.36330024075707135,
  "log_loss_scores": 1.0653166959150406,
  "brier_score_probas_pos": 0.11529786156468447,
  "log_loss_probas_pos": 0.40554009329967927,
  "brier_score_probas_pos_neg": 0.09669361261544628,
  "log_loss_probas_pos_neg": 0.32389255539003087,
  "brier_score_probas_pos_neg_iso": 0.09455136437801376,
  "log_loss_probas_pos_neg_iso": 0.3139312619136119,
  "brier_score_probas_pos_iso": 0.11172318802533193,
  "log_loss_probas_pos_iso": 0.38297771559686217,
  "brier_score_probas_pos_sc": 0.10844520617493192,
  "log_loss_probas_pos_sc": 0.3948264706251553,
  "brier_score_probas_pos_neg_sc": 0.09665114448713925,
  "log_loss_probas_pos_neg_sc": 0.3238962391631376,
  "ece_scores": 0.4082506965309061,
  "ece_probas_pos": 0.11989542151010162,
  "ece_probas_pos_neg": 0.028141835530598947,
  "ece_probas_pos_neg_iso": 1.0553087674931595e-17,
  "ece_probas_pos_iso": 0.09866591164543025,
  "ece_probas_pos_sc": 0.09774675212980528,
  "ece_probas_po

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 50,
  "brier_score_scores": 0.36763954495144163,
  "log_loss_scores": 1.078345515147219,
  "brier_score_probas_pos": 0.1131232018706151,
  "log_loss_probas_pos": 0.3954430295396452,
  "brier_score_probas_pos_neg": 0.09492668311968024,
  "log_loss_probas_pos_neg": 0.3180924814284428,
  "brier_score_probas_pos_neg_iso": 0.09291660878186214,
  "log_loss_probas_pos_neg_iso": 0.30917695917617777,
  "brier_score_probas_pos_iso": 0.10985584743058707,
  "log_loss_probas_pos_iso": 0.376567247418866,
  "brier_score_probas_pos_sc": 0.10653240941587439,
  "log_loss_probas_pos_sc": 0.38705073815064134,
  "brier_score_probas_pos_neg_sc": 0.0948853666568497,
  "log_loss_probas_pos_neg_sc": 0.3180959701601974,
  "ece_scores": 0.41161782315982287,
  "ece_probas_pos": 0.11764937534127182,
  "ece_probas_pos_neg": 0.025843228576003886,
  "ece_probas_pos_neg_iso": 5.109413489672763e-18,
  "ece_probas_pos_iso": 0.09672710614599102,
  "ece_probas_pos_sc": 0.09601794100075732,
  "ece_probas_pos_neg

In [None]:
# Keep the eta-sweep records reachable under their own name: `results` is
# rebound to a fresh list when the embedding-size sweep starts further down.
eta_results = results

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Tabulate the eta sweep: one row per record in `eta_results`, one column per key.
df=pd.DataFrame(eta_results)
# Show the column index so the available metric names are visible in the output.
df.columns

In [None]:
# Write the eta-sweep table to "yago39k_eta.csv" (relative path), omitting the row index.
df.to_csv("yago39k_eta.csv", index=False)

In [None]:
# Brier score as a function of eta for three of the variants recorded in `df`.
# Font settings are applied before the axes are created so they take effect.
plt.rcParams.update({'font.size': 20, 'axes.titlesize': 18})
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in [
    ('brier_score_probas_pos_neg', "Platt scaling"),
    ('brier_score_probas_pos_neg_iso', "Isotonic"),
    ('brier_score_scores', "Uncalibrated scores"),
]:
    ax.plot(df['eta'], df[column], lw=3, label=label)
ax.set_xlabel("Eta")
ax.set_ylabel("Brier score")
ax.legend()
ax.set_title("YAGO39K")

In [None]:
# Sweep over the embedding size `k` of TransE (eta fixed at 20). For every
# trained model, several calibration strategies are fitted on the validation
# split and scored on the test split; one record per k is appended to `results`.
# NOTE(review): this cell mirrors the eta sweep earlier in the notebook; the
# shared body is a good candidate for a single helper function.
results = []

# Loop-invariant inputs: build them once rather than on every iteration.
y_test = X['test_labels']
filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))

for k in [10, 25, 50, 100, 200, 500]:
    model = TransE(batches_count=64, seed=0, epochs=500, k=k, eta=20,
                   optimizer='adam', optimizer_params={'lr': 0.0001},
                   loss='self_adversarial', verbose=False)
    model.fit(X['train'])

    # Raw test scores; the squeezed float copy is what every scikit-learn
    # style calibrator below consumes.
    scores = model.predict(X['test'])
    scores_flat = np.squeeze(scores).astype(float)

    # probas1: model.calibrate() given only positive validation triples
    # (no negatives supplied) and a positive base rate of 0.5.
    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas1 = model.predict_proba(X['test'])

    # probas2: model.calibrate() given positive and negative validation triples.
    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas2 = model.predict_proba(X['test'])

    # probas3: isotonic regression fitted on labelled validation scores.
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), X['valid_labels'].astype(float))
    probas3 = ir.predict(scores_flat)

    # probas4 / probas5: isotonic and sigmoid calibrators fitted through the
    # project-local pos_iso() helper from positive validation scores and the
    # output of generate_corruptions().
    # NOTE(review): `corruptions` is presumably an array of scores for
    # synthetic negatives (it is passed where `val_neg_scores` goes below) -
    # confirm against generate_corruptions.py.
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions,
                      positive_base_rate=0.5)
    probas4 = iso_pos.predict(scores_flat)

    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas5 = sc_pos.predict(scores_flat)

    # probas6: sigmoid calibrator fitted on positive vs. negative validation scores.
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas6 = sc_pos_neg.predict(scores_flat)

    # Per-relation decision threshold: the median validation score of each
    # relation (np.median does not need pre-sorted input). The explicit dict
    # lookup raises a KeyError naming the relation if the test split contains
    # a relation that never occurs in the validation split.
    valid_relations = X['valid'][:, 1]
    thresholds = {r: np.median(val_scores[valid_relations == r]) for r in np.unique(valid_relations)}
    thresholds_test = np.array([thresholds[r] for r in X['test'][:, 1]])
    per_relation_acc = accuracy_score(y_test, scores > thresholds_test)

    ranks = evaluate_performance(X_test_pos,
                                 model=model,
                                 filter_triples=filter_triples,
                                 use_default_protocol=True,
                                 verbose=False)

    # (metric-key suffix, accuracy-key suffix, probabilities) for every variant,
    # in the order the keys must appear in the record.
    variants = [
        ('scores', 'uncalib', expit(scores)),
        ('probas_pos', 'pos', probas1),
        ('probas_pos_neg', 'pos_neg', probas2),
        ('probas_pos_neg_iso', 'pos_neg_iso', probas3),
        ('probas_pos_iso', 'pos_iso', probas4),
        ('probas_pos_sc', 'pos_sc', probas5),
        ('probas_pos_neg_sc', 'pos_neg_sc', probas6),
    ]

    # Insertion order defines the column order of the DataFrame/CSV built
    # from `results`, so the groups are filled in a fixed sequence.
    row = {'k': k}
    for metric_name, _, proba in variants:
        row['brier_score_' + metric_name] = brier_score_loss(y_test, proba)
        row['log_loss_' + metric_name] = log_loss(y_test, proba, eps=1e-7)
    for metric_name, _, proba in variants:
        row['ece_' + metric_name] = calibration_loss(y_test, proba)
    row['metrics_mrr'] = mrr_score(ranks)
    row['metrics_hits@10'] = hits_at_n_score(ranks, n=10)
    row['metrics_mr'] = mr_score(ranks)
    row['accuracy_per_relation'] = per_relation_acc
    for _, accuracy_name, proba in variants:
        # Classify as positive when the (calibrated) probability exceeds 0.5.
        row['accuracy_' + accuracy_name] = accuracy_score(y_test, proba > 0.5)
    results.append(row)

    print(json.dumps(results[-1], indent=2))

In [None]:
# Give the embedding-size sweep records their own name (same list object as `results`).
k_results = results

In [None]:
# Tabulate the embedding-size sweep. This rebinds `df`, so the eta-sweep table
# built earlier is no longer reachable through that name.
df=pd.DataFrame(k_results)
# Show the column index so the available metric names are visible in the output.
df.columns

In [None]:
# Write the embedding-size sweep table to "yago39k_k.csv" (relative path), omitting the row index.
df.to_csv("yago39k_k.csv", index=False)

In [None]:
# Brier score as a function of embedding size for three of the variants in `df`.
# Font settings are applied before the axes are created so they take effect.
plt.rcParams.update({'font.size': 20, 'axes.titlesize': 18})
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in [
    ('brier_score_probas_pos_neg', "Platt scaling"),
    ('brier_score_probas_pos_neg_iso', "Isotonic"),
    ('brier_score_scores', "Uncalibrated scores"),
]:
    ax.plot(df['k'], df[column], lw=3, label=label)
ax.set_xlabel("Embedding size")
ax.set_ylabel("Brier score")
ax.legend()
ax.set_title("YAGO39K")