In [1]:
import numpy as np
import itertools
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_fb13
from ampligraph.latent_features.models import TransE, ComplEx, DistMult
import types

In [2]:
from generate_corruptions import generate_corruptions, calibration_loss, pos_iso

In [3]:
# Set the CUDA_VISIBLE_DEVICES environment variable to 1 for this kernel,
# before any model is constructed in the cells below.
# NOTE(review): presumably this pins training to the GPU with index 1 — confirm on the target machine.
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [4]:
# Load the FB13 dataset. The cells below read its 'train', 'valid', 'valid_labels',
# 'test' and 'test_labels' entries.
X = load_fb13()

In [5]:
# Split the labelled validation and test triples into positives and negatives,
# using the label arrays as masks (and their negation for the negatives).
valid_is_positive = X['valid_labels']
test_is_positive = X['test_labels']

X_valid_pos = X['valid'][valid_is_positive]
X_valid_neg = X['valid'][~valid_is_positive]

X_test_pos = X['test'][test_is_positive]
X_test_neg = X['test'][~test_is_positive]

In [None]:
# Experiment grid: every embedding model is trained once with every loss.
losses =  ['self_adversarial', 'pairwise', 'nll', 'multiclass_nll']
models = [TransE, DistMult, ComplEx]

# One dict of metrics per (model, loss) run that completed without raising.
results = []

# Ground-truth labels of the test triples; every metric below is computed against them.
test_labels = X['test_labels']

for m, l in itertools.product(models, losses):
    model = m(batches_count=32, seed=0, epochs=1000, k=100, eta=20,
              optimizer='adam', optimizer_params={'lr':0.0001},
              loss=l, verbose=False)

    try:
        model.fit(X['train'])

        scores = model.predict(X['test'])
        # Squeezed float copy of the test scores, fed to every external calibrator below.
        flat_scores = np.squeeze(scores).astype(float)

        # Built-in calibration fitted from validation positives only.
        model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
        print("pos", model.calibration_parameters)
        probas_pos = model.predict_proba(X['test'])

        # Built-in calibration fitted from validation positives and ground-truth negatives.
        model.calibrate(X_valid_pos, X_valid_neg)
        print("pos neg", model.calibration_parameters)
        probas_pos_neg = model.predict_proba(X['test'])

        # Isotonic regression fitted on all labelled validation triples.
        val_scores = model.predict(X['valid'])
        ir = IsotonicRegression(out_of_bounds='clip')
        ir.fit(np.squeeze(val_scores).astype(float), (X['valid_labels']).astype(float))
        probas_pos_neg_iso = ir.predict(flat_scores)

        # Isotonic regression fitted from validation positives plus generated corruptions.
        # `generate_corruptions` is bound to the model instance so it can be called as a method.
        model.generate_corruptions = types.MethodType(generate_corruptions, model)
        corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
        val_pos_scores = np.squeeze(model.predict(X_valid_pos))
        iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions, positive_base_rate=0.5)
        probas_pos_iso = iso_pos.predict(flat_scores)

        # Sigmoid calibrator fitted from validation positives plus generated corruptions.
        sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
        print("pos sc", sc_pos.a_, sc_pos.b_)
        probas_pos_sc = sc_pos.predict(flat_scores)

        # Sigmoid calibrator fitted from validation positives and ground-truth negatives.
        val_neg_scores = np.squeeze(model.predict(X_valid_neg))
        sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
        print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
        probas_pos_neg_sc = sc_pos_neg.predict(flat_scores)

        # Per-relation decision threshold: the median validation score of each distinct
        # value in column 1 of the validation triples (np.median needs no pre-sorting).
        valid_relations = X['valid'][:, 1]
        thresholds = {r: np.median(val_scores[valid_relations == r]) for r in np.unique(valid_relations)}
        thresholds_test = np.vectorize(thresholds.get)(X['test'][:, 1])
        per_relation_acc = accuracy_score(test_labels, scores > thresholds_test)

        # (result-key suffix, accuracy-key suffix, probability estimates on the test set).
        # The order of this list fixes the column order of the results table.
        estimates = [
            ('scores', 'uncalib', expit(scores)),
            ('probas_pos', 'pos', probas_pos),
            ('probas_pos_neg', 'pos_neg', probas_pos_neg),
            ('probas_pos_neg_iso', 'pos_neg_iso', probas_pos_neg_iso),
            ('probas_pos_iso', 'pos_iso', probas_pos_iso),
            ('probas_pos_sc', 'pos_sc', probas_pos_sc),
            ('probas_pos_neg_sc', 'pos_neg_sc', probas_pos_neg_sc),
        ]

        # Accuracy of thresholding each probability estimate at 0.5.
        accuracies = [('per_relation', per_relation_acc)]
        accuracies += [(acc_name, accuracy_score(test_labels, p > 0.5)) for _, acc_name, p in estimates]

        # Ranking metrics are computed on the test positives, filtering out all known positives.
        filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))
        ranks = evaluate_performance(X_test_pos,
                                     model=model,
                                     filter_triples=filter_triples,
                                     use_default_protocol=True,
                                     verbose=False)
    except Exception as e:
        # Best effort: skip the failing configuration, but say which one failed and how,
        # so a missing row in the results table can be traced back to its cause.
        print("Exception ({} / {}): {}: {}".format(m.__name__, l, type(e).__name__, e))
        continue

    record = {'model': m.__name__, 'loss': l}
    # NOTE(review): `eps` is reportedly deprecated in newer scikit-learn releases; it is kept
    # here to match the library version this notebook was written against — confirm before upgrading.
    for name, _, p in estimates:
        record['brier_score_' + name] = brier_score_loss(test_labels, p)
        record['log_loss_' + name] = log_loss(test_labels, p, eps=1e-7)
    for name, _, p in estimates:
        record['ece_' + name] = calibration_loss(test_labels, p)
    record['metrics_mrr'] = mrr_score(ranks)
    record['metrics_hits@10'] = hits_at_n_score(ranks, n=10)
    record['metrics_mr'] = mr_score(ranks)
    for acc_name, value in accuracies:
        record['accuracy_' + acc_name] = value

    results.append(record)

    print(json.dumps(results[-1], indent=2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
pos [-1.4296955, -5.5330477]
pos neg [-2.745783, -9.865425]
Instructions for updating:
Use tf.random.categorical instead.
pos sc -1.5014799113967103 -5.925346708263222
pos neg sc -2.745765486884588 -9.86536315025429


  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "self_adversarial",
  "brier_score_scores": 0.44554974123999214,
  "log_loss_scores": 1.533803733208319,
  "brier_score_probas_pos": 0.1423859988388692,
  "log_loss_probas_pos": 0.4469818503702879,
  "brier_score_probas_pos_neg": 0.12434945745450178,
  "log_loss_probas_pos_neg": 0.3907171836879325,
  "brier_score_probas_pos_neg_iso": 0.124352102967138,
  "log_loss_probas_pos_neg_iso": 0.3907786528156528,
  "brier_score_probas_pos_iso": 0.14135220360648737,
  "log_loss_probas_pos_iso": 0.44296393211287666,
  "brier_score_probas_pos_sc": 0.1454120464243028,
  "log_loss_probas_pos_sc": 0.4519962641419201,
  "brier_score_probas_pos_neg_sc": 0.12434946643338107,
  "log_loss_probas_pos_neg_sc": 0.39071610098006654,
  "ece_scores": 0.4624879393653124,
  "ece_probas_pos": 0.10791181316468958,
  "ece_probas_pos_neg": 0.010122691080057063,
  "ece_probas_pos_neg_iso": 0.007116593848850285,
  "ece_probas_pos_iso": 0.08716301897226798,
  "ece_probas_pos_sc": 0.11343

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "pairwise",
  "brier_score_scores": 0.4999600556860562,
  "log_loss_scores": 5.233593539454379,
  "brier_score_probas_pos": 0.22494239533363558,
  "log_loss_probas_pos": 0.6373193005651496,
  "brier_score_probas_pos_neg": 0.22469787795731397,
  "log_loss_probas_pos_neg": 0.6362553057238051,
  "brier_score_probas_pos_neg_iso": 0.20299433345446838,
  "log_loss_probas_pos_neg_iso": 0.5820868916678338,
  "brier_score_probas_pos_iso": 0.20803145528953107,
  "log_loss_probas_pos_iso": 0.5936251368204077,
  "brier_score_probas_pos_sc": 0.2284992373799987,
  "log_loss_probas_pos_sc": 0.6437693785737651,
  "brier_score_probas_pos_neg_sc": 0.22469730589396622,
  "log_loss_probas_pos_neg_sc": 0.6362553495353314,
  "ece_scores": 0.4999776426222399,
  "ece_probas_pos": 0.10634056276933913,
  "ece_probas_pos_neg": 0.156289998909715,
  "ece_probas_pos_neg_iso": 0.006800646281727416,
  "ece_probas_pos_iso": 0.06314431733619326,
  "ece_probas_pos_sc": 0.1514007931267311

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count
  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "model": "TransE",
  "loss": "nll",
  "brier_score_scores": 0.2356852277835587,
  "log_loss_scores": 0.6634528751939001,
  "brier_score_probas_pos": 0.23940801192799221,
  "log_loss_probas_pos": 0.6758681786576761,
  "brier_score_probas_pos_neg": 0.2092276480664465,
  "log_loss_probas_pos_neg": 0.6140861720148587,
  "brier_score_probas_pos_neg_iso": 0.20295373284390997,
  "log_loss_probas_pos_neg_iso": 0.5926602140869712,
  "brier_score_probas_pos_iso": 0.24326005536628312,
  "log_loss_probas_pos_iso": 0.6837006758597755,
  "brier_score_probas_pos_sc": 0.25280592963927584,
  "log_loss_probas_pos_sc": 0.703133247862919,
  "brier_score_probas_pos_neg_sc": 0.2092300032913679,
  "log_loss_probas_pos_neg_sc": 0.6140878475953448,
  "ece_scores": 0.14380922404706692,
  "ece_probas_pos": 0.15978150862617116,
  "ece_probas_pos_neg": 0.04320384164935883,
  "ece_probas_pos_neg_iso": 0.009459265670122305,
  "ece_probas_pos_iso": 0.17844937728238,
  "ece_probas_pos_sc": 0.18691165281188388,
  "

In [None]:
import pandas as pd

In [None]:
def highlight_min(s):
    """Return one CSS string per element of ``s``: bold where it equals ``s.min()``, empty otherwise."""
    smallest = s.min()
    bold, plain = 'font-weight: bold', ''
    return [bold if flag else plain for flag in s.eq(smallest)]

In [None]:
# One row per completed run, indexed by the (model, loss) pair that produced it.
results_frame = pd.DataFrame(results)
df = results_frame.set_index(['model', 'loss'])

In [None]:
# Brier-score columns only, with the shared "brier_score_" prefix stripped from the headers;
# the lowest value of each row is rendered in bold.
brier_columns = [name for name in df.columns if name.startswith('brier')]
bs = df[brier_columns]
bs.columns = [name[len("brier_score_"):] for name in brier_columns]
bs.style.apply(highlight_min, axis=1)

In [None]:
# Log-loss columns only, with the shared "log_loss_" prefix stripped from the headers;
# the lowest value of each row is rendered in bold.
log_loss_columns = [name for name in df.columns if name.startswith('log_loss')]
ll = df[log_loss_columns]
ll.columns = [name[len("log_loss_"):] for name in log_loss_columns]
ll.style.apply(highlight_min, axis=1)

In [None]:
# LaTeX table of the Brier scores for the runs trained with the self-adversarial loss.
bs_table_columns = ['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']
bs_self_adversarial = bs.reset_index().query("loss == 'self_adversarial' ")
bs_table = bs_self_adversarial[bs_table_columns].reset_index(drop=True).round(3)
print(bs_table.to_latex())

In [None]:
# LaTeX table of the log losses for the runs trained with the self-adversarial loss.
ll_table_columns = ['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']
ll_self_adversarial = ll.reset_index().query("loss == 'self_adversarial' ")
ll_table = ll_self_adversarial[ll_table_columns].reset_index(drop=True).round(3)
print(ll_table.to_latex())

In [None]:
# LaTeX table of the accuracies (in percent) for the runs trained with the self-adversarial loss.
# The accuracy table is derived from `df` inside this cell, so the cell does not depend on a
# variable that is only assigned further down the notebook and works on a top-to-bottom run.
accuracy_columns = [c for c in df.columns if c.startswith('accuracy')]
acc_percent = df[accuracy_columns] * 100
acc_percent.columns = [c[len("accuracy_"):] for c in accuracy_columns]
print(acc_percent.reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'pos_neg', 'pos_neg_iso', 'pos', 'pos_iso',  'uncalib', 'per_relation']]
 .reset_index(drop=True)
  .round(1)
 .to_latex())

In [None]:
# Ranking-metric columns only, with the shared "metrics_" prefix stripped from the headers.
metric_columns = [name for name in df.columns if name.startswith('metrics')]
metrics = df[metric_columns]
metrics.columns = [name[len("metrics_"):] for name in metric_columns]
metrics

In [None]:
def highlight_max(s):
    """Return one CSS string per element of ``s``: bold where it equals ``s.max()``, empty otherwise."""
    largest = s.max()
    bold, plain = 'font-weight: bold', ''
    return [bold if flag else plain for flag in s.eq(largest)]

# Accuracy columns only, with the shared "accuracy_" prefix stripped from the headers;
# the highest value of each row is rendered in bold.
accuracy_columns = [name for name in df.columns if name.startswith('accuracy')]
acc = df[accuracy_columns]
acc.columns = [name[len("accuracy_"):] for name in accuracy_columns]
acc.style.apply(highlight_max, axis=1)

In [None]:
# Spearman correlation between every accuracy column and selected log-loss columns.
spearman_ll = df.corr(method='spearman').reset_index()
accuracy_rows_ll = spearman_ll.query("index.str.startswith('accuracy')")
accuracy_rows_ll[['index', 'log_loss_probas_pos_neg', 'log_loss_probas_pos_neg_iso', 'log_loss_probas_pos', 'log_loss_probas_pos_iso']]

In [None]:
# Spearman correlation between every accuracy column and selected Brier-score columns.
spearman_bs = df.corr(method='spearman').reset_index()
accuracy_rows_bs = spearman_bs.query("index.str.startswith('accuracy')")
accuracy_rows_bs[['index', 'brier_score_probas_pos_neg', 'brier_score_probas_pos_neg_iso', 'brier_score_probas_pos', 'brier_score_probas_pos_iso']]

In [None]:
# Number of distinct values in column 1 of the validation triples.
distinct_valid_relations = np.unique(X['valid'][:, 1])
len(distinct_valid_relations)