In [1]:
import numpy as np
import itertools
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration, _sigmoid_calibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_fb13
from ampligraph.latent_features.models import TransE, ComplEx, DistMult

In [2]:
from generate_corruptions import generate_corruptions, calibration_loss, pos_iso
import types

In [3]:
# Set CUDA_VISIBLE_DEVICES=0 in the kernel's environment
# (presumably to restrict GPU use to device 0 -- confirm for your machine).
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [4]:
# Load the FB13 dataset. The cells below index the result with the keys
# 'train', 'valid', 'valid_labels', 'test' and 'test_labels'.
X = load_fb13()

In [5]:
# Partition the labelled validation and test triples into positives and
# negatives, using the label arrays (and their complement) as row masks.
X_valid_pos, X_valid_neg = X['valid'][X['valid_labels']], X['valid'][~X['valid_labels']]
X_test_pos, X_test_neg = X['test'][X['test_labels']], X['test'][~X['test_labels']]

In [None]:
# Sweep TransE's `eta` hyperparameter on FB13. For every trained model, compare
# the uncalibrated scores against six calibration variants on the labelled test
# split, and record Brier score, log loss, calibration loss (ECE), accuracy and
# the ranking metrics (MRR, Hits@10, MR). One dict per `eta` is appended to
# `results` and echoed as JSON.
results = []

# Inputs that do not depend on the swept hyperparameter: build them once.
test_labels = X['test_labels']
valid_relations = X['valid'][:, 1]
test_relations = X['test'][:, 1]
filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))

for eta in [1, 5, 10, 20, 50, 100]:
    model = TransE(batches_count=64, seed=0, epochs=500, k=100, eta=eta,
                   optimizer='adam', optimizer_params={'lr': 0.0001},
                   loss='self_adversarial', verbose=False)
    model.fit(X['train'])

    scores = model.predict(X['test'])
    # Squeezed float copy of the test scores, shared by the scikit-learn style
    # calibrators below (none of them is fitted on it).
    scores_1d = np.squeeze(scores).astype(float)
    # Uncalibrated baseline: raw scores pushed through a logistic sigmoid.
    probas_uncalib = expit(scores)

    # Built-in calibration from positive validation triples only, with an
    # assumed positive base rate of 0.5.
    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas_pos = model.predict_proba(X['test'])

    # Built-in calibration from labelled positive and negative validation triples.
    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas_pos_neg = model.predict_proba(X['test'])

    # Isotonic regression fitted on all validation scores against their labels.
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), (X['valid_labels']).astype(float))
    probas_pos_neg_iso = ir.predict(scores_1d)

    # Calibrators fitted through the local `pos_iso` helper on positive
    # validation scores plus the output of `generate_corruptions`, which is
    # bound to the model as a method (see generate_corruptions.py for details).
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions,
                      positive_base_rate=0.5)
    probas_pos_iso = iso_pos.predict(scores_1d)

    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas_pos_sc = sc_pos.predict(scores_1d)

    # Same sigmoid calibrator, but fed the scores of the real validation negatives.
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas_pos_neg_sc = sc_pos_neg.predict(scores_1d)

    # Per-relation decision threshold: the median validation score of that
    # relation (np.median does not need pre-sorted input).
    thresholds = {r: np.median(val_scores[valid_relations == r]) for r in np.unique(valid_relations)}
    thresholds_test = np.vectorize(thresholds.get)(test_relations)
    per_relation_acc = accuracy_score(test_labels, scores > thresholds_test)

    ranks = evaluate_performance(X_test_pos,
                                 model=model,
                                 filter_triples=filter_triples,
                                 use_default_protocol=True,
                                 verbose=False)

    # (suffix of the brier/log-loss/ece keys, suffix of the accuracy key, probabilities).
    # The order here fixes the column order of the result row.
    predictions = [
        ('scores', 'uncalib', probas_uncalib),
        ('probas_pos', 'pos', probas_pos),
        ('probas_pos_neg', 'pos_neg', probas_pos_neg),
        ('probas_pos_neg_iso', 'pos_neg_iso', probas_pos_neg_iso),
        ('probas_pos_iso', 'pos_iso', probas_pos_iso),
        ('probas_pos_sc', 'pos_sc', probas_pos_sc),
        ('probas_pos_neg_sc', 'pos_neg_sc', probas_pos_neg_sc),
    ]

    row = {'eta': eta}
    for metric_suffix, _, proba in predictions:
        row['brier_score_' + metric_suffix] = brier_score_loss(test_labels, proba)
        row['log_loss_' + metric_suffix] = log_loss(test_labels, proba, eps=1e-7)
    for metric_suffix, _, proba in predictions:
        row['ece_' + metric_suffix] = calibration_loss(test_labels, proba)
    row['metrics_mrr'] = mrr_score(ranks)
    row['metrics_hits@10'] = hits_at_n_score(ranks, n=10)
    row['metrics_mr'] = mr_score(ranks)
    row['accuracy_per_relation'] = per_relation_acc
    for _, accuracy_suffix, proba in predictions:
        # A triple is classified as positive when its probability exceeds 0.5.
        row['accuracy_' + accuracy_suffix] = accuracy_score(test_labels, proba > 0.5)
    results.append(row)

    print(json.dumps(results[-1], indent=2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
pos [-1.0954863, -4.334414]
pos neg [-1.6125523, -5.5522327]
Instructions for updating:
Use tf.random.categorical instead.
pos sc -1.0922488047710615 -4.463725882491225
pos neg sc -1.6125365361080692 -5.55218235696441


  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 1,
  "brier_score_scores": 0.43010238901625863,
  "log_loss_scores": 1.4585676094150124,
  "brier_score_probas_pos": 0.18092034908458868,
  "log_loss_probas_pos": 0.5370831285028451,
  "brier_score_probas_pos_neg": 0.16231437634363932,
  "log_loss_probas_pos_neg": 0.4898351556862251,
  "brier_score_probas_pos_neg_iso": 0.16163469243894837,
  "log_loss_probas_pos_neg_iso": 0.4870209616450951,
  "brier_score_probas_pos_iso": 0.19254436721470428,
  "log_loss_probas_pos_iso": 0.5615592260268177,
  "brier_score_probas_pos_sc": 0.1888307926375534,
  "log_loss_probas_pos_sc": 0.5547230007177081,
  "brier_score_probas_pos_neg_sc": 0.16231436288453022,
  "log_loss_probas_pos_neg_sc": 0.4898344686036387,
  "ece_scores": 0.4505600300990328,
  "ece_probas_pos": 0.11753036279451701,
  "ece_probas_pos_neg": 0.022471213650185028,
  "ece_probas_pos_neg_iso": 0.006846981749344497,
  "ece_probas_pos_iso": 0.14747494657805427,
  "ece_probas_pos_sc": 0.14281984797082461,
  "ece_probas_pos_neg_s

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 5,
  "brier_score_scores": 0.443506634446674,
  "log_loss_scores": 1.5242958195201615,
  "brier_score_probas_pos": 0.15201329069370087,
  "log_loss_probas_pos": 0.46958473983798893,
  "brier_score_probas_pos_neg": 0.13323192493161676,
  "log_loss_probas_pos_neg": 0.4147222181775302,
  "brier_score_probas_pos_neg_iso": 0.13336567042720532,
  "log_loss_probas_pos_neg_iso": 0.414814903200108,
  "brier_score_probas_pos_iso": 0.15402736004113057,
  "log_loss_probas_pos_iso": 0.47264937646660315,
  "brier_score_probas_pos_sc": 0.15669793228792864,
  "log_loss_probas_pos_sc": 0.4788816849985962,
  "brier_score_probas_pos_neg_sc": 0.13323195928451906,
  "log_loss_probas_pos_neg_sc": 0.4147226660593127,
  "ece_scores": 0.4607009358054849,
  "ece_probas_pos": 0.10586315002647154,
  "ece_probas_pos_neg": 0.01521211576244792,
  "ece_probas_pos_neg_iso": 0.009829387764596275,
  "ece_probas_pos_iso": 0.1015516201573718,
  "ece_probas_pos_sc": 0.12048983355090251,
  "ece_probas_pos_neg_sc"

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 10,
  "brier_score_scores": 0.4449743486881438,
  "log_loss_scores": 1.531114754635349,
  "brier_score_probas_pos": 0.1475938140653438,
  "log_loss_probas_pos": 0.45924728665003317,
  "brier_score_probas_pos_neg": 0.12908166102148239,
  "log_loss_probas_pos_neg": 0.4036669537004763,
  "brier_score_probas_pos_neg_iso": 0.12916904811544647,
  "log_loss_probas_pos_neg_iso": 0.4034930553150096,
  "brier_score_probas_pos_iso": 0.1479903422470931,
  "log_loss_probas_pos_iso": 0.4588063197774797,
  "brier_score_probas_pos_sc": 0.15136495022635757,
  "log_loss_probas_pos_sc": 0.4661357943041857,
  "brier_score_probas_pos_neg_sc": 0.12908177443247576,
  "log_loss_probas_pos_neg_sc": 0.4036677602275994,
  "ece_scores": 0.46178920431159587,
  "ece_probas_pos": 0.10781627132943644,
  "ece_probas_pos_neg": 0.011184428768135447,
  "ece_probas_pos_neg_iso": 0.007725199472405761,
  "ece_probas_pos_iso": 0.09503221551516096,
  "ece_probas_pos_sc": 0.11747048686754953,
  "ece_probas_pos_neg_s

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 20,
  "brier_score_scores": 0.44563209528380743,
  "log_loss_scores": 1.5335787178428342,
  "brier_score_probas_pos": 0.145080650046291,
  "log_loss_probas_pos": 0.45329740366389204,
  "brier_score_probas_pos_neg": 0.12679760281621352,
  "log_loss_probas_pos_neg": 0.39731446391077135,
  "brier_score_probas_pos_neg_iso": 0.1269213223747452,
  "log_loss_probas_pos_neg_iso": 0.3974572730331737,
  "brier_score_probas_pos_iso": 0.14499401537487708,
  "log_loss_probas_pos_iso": 0.4516454916188764,
  "brier_score_probas_pos_sc": 0.14838202571507067,
  "log_loss_probas_pos_sc": 0.45892898971826107,
  "brier_score_probas_pos_neg_sc": 0.1267976473010799,
  "log_loss_probas_pos_neg_sc": 0.39731445180523806,
  "ece_scores": 0.46229651736586247,
  "ece_probas_pos": 0.10859998988799816,
  "ece_probas_pos_neg": 0.01023814466416322,
  "ece_probas_pos_neg_iso": 0.010035126857443198,
  "ece_probas_pos_iso": 0.09186436106985635,
  "ece_probas_pos_sc": 0.11575681281804782,
  "ece_probas_pos_neg

  avg_pred_true = y_true[i_start:i_end].sum() / delta_count
  bin_centroid = y_prob[i_start:i_end].sum() / delta_count


{
  "eta": 50,
  "brier_score_scores": 0.4460858275793543,
  "log_loss_scores": 1.5355540995823067,
  "brier_score_probas_pos": 0.1423449120778154,
  "log_loss_probas_pos": 0.4470435699097237,
  "brier_score_probas_pos_neg": 0.12420871922158815,
  "log_loss_probas_pos_neg": 0.3905511126419684,
  "brier_score_probas_pos_neg_iso": 0.12449517218918156,
  "log_loss_probas_pos_neg_iso": 0.3907800792653795,
  "brier_score_probas_pos_iso": 0.14112705903950606,
  "log_loss_probas_pos_iso": 0.44249453902423647,
  "brier_score_probas_pos_sc": 0.14506060867547288,
  "log_loss_probas_pos_sc": 0.4511349851482815,
  "brier_score_probas_pos_neg_sc": 0.12420893648666705,
  "log_loss_probas_pos_neg_sc": 0.3905526241946029,
  "ece_scores": 0.462700612193795,
  "ece_probas_pos": 0.10918192700312589,
  "ece_probas_pos_neg": 0.010066107154073353,
  "ece_probas_pos_neg_iso": 0.009674655720074725,
  "ece_probas_pos_iso": 0.09115698174483002,
  "ece_probas_pos_sc": 0.11234289175428418,
  "ece_probas_pos_neg_s

In [None]:
# Keep a dedicated reference to the eta sweep rows: `results` is rebound to a
# fresh list by the later embedding-size (k) sweep.
eta_results = results

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# One row per eta value, one column per recorded metric; show the column names.
df = pd.DataFrame(data=eta_results)
df.columns

In [None]:
# Write the eta sweep table to fb13_eta.csv (relative path), omitting the row index.
df.to_csv("fb13_eta.csv", index=False)

In [None]:
# Plot, against eta, the Brier score of the two calibrators fitted on positive
# and negative validation triples and of the uncalibrated scores, together
# with the model's MRR.
# Note: this updates matplotlib's global rcParams, so later figures inherit the sizes.
plt.rcParams.update({'font.size': 20, 'axes.titlesize': 18})
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in [
    ('brier_score_probas_pos_neg', "Platt scaling"),
    ('brier_score_probas_pos_neg_iso', "Isotonic"),
    ('brier_score_scores', "Uncalibrated scores"),
    ('metrics_mrr', "MRR"),
]:
    ax.plot(df['eta'], df[column], lw=3, label=label)
ax.set_xlabel("Eta")
# MRR shares this axis with the Brier-score curves, so the label names both.
ax.set_ylabel("Brier score / MRR")
ax.legend()
ax.set_title("FB13");

In [None]:
# Sweep TransE's embedding size `k` on FB13 (eta fixed at 20). For every
# trained model, compare the uncalibrated scores against six calibration
# variants on the labelled test split, and record Brier score, log loss,
# calibration loss (ECE), accuracy and the ranking metrics (MRR, Hits@10, MR).
# One dict per `k` is appended to `results` and echoed as JSON.
results = []

# Inputs that do not depend on the swept hyperparameter: build them once.
test_labels = X['test_labels']
valid_relations = X['valid'][:, 1]
test_relations = X['test'][:, 1]
filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))

for k in [10, 25, 50, 100, 200, 500]:
    model = TransE(batches_count=64, seed=0, epochs=500, k=k, eta=20,
                   optimizer='adam', optimizer_params={'lr': 0.0001},
                   loss='self_adversarial', verbose=False)
    model.fit(X['train'])

    scores = model.predict(X['test'])
    # Squeezed float copy of the test scores, shared by the scikit-learn style
    # calibrators below (none of them is fitted on it).
    scores_1d = np.squeeze(scores).astype(float)
    # Uncalibrated baseline: raw scores pushed through a logistic sigmoid.
    probas_uncalib = expit(scores)

    # Built-in calibration from positive validation triples only, with an
    # assumed positive base rate of 0.5.
    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas_pos = model.predict_proba(X['test'])

    # Built-in calibration from labelled positive and negative validation triples.
    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas_pos_neg = model.predict_proba(X['test'])

    # Isotonic regression fitted on all validation scores against their labels.
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), (X['valid_labels']).astype(float))
    probas_pos_neg_iso = ir.predict(scores_1d)

    # Calibrators fitted through the local `pos_iso` helper on positive
    # validation scores plus the output of `generate_corruptions`, which is
    # bound to the model as a method (see generate_corruptions.py for details).
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions,
                      positive_base_rate=0.5)
    probas_pos_iso = iso_pos.predict(scores_1d)

    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas_pos_sc = sc_pos.predict(scores_1d)

    # Same sigmoid calibrator, but fed the scores of the real validation negatives.
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas_pos_neg_sc = sc_pos_neg.predict(scores_1d)

    # Per-relation decision threshold: the median validation score of that
    # relation (np.median does not need pre-sorted input).
    thresholds = {r: np.median(val_scores[valid_relations == r]) for r in np.unique(valid_relations)}
    thresholds_test = np.vectorize(thresholds.get)(test_relations)
    per_relation_acc = accuracy_score(test_labels, scores > thresholds_test)

    ranks = evaluate_performance(X_test_pos,
                                 model=model,
                                 filter_triples=filter_triples,
                                 use_default_protocol=True,
                                 verbose=False)

    # (suffix of the brier/log-loss/ece keys, suffix of the accuracy key, probabilities).
    # The order here fixes the column order of the result row.
    predictions = [
        ('scores', 'uncalib', probas_uncalib),
        ('probas_pos', 'pos', probas_pos),
        ('probas_pos_neg', 'pos_neg', probas_pos_neg),
        ('probas_pos_neg_iso', 'pos_neg_iso', probas_pos_neg_iso),
        ('probas_pos_iso', 'pos_iso', probas_pos_iso),
        ('probas_pos_sc', 'pos_sc', probas_pos_sc),
        ('probas_pos_neg_sc', 'pos_neg_sc', probas_pos_neg_sc),
    ]

    row = {'k': k}
    for metric_suffix, _, proba in predictions:
        row['brier_score_' + metric_suffix] = brier_score_loss(test_labels, proba)
        row['log_loss_' + metric_suffix] = log_loss(test_labels, proba, eps=1e-7)
    for metric_suffix, _, proba in predictions:
        row['ece_' + metric_suffix] = calibration_loss(test_labels, proba)
    row['metrics_mrr'] = mrr_score(ranks)
    row['metrics_hits@10'] = hits_at_n_score(ranks, n=10)
    row['metrics_mr'] = mr_score(ranks)
    row['accuracy_per_relation'] = per_relation_acc
    for _, accuracy_suffix, proba in predictions:
        # A triple is classified as positive when its probability exceeds 0.5.
        row['accuracy_' + accuracy_suffix] = accuracy_score(test_labels, proba > 0.5)
    results.append(row)

    print(json.dumps(results[-1], indent=2))

In [None]:
# Keep a dedicated reference to the rows produced by the embedding-size (k) sweep.
k_results = results

In [None]:
# One row per embedding size, one column per recorded metric; show the column names.
df = pd.DataFrame(data=k_results)
df.columns

In [None]:
# Write the embedding-size sweep table to fb13_k.csv (relative path), omitting the row index.
df.to_csv("fb13_k.csv", index=False)

In [None]:
# Plot, against the embedding size k, the Brier score of the two calibrators
# fitted on positive and negative validation triples and of the uncalibrated
# scores, together with the model's MRR.
# Note: this updates matplotlib's global rcParams, so later figures inherit the sizes.
plt.rcParams.update({'font.size': 20, 'axes.titlesize': 18})
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in [
    ('brier_score_probas_pos_neg', "Platt scaling"),
    ('brier_score_probas_pos_neg_iso', "Isotonic"),
    ('brier_score_scores', "Uncalibrated scores"),
    ('metrics_mrr', "MRR"),
]:
    ax.plot(df['k'], df[column], lw=3, label=label)
ax.set_xlabel("Embedding size")
# MRR shares this axis with the Brier-score curves, so the label names both.
ax.set_ylabel("Brier score / MRR")
ax.legend()
ax.set_title("FB13");