In [1]:
import numpy as np
import itertools
import pandas as pd
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_yago39k
from ampligraph.latent_features.models import TransE, ComplEx, DistMult, ConvKB, HolE
import types

In [2]:
from generate_corruptions import generate_corruptions, calibration_loss, pos_iso

In [3]:
# IPython magic: set the CUDA_VISIBLE_DEVICES environment variable to 0 for this kernel
# (presumably to pin training to the first GPU -- confirm on multi-GPU hosts).
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [4]:
# Load the YAGO39K dataset via AmpliGraph. The cells below index the result with the
# keys 'train', 'valid', 'test', 'valid_labels' and 'test_labels'.
X = load_yago39k()

In [5]:
# The label arrays are used directly as masks; the `~` negation relies on them
# being boolean.
valid_is_pos = X['valid_labels']
test_is_pos = X['test_labels']

# Positive / negative triples of the validation split.
X_valid_pos, X_valid_neg = X['valid'][valid_is_pos], X['valid'][~valid_is_pos]

# Positive / negative triples of the test split.
X_test_pos, X_test_neg = X['test'][test_is_pos], X['test'][~test_is_pos]

In [None]:
# Main experiment: for every (model class, loss) pair, train an embedding model on
# X['train'], derive probabilities from its scores with six calibration variants, and
# record Brier score, log loss, calibration loss, accuracy and ranking metrics.
losses =  ['self_adversarial', 'pairwise', 'nll', 'multiclass_nll']
models = [TransE, DistMult, ComplEx, HolE]

# One dict of metrics per (model, loss) run; converted to a DataFrame in a later cell.
results = []

for m, l in itertools.product(models, losses):
    # Identical hyperparameters and seed for every run; only the model class and loss vary.
    model = m(batches_count=64, seed=0, epochs=1000, k=100, eta=20,
                   optimizer='adam', optimizer_params={'lr':0.0001},
                   loss=l, verbose=False)

    model.fit(X['train'])
    
    # Raw, uncalibrated scores for every test triple (positives and negatives).
    scores = model.predict(X['test'])

    # probas1 ("pos"): calibrate with validation positives only and a 0.5 base rate.
    # NOTE(review): presumably calibrate() synthesizes the negatives internally when
    # none are passed -- confirm against the AmpliGraph documentation.
    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas1 = model.predict_proba(X['test'])

    # probas2 ("pos neg"): calibrate with the labelled validation positives and negatives.
    # This second calibrate() call changes model.calibration_parameters, which is why
    # predict_proba() is invoked right after each calibrate().
    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas2 = model.predict_proba(X['test'])
    
    # probas3 ("pos neg iso"): isotonic regression fitted on validation scores against
    # the validation labels, then applied to the test scores.
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), (X['valid_labels']).astype(float))
    probas3 = ir.predict(np.squeeze(scores).astype(float))
    
    # probas4 ("pos iso"): bind the project-local generate_corruptions() to this model
    # instance as a method, then fit an isotonic regressor through the pos_iso() helper.
    # NOTE(review): `corruptions` is passed in the same argument slot as val_neg_scores
    # further down, so it presumably holds scores of synthetic negatives -- confirm in
    # generate_corruptions.py.
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions, positive_base_rate=0.5)
    probas4 = iso_pos.predict(np.squeeze(scores).astype(float))

    # probas5 ("pos sc"): same inputs as probas4 but with scikit-learn's private
    # _SigmoidCalibration estimator; its fitted a_ / b_ parameters are printed.
    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas5 = sc_pos.predict(np.squeeze(scores).astype(float))
    
    # probas6 ("pos neg sc"): sigmoid calibration using the scores of the labelled
    # validation negatives in place of the generated corruptions.
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas6 = sc_pos_neg.predict(np.squeeze(scores).astype(float))
    
    # Per-relation decision threshold: the median validation score among triples whose
    # column 1 equals r (the np.sort is not required for the median itself).
    # NOTE(review): thresholds.get returns None for a test relation that never occurs
    # in the validation split, which would break the comparison below.
    thresholds = {r: np.median(np.sort(val_scores[X['valid'][:, 1] == r])) for r in np.unique(X['valid'][:, 1])}
    thresholds_test = np.vectorize(thresholds.get)(X['test'][:, 1])
    per_relation_acc = accuracy_score(X['test_labels'], scores > thresholds_test)

    # Uncalibrated baseline: logistic sigmoid of the raw score, thresholded at 0.5.
    acc_uncalib = accuracy_score(X['test_labels'], expit(scores) > 0.5)

    # Accuracy of each calibration variant at a fixed 0.5 probability threshold.
    acc1 = accuracy_score(X['test_labels'], probas1 > 0.5)
    acc2 = accuracy_score(X['test_labels'], probas2 > 0.5)
    acc3 = accuracy_score(X['test_labels'], probas3 > 0.5)
    acc4 = accuracy_score(X['test_labels'], probas4 > 0.5)
    acc5 = accuracy_score(X['test_labels'], probas5 > 0.5)
    acc6 = accuracy_score(X['test_labels'], probas6 > 0.5)
    
    # Ranking evaluation on the test positives; train, validation positives and test
    # positives are handed to evaluate_performance() as filter_triples.
    filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))
    ranks = evaluate_performance(X_test_pos, 
                                 model=model, 
                                 filter_triples=filter_triples,
                                 use_default_protocol=True, 
                                 verbose=False)

    # Key suffixes: scores = expit(raw scores), probas_pos = probas1,
    # probas_pos_neg = probas2, probas_pos_neg_iso = probas3, probas_pos_iso = probas4,
    # probas_pos_sc = probas5, probas_pos_neg_sc = probas6.
    results.append({
        'model': m.__name__,
        'loss': l,
        'brier_score_scores': brier_score_loss(X['test_labels'], expit(scores)),
        'log_loss_scores': log_loss(X['test_labels'], expit(scores), eps=1e-7),
        'brier_score_probas_pos': brier_score_loss(X['test_labels'], probas1),
        'log_loss_probas_pos': log_loss(X['test_labels'], probas1, eps=1e-7),
        'brier_score_probas_pos_neg': brier_score_loss(X['test_labels'], probas2),
        'log_loss_probas_pos_neg': log_loss(X['test_labels'], probas2, eps=1e-7),
        'brier_score_probas_pos_neg_iso': brier_score_loss(X['test_labels'], probas3),
        'log_loss_probas_pos_neg_iso': log_loss(X['test_labels'], probas3, eps=1e-7),
        'brier_score_probas_pos_iso': brier_score_loss(X['test_labels'], probas4),
        'log_loss_probas_pos_iso': log_loss(X['test_labels'], probas4, eps=1e-7),
        'brier_score_probas_pos_sc': brier_score_loss(X['test_labels'], probas5),
        'log_loss_probas_pos_sc': log_loss(X['test_labels'], probas5, eps=1e-7),
        'brier_score_probas_pos_neg_sc': brier_score_loss(X['test_labels'], probas6),
        'log_loss_probas_pos_neg_sc': log_loss(X['test_labels'], probas6, eps=1e-7),
        'ece_scores': calibration_loss(X['test_labels'], expit(scores)),
        'ece_probas_pos': calibration_loss(X['test_labels'], probas1),
        'ece_probas_pos_neg': calibration_loss(X['test_labels'], probas2),
        'ece_probas_pos_neg_iso': calibration_loss(X['test_labels'], probas3),
        'ece_probas_pos_iso': calibration_loss(X['test_labels'], probas4),
        'ece_probas_pos_sc': calibration_loss(X['test_labels'], probas5),
        'ece_probas_pos_neg_sc': calibration_loss(X['test_labels'], probas6),
        'metrics_mrr': mrr_score(ranks), 
        'metrics_hits@10': hits_at_n_score(ranks, n=10),
        'metrics_mr': mr_score(ranks),
        'accuracy_per_relation': per_relation_acc,
        'accuracy_uncalib': acc_uncalib,
        'accuracy_pos': acc1,
        'accuracy_pos_neg': acc2,
        'accuracy_pos_neg_iso': acc3,
        'accuracy_pos_iso': acc4,
        'accuracy_pos_sc': acc5,
        'accuracy_pos_neg_sc': acc6
    })
        
    # Echo the metrics of the run that just finished.
    print(json.dumps(results[-1], indent=2))

In [7]:
import pandas as pd

In [8]:
def highlight_min(s):
    """Return one CSS string per element of ``s``, bolding the minimum value(s).

    Intended for ``DataFrame.style.apply``: entries equal to the minimum of ``s``
    get ``'font-weight: bold'``; every other entry gets an empty string.
    """
    lowest = s.min()
    styles = []
    for value in s:
        styles.append('font-weight: bold' if value == lowest else '')
    return styles

In [18]:
# Collect the per-run metric dicts into a frame indexed by (model, loss).
df = pd.DataFrame(results).set_index(['model', 'loss'])
# Keep the index when exporting: 'model' and 'loss' live in the index, so writing
# with index=False would save the metric rows without the labels that identify them.
df.to_csv("main_results_yago39k.csv")

In [10]:
# Brier-score columns only, with the common "brier_score_" prefix stripped.
brier_cols = [c for c in df.columns if c.startswith('brier')]
bs = df[brier_cols].rename(columns=lambda c: c[len("brier_score_"):])
# Lower Brier score is better, so bold the minimum of each row.
bs.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.113261,0.109323,0.0948481,0.0925331,0.0948037,0.106187,0.363166
TransE,pairwise,0.147097,0.112938,0.123208,0.103209,0.123322,0.139535,0.497721
TransE,nll,0.260197,0.200088,0.187495,0.170223,0.187556,0.234974,0.217831
TransE,multiclass_nll,0.127983,0.116302,0.110986,0.103676,0.110962,0.123264,0.497795
DistMult,self_adversarial,0.0908349,0.0886699,0.0813233,0.0785058,0.0813693,0.0929009,0.284488
DistMult,pairwise,0.167739,0.133492,0.129235,0.115426,0.129343,0.148949,0.371697
DistMult,nll,0.0785158,0.0761127,0.0717517,0.0690451,0.071781,0.0768545,0.104989
DistMult,multiclass_nll,0.171679,0.134586,0.126559,0.112589,0.126658,0.14929,0.456301
ComplEx,self_adversarial,0.0947329,0.0948809,0.0893608,0.0843688,0.0894297,0.097268,0.264185
ComplEx,pairwise,0.150527,0.122979,0.11482,0.106567,0.114902,0.13114,0.321746


In [11]:
# Log-loss columns only, with the common "log_loss_" prefix stripped.
log_loss_cols = [name for name in df.columns if name.startswith('log_loss')]
ll = df[log_loss_cols]
ll.columns = [name[len("log_loss_"):] for name in log_loss_cols]
# Lower log loss is better, so bold the minimum of each row.
ll.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.398989,0.375553,0.318615,0.308136,0.318619,0.38808,1.06281
TransE,pairwise,0.477392,0.392935,0.44455,0.351752,0.444555,0.478483,4.92195
TransE,nll,0.756064,0.621766,0.577225,0.518368,0.577283,0.679133,0.626278
TransE,multiclass_nll,0.430603,0.440291,0.39163,0.350141,0.391634,0.472841,7.86127
DistMult,self_adversarial,0.308823,0.307951,0.279444,0.265872,0.279448,0.310798,1.04257
DistMult,pairwise,0.503608,0.398294,0.415361,0.35635,0.415366,0.450427,1.46702
DistMult,nll,0.270794,0.261522,0.247208,0.231766,0.247211,0.263137,0.71899
DistMult,multiclass_nll,0.516133,0.405863,0.407079,0.351802,0.407085,0.450874,4.22456
ComplEx,self_adversarial,0.319064,0.312896,0.305282,0.277733,0.305286,0.322939,1.19863
ComplEx,pairwise,0.462699,0.36872,0.373346,0.329513,0.373351,0.404607,1.06687


In [12]:
# LaTeX table: Brier score per model for the self-adversarial loss.
brier_model_cols = ['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']
brier_self_adv = bs.reset_index().query("loss == 'self_adversarial' ")[brier_model_cols]
print(brier_self_adv.reset_index(drop=True).round(3).to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   0.363 &           0.095 &               0.093 &       0.113 &           0.109 \\
1 &  DistMult &   0.284 &           0.081 &               0.079 &       0.091 &           0.089 \\
2 &   ComplEx &   0.264 &           0.089 &               0.084 &       0.095 &           0.095 \\
3 &      HolE &   0.345 &           0.141 &               0.140 &       0.166 &           0.162 \\
\bottomrule
\end{tabular}



In [13]:
# LaTeX table: log loss per model for the self-adversarial loss.
logloss_model_cols = ['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']
logloss_self_adv = ll.reset_index().query("loss == 'self_adversarial' ")[logloss_model_cols]
print(logloss_self_adv.reset_index(drop=True).round(3).to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   1.063 &           0.319 &               0.308 &       0.399 &           0.376 \\
1 &  DistMult &   1.043 &           0.279 &               0.266 &       0.309 &           0.308 \\
2 &   ComplEx &   1.199 &           0.305 &               0.278 &       0.319 &           0.313 \\
3 &      HolE &   1.065 &           0.444 &               0.438 &       0.581 &           0.537 \\
\bottomrule
\end{tabular}



In [15]:
# LaTeX table: Brier score per loss function for TransE.
brier_loss_cols = ['loss',  'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']
brier_transe = bs.reset_index().query("model == 'TransE' ")[brier_loss_cols]
print(brier_transe.reset_index(drop=True).round(3).to_latex())

\begin{tabular}{llrrrr}
\toprule
{} &              loss &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &  self\_adversarial &           0.095 &               0.093 &       0.113 &           0.109 \\
1 &          pairwise &           0.123 &               0.103 &       0.147 &           0.113 \\
2 &               nll &           0.187 &               0.170 &       0.260 &           0.200 \\
3 &    multiclass\_nll &           0.111 &               0.104 &       0.128 &           0.116 \\
\bottomrule
\end{tabular}



In [16]:
# LaTeX table: log loss per loss function for TransE.
logloss_loss_cols = ['loss', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']
logloss_transe = ll.reset_index().query("model == 'TransE' ")[logloss_loss_cols]
print(logloss_transe.reset_index(drop=True).round(3).to_latex())

\begin{tabular}{llrrrr}
\toprule
{} &              loss &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &  self\_adversarial &           0.319 &               0.308 &       0.399 &           0.376 \\
1 &          pairwise &           0.445 &               0.352 &       0.477 &           0.393 \\
2 &               nll &           0.577 &               0.518 &       0.756 &           0.622 \\
3 &    multiclass\_nll &           0.392 &               0.350 &       0.431 &           0.440 \\
\bottomrule
\end{tabular}



In [23]:
# Accuracy columns only, with the common "accuracy_" prefix stripped.
acc = df[[c for c in df.columns if c.startswith('accuracy')]]
acc.columns = [c[len("accuracy_"):] for c in acc.columns]
# Accuracy is higher-is-better, so bold the best (maximum) value of each row.
# Bolding the row minimum, as done for Brier score / log loss, would mark the worst method.
acc.style.apply(lambda s: ['font-weight: bold' if v else '' for v in s == s.max()], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,per_relation,pos,pos_iso,pos_neg,pos_neg_iso,pos_neg_sc,pos_sc,uncalib
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
TransE,self_adversarial,0.887581,0.852742,0.848548,0.871882,0.877903,0.871935,0.865215,0.502204
TransE,pairwise,0.898226,0.841505,0.865968,0.862796,0.871398,0.862688,0.823226,0.502204
TransE,nll,0.779946,0.620753,0.752903,0.714677,0.759677,0.713817,0.668441,0.502204
TransE,multiclass_nll,0.909624,0.838763,0.857849,0.866505,0.86957,0.866505,0.840753,0.502204
DistMult,self_adversarial,0.901882,0.881075,0.885376,0.889194,0.893118,0.889032,0.877151,0.567151
DistMult,pairwise,0.901344,0.757957,0.766989,0.818065,0.842043,0.817849,0.782796,0.540806
DistMult,nll,0.915269,0.896022,0.901882,0.903817,0.904462,0.904032,0.897043,0.883763
DistMult,multiclass_nll,0.909247,0.749462,0.786882,0.817312,0.843925,0.817419,0.78328,0.522688
ComplEx,self_adversarial,0.893817,0.868978,0.871559,0.872903,0.881828,0.872849,0.866452,0.610806
ComplEx,pairwise,0.902957,0.791075,0.802581,0.843602,0.852581,0.843118,0.816075,0.560753


In [24]:
# LaTeX table: accuracy (in percent) per model for the self-adversarial loss.
acc_table_cols = ['model', 'pos_neg', 'pos_neg_iso', 'pos', 'pos_iso',  'uncalib', 'per_relation']
acc_percent = (acc * 100).reset_index()
acc_self_adv = acc_percent.query("loss == 'self_adversarial' ")[acc_table_cols]
print(acc_self_adv.reset_index(drop=True).round(1).to_latex())

\begin{tabular}{llrrrrrr}
\toprule
{} &     model &  pos\_neg &  pos\_neg\_iso &   pos &  pos\_iso &  uncalib &  per\_relation \\
\midrule
0 &    TransE &     87.2 &         87.8 &  85.3 &     84.9 &     50.2 &          88.8 \\
1 &  DistMult &     88.9 &         89.3 &  88.1 &     88.5 &     56.7 &          90.2 \\
2 &   ComplEx &     87.3 &         88.2 &  86.9 &     87.2 &     61.1 &          89.4 \\
3 &      HolE &     80.4 &         80.4 &  78.4 &     78.5 &     50.6 &          81.5 \\
\bottomrule
\end{tabular}



In [17]:
# Ranking-metric columns only (MR, MRR, hits@10), with the "metrics_" prefix stripped.
ranking_cols = [c for c in df.columns if c.startswith('metrics')]
metrics = df[ranking_cols].rename(columns=lambda c: c[len("metrics_"):])
metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,hits@10,mr,mrr
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TransE,self_adversarial,0.319365,244.245221,0.168552
TransE,pairwise,0.648666,500.20526,0.371433
TransE,nll,0.113187,1012.296846,0.063915
TransE,multiclass_nll,0.590021,216.366508,0.32491
DistMult,self_adversarial,0.619883,635.099039,0.30557
DistMult,pairwise,0.63306,648.001242,0.33737
DistMult,nll,0.647262,744.198078,0.330295
DistMult,multiclass_nll,0.585268,397.900475,0.308723
ComplEx,self_adversarial,0.752673,1074.095583,0.530927
ComplEx,pairwise,0.729182,587.079436,0.476633


In [22]:
# LaTeX table: ranking metrics per model for the self-adversarial loss.
ranking_table_cols = ['model', 'mr', 'mrr', 'hits@10']
ranking_self_adv = metrics.reset_index().query("loss == 'self_adversarial' ")[ranking_table_cols]
print(ranking_self_adv.reset_index(drop=True).round(3).to_latex())

\begin{tabular}{llrrr}
\toprule
{} &     model &        mr &    mrr &  hits@10 \\
\midrule
0 &    TransE &   244.245 &  0.169 &    0.319 \\
1 &  DistMult &   635.099 &  0.306 &    0.620 \\
2 &   ComplEx &  1074.096 &  0.531 &    0.753 \\
3 &      HolE &   921.523 &  0.101 &    0.189 \\
\bottomrule
\end{tabular}



In [None]:
def highlight_max(s):
    """Return one CSS string per element of ``s``, bolding the maximum value(s).

    Intended for ``DataFrame.style.apply``: entries equal to the maximum of ``s``
    get ``'font-weight: bold'``; every other entry gets an empty string.
    """
    peak = s.max()
    return ['font-weight: bold' if value == peak else '' for value in s]

# Accuracy columns only, prefix stripped; the best (maximum) value per row is bolded.
accuracy_cols = [c for c in df.columns if c.startswith('accuracy')]
acc = df[accuracy_cols].rename(columns=lambda c: c[len("accuracy_"):])
acc.style.apply(highlight_max, axis=1)

In [None]:
# Spearman rank correlation between every pair of metric columns in df
# (one row of df per (model, loss) run).
df.corr(method='spearman')

In [None]:
# Number of distinct values in column 1 of the validation triples
# (the column the per-relation thresholds are keyed on).
valid_relations = np.unique(X['valid'][:, 1])
valid_relations.size

In [19]:
# Re-train a single TransE model with the self-adversarial loss (same hyperparameters
# as the main loop) to inspect its calibration parameters and per-relation thresholds.
transe_params = dict(batches_count=64, seed=0, epochs=1000, k=100, eta=20,
                     optimizer='adam', optimizer_params={'lr': 0.0001},
                     loss='self_adversarial', verbose=False)
model = TransE(**transe_params)
model.fit(X['train'])

# Raw scores for the test triples, taken before any calibration is fitted.
scores = model.predict(X['test'])

# Calibration from validation positives only.
model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
print("pos", model.calibration_parameters)
probas1 = model.predict_proba(X['test'])

# Calibration from labelled validation positives and negatives.
model.calibrate(X_valid_pos, X_valid_neg)
print("pos neg", model.calibration_parameters)
probas2 = model.predict_proba(X['test'])

val_scores = model.predict(X['valid'])

# Per-relation threshold: median validation score of the triples sharing column 1.
valid_relations = X['valid'][:, 1]
thresholds = {}
for r in np.unique(valid_relations):
    thresholds[r] = np.median(np.sort(val_scores[valid_relations == r]))

# Map every test triple to the threshold of its relation, then classify.
lookup_threshold = np.vectorize(thresholds.get)
thresholds_test = lookup_threshold(X['test'][:, 1])
per_relation_acc = accuracy_score(X['test_labels'], scores > thresholds_test)

print(thresholds)


pos [-2.0008023, -7.3683147]
pos neg [-1.8528421, -5.587557]
{0: -3.9869666, 1: -3.6161883, 2: -2.9660778, 3: -2.9241138, 4: -3.8640308, 5: -3.685308, 6: -2.861393, 7: -3.3280334, 8: -3.0741293, 9: -3.1950998, 10: -2.951118, 11: -1.8720441, 12: -2.4230814, 13: -1.542841, 14: -2.6944544, 15: -3.381497, 16: -1.8443029, 17: -3.4323683, 18: -1.6325312, 19: -4.2211304, 20: -4.101904, 21: -3.840962, 22: -1.832546, 23: -2.0101485, 24: -3.1512089, 25: -2.4524217, 27: -3.4848583, 29: -2.4269128, 31: -2.209188, 32: -1.3310984, 33: -2.3231838, 35: -2.0017974, 36: -1.3954651}


In [20]:
# Display the test accuracy obtained with the per-relation median thresholds.
per_relation_acc

0.8876881720430108