In [1]:
import numpy as np
import itertools
import json

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve, _SigmoidCalibration, _sigmoid_calibration
from ampligraph.evaluation import evaluate_performance, mr_score, mrr_score, hits_at_n_score, generate_corruptions_for_eval
from sklearn.metrics import brier_score_loss, log_loss, accuracy_score
from scipy.special import expit

from ampligraph.datasets import load_wordnet11
from ampligraph.latent_features.models import TransE, ComplEx, DistMult

In [2]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [3]:
# Load the WordNet11 benchmark once. Later cells read the keys 'train', 'valid',
# 'valid_labels', 'test' and 'test_labels' from this mapping.
X = load_wordnet11()

In [4]:
# Split the labelled validation and test triples by their string label:
# "1" marks a positive triple, "-1" a negative one.
valid_labels, test_labels = X['valid_labels'], X['test_labels']

X_valid_pos = X['valid'][valid_labels == "1"]
X_valid_neg = X['valid'][valid_labels == "-1"]

X_test_pos = X['test'][test_labels == "1"]
X_test_neg = X['test'][test_labels == "-1"]

In [5]:
import types
from functools import partial
import tensorflow as tf
from sklearn.utils import check_random_state
from tqdm import tqdm
from ampligraph.datasets import AmpligraphDatasetAdapter, NumpyDatasetAdapter
from ampligraph.evaluation import generate_corruptions_for_fit, to_idx, generate_corruptions_for_eval, \
    hits_at_n_score, mrr_score


def generate_corruptions(self, X_pos, batches_count, epochs):
    """Score synthetic corruptions of ``X_pos`` with an already trained model.

    Written to be bound to a fitted embedding model (the notebook attaches it
    with ``types.MethodType``). The default TensorFlow graph is reset, the
    model is restored through ``self._load_model_from_trained_params()``, and
    for every batch of positives a corrupted batch is produced by
    ``generate_corruptions_for_fit`` (``eta=1``, ``corrupt_side='s+o'``) and
    scored with ``self._fn``.

    Parameters
    ----------
    self
        Object exposing ``seed``, ``rel_to_idx``, ``ent_to_idx``, ``verbose``,
        ``tf_config``, ``_load_model_from_trained_params``,
        ``_lookup_embeddings`` and ``_fn``.
    X_pos
        Positive triples handed to ``NumpyDatasetAdapter.set_data``.
    batches_count : int
        Number of batches drawn per epoch; also determines the batch size.
    epochs : int
        Number of passes; ``epochs * batches_count`` batches are scored.

    Returns
    -------
    numpy.ndarray
        Concatenation of the corruption scores of every evaluated batch.

    Raises
    ------
    Exception
        Any error raised while building or running the graph is propagated
        unchanged, after the dataset adapter (if one was created) is cleaned up.
    """
    # Bound before the ``try`` so the cleanup path never hits an unassigned
    # name when the failure happens before the adapter exists.
    dataset_handle = None
    try:
        tf.reset_default_graph()
        self.rnd = check_random_state(self.seed)
        tf.random.set_random_seed(self.seed)

        self._load_model_from_trained_params()

        dataset_handle = NumpyDatasetAdapter()
        dataset_handle.use_mappings(self.rel_to_idx, self.ent_to_idx)

        dataset_handle.set_data(X_pos, "pos")

        batch_size_pos = int(np.ceil(dataset_handle.get_size("pos") / batches_count))

        gen_fn = partial(dataset_handle.get_next_train_batch, batch_size=batch_size_pos, dataset_type="pos")
        dataset = tf.data.Dataset.from_generator(gen_fn,
                                                 output_types=tf.int32,
                                                 output_shapes=(None, 3))
        dataset = dataset.repeat().prefetch(1)
        dataset_iter = tf.data.make_one_shot_iterator(dataset)

        x_pos_tf = dataset_iter.get_next()

        # NOTE(review): the positive scores are built but never fetched. They
        # are kept so graph construction stays exactly as before - confirm
        # whether they can be dropped.
        e_s, e_p, e_o = self._lookup_embeddings(x_pos_tf)
        scores_pos = self._fn(e_s, e_p, e_o)

        x_neg_tf = generate_corruptions_for_fit(x_pos_tf,
                                                entities_list=None,
                                                eta=1,
                                                corrupt_side='s+o',
                                                entities_size=0,
                                                rnd=self.seed)

        e_s_neg, e_p_neg, e_o_neg = self._lookup_embeddings(x_neg_tf)
        scores = self._fn(e_s_neg, e_p_neg, e_o_neg)

        epoch_iterator_with_progress = tqdm(range(1, epochs + 1), disable=(not self.verbose), unit='epoch')

        scores_list = []
        with tf.Session(config=self.tf_config) as sess:
            sess.run(tf.global_variables_initializer())
            for _ in epoch_iterator_with_progress:
                for _ in range(batches_count):
                    # Each run pulls the next positive batch and scores its corruptions.
                    scores_list.append(sess.run(scores))

        return np.concatenate(scores_list)
    finally:
        # Runs exactly once on both the success and the failure path.
        if dataset_handle is not None:
            dataset_handle.cleanup()

In [6]:
def pos_iso(cal_model, pos_scores, neg_scores, positive_base_rate):
    """Fit ``cal_model`` on positive and negative scores with class weights.

    Positive scores get target 1 and weight ``len(neg_scores) / len(pos_scores)``;
    negative scores get target 0 and weight
    ``(1 - positive_base_rate) / positive_base_rate``.

    Parameters
    ----------
    cal_model
        Object with a ``fit(x, y, sample_weight=...)`` method.
    pos_scores, neg_scores
        Arrays of scores (must expose ``.shape`` and ``len``).
    positive_base_rate : float
        Assumed prior of the positive class, used for the negative weight.

    Returns
    -------
    The same ``cal_model`` instance after ``fit`` has been called on it.
    """
    pos_weight = len(neg_scores) / len(pos_scores)
    neg_weight = (1.0 - positive_base_rate) / positive_base_rate

    # Positives always come first, followed by negatives, in all three arrays.
    inputs = np.concatenate([pos_scores, neg_scores]).astype(float)
    labels = np.concatenate([np.ones(pos_scores.shape),
                             np.zeros(neg_scores.shape)]).astype(float)
    sample_weights = np.concatenate([np.full(pos_scores.shape, pos_weight),
                                     np.full(neg_scores.shape, neg_weight)]).astype(float)

    cal_model.fit(inputs, labels, sample_weight=sample_weights)
    return cal_model

In [7]:
losses = ['self_adversarial', 'pairwise', 'nll', 'multiclass_nll']
models = [TransE, DistMult, ComplEx]

results = []

# Quantities that depend only on the dataset, computed once for all runs.
y_test = X['test_labels'] == "1"
y_valid = (X['valid_labels'] == "1").astype(float)
valid_relations = X['valid'][:, 1]
test_relations = X['test'][:, 1]

# One run per (model class, loss) pair: train, score the test set, derive six
# probability estimates, then collect calibration, accuracy and ranking metrics.
for m, l in itertools.product(models, losses):
    model = m(batches_count=64, seed=0, epochs=1000, k=100, eta=20,
              optimizer='adam', optimizer_params={'lr': 0.0001},
              loss=l, verbose=False)

    model.fit(X['train'])

    scores = model.predict(X['test'])
    test_scores = np.squeeze(scores).astype(float)

    # probas1: model's built-in calibration using validation positives only.
    model.calibrate(X_valid_pos, batches_count=10, epochs=1000, positive_base_rate=0.5)
    print("pos", model.calibration_parameters)
    probas1 = model.predict_proba(X['test'])

    # probas2: built-in calibration using validation positives and negatives.
    model.calibrate(X_valid_pos, X_valid_neg)
    print("pos neg", model.calibration_parameters)
    probas2 = model.predict_proba(X['test'])

    # probas3: isotonic regression on all labelled validation scores.
    val_scores = model.predict(X['valid'])
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(np.squeeze(val_scores).astype(float), y_valid)
    probas3 = ir.predict(test_scores)

    # probas4: isotonic regression on validation positives vs. generated corruptions.
    model.generate_corruptions = types.MethodType(generate_corruptions, model)
    corruptions = model.generate_corruptions(X_valid_pos, batches_count=10, epochs=1000)
    val_pos_scores = np.squeeze(model.predict(X_valid_pos))
    iso_pos = pos_iso(IsotonicRegression(out_of_bounds='clip'), val_pos_scores, corruptions,
                      positive_base_rate=0.5)
    probas4 = iso_pos.predict(test_scores)

    # probas5: sklearn sigmoid calibration on positives vs. generated corruptions.
    sc_pos = pos_iso(_SigmoidCalibration(), val_pos_scores, corruptions, positive_base_rate=0.5)
    print("pos sc", sc_pos.a_, sc_pos.b_)
    probas5 = sc_pos.predict(test_scores)

    # probas6: sklearn sigmoid calibration on positives vs. labelled negatives.
    val_neg_scores = np.squeeze(model.predict(X_valid_neg))
    sc_pos_neg = pos_iso(_SigmoidCalibration(), val_pos_scores, val_neg_scores, positive_base_rate=0.5)
    print("pos neg sc", sc_pos_neg.a_, sc_pos_neg.b_)
    probas6 = sc_pos_neg.predict(test_scores)

    # Baseline classifier: threshold each relation at the median validation score.
    thresholds = {
        r: np.median(np.sort(val_scores[valid_relations == r]))
        for r in np.unique(valid_relations)
    }
    thresholds_test = np.vectorize(thresholds.get)(test_relations)
    per_relation_acc = accuracy_score(y_test, scores > thresholds_test)

    # Raw scores squashed through a logistic function serve as the uncalibrated reference.
    uncalibrated = expit(scores)
    acc_uncalib = accuracy_score(y_test, uncalibrated > 0.5)

    acc1, acc2, acc3, acc4, acc5, acc6 = (
        accuracy_score(y_test, probas > 0.5)
        for probas in (probas1, probas2, probas3, probas4, probas5, probas6)
    )

    # Link-prediction ranks on test positives, filtering every known positive triple.
    filter_triples = np.concatenate((X['train'], X_valid_pos, X_test_pos))
    ranks = evaluate_performance(X_test_pos,
                                 model=model,
                                 filter_triples=filter_triples,
                                 use_default_protocol=True,
                                 verbose=False)

    row = {
        'model': m.__name__,
        'loss': l,
        'brier_score_scores': brier_score_loss(y_test, uncalibrated),
        'log_loss_scores': log_loss(y_test, uncalibrated, eps=1e-7),
        'brier_score_probas_pos': brier_score_loss(y_test, probas1),
        'log_loss_probas_pos': log_loss(y_test, probas1, eps=1e-7),
        'brier_score_probas_pos_neg': brier_score_loss(y_test, probas2),
        'log_loss_probas_pos_neg': log_loss(y_test, probas2, eps=1e-7),
        'brier_score_probas_pos_neg_iso': brier_score_loss(y_test, probas3),
        'log_loss_probas_pos_neg_iso': log_loss(y_test, probas3, eps=1e-7),
        'brier_score_probas_pos_iso': brier_score_loss(y_test, probas4),
        'log_loss_probas_pos_iso': log_loss(y_test, probas4, eps=1e-7),
        'brier_score_probas_pos_sc': brier_score_loss(y_test, probas5),
        'log_loss_probas_pos_sc': log_loss(y_test, probas5, eps=1e-7),
        'brier_score_probas_pos_neg_sc': brier_score_loss(y_test, probas6),
        'log_loss_probas_pos_neg_sc': log_loss(y_test, probas6, eps=1e-7),
        'metrics_mrr': mrr_score(ranks),
        'metrics_hits@10': hits_at_n_score(ranks, n=10),
        'metrics_mr': mr_score(ranks),
        'accuracy_per_relation': per_relation_acc,
        'accuracy_uncalib': acc_uncalib,
        'accuracy_pos': acc1,
        'accuracy_pos_neg': acc2,
        'accuracy_pos_neg_iso': acc3,
        'accuracy_pos_iso': acc4,
        'accuracy_pos_sc': acc5,
        'accuracy_pos_neg_sc': acc6
    }
    results.append(row)

    print(json.dumps(row, indent=2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.random.categorical instead.
Instructions for updating:
Use tf.cast instead.
pos [-1.1274556, -6.5298634]
pos neg [-1.4107325, -8.387997]
pos sc -1.2900756888307388 -7.571785121784022
pos neg sc -1.4077141445537837 -8.392524621470622
{
  "model": "TransE",
  "loss": "self_adversarial",
  "brier_score_scores": 0.4434563678584502,
  "log_loss_scores": 1.9590963525473455,
  "brier_score_probas_pos": 0.09170062490

pos [-1.5017359, 0.2463557]
pos neg [-1.5661008, 0.2837555]
pos sc -1.4961204469996572 0.24684792385484905
pos neg sc -1.5611115725184608 0.2588920054476671
{
  "model": "DistMult",
  "loss": "pairwise",
  "brier_score_scores": 0.22372148923502322,
  "log_loss_scores": 0.6359518142416128,
  "brier_score_probas_pos": 0.21739267629457276,
  "log_loss_probas_pos": 0.6216109085646678,
  "brier_score_probas_pos_neg": 0.21711057696651062,
  "log_loss_probas_pos_neg": 0.6210930614252177,
  "brier_score_probas_pos_neg_iso": 0.21071790145634928,
  "log_loss_probas_pos_neg_iso": 0.6065441836211973,
  "brier_score_probas_pos_iso": 0.2114897510191189,
  "log_loss_probas_pos_iso": 0.6057231980938763,
  "brier_score_probas_pos_sc": 0.2173919784528625,
  "log_loss_probas_pos_sc": 0.6216448168141524,
  "brier_score_probas_pos_neg_sc": 0.2172820310938064,
  "log_loss_probas_pos_neg_sc": 0.6212059096852354,
  "metrics_mrr": 0.050672041279498994,
  "metrics_hits@10": 0.09267463424685761,
  "metrics_mr": 

pos neg [-0.43160096, 0.38303527]
pos sc -0.41109317201371803 0.34896867013904154
pos neg sc -0.43033690381412776 0.3580689606369485
{
  "model": "ComplEx",
  "loss": "multiclass_nll",
  "brier_score_scores": 0.24258861539934493,
  "log_loss_scores": 0.6993416732912586,
  "brier_score_probas_pos": 0.20819566545498489,
  "log_loss_probas_pos": 0.598081543332762,
  "brier_score_probas_pos_neg": 0.20803234201165566,
  "log_loss_probas_pos_neg": 0.5978698275915597,
  "brier_score_probas_pos_neg_iso": 0.20318267527251133,
  "log_loss_probas_pos_neg_iso": 0.585003413740273,
  "brier_score_probas_pos_iso": 0.2031939091224325,
  "log_loss_probas_pos_iso": 0.5838916375191405,
  "brier_score_probas_pos_sc": 0.20816845440863285,
  "log_loss_probas_pos_sc": 0.5981220145878066,
  "brier_score_probas_pos_neg_sc": 0.20820947745496257,
  "log_loss_probas_pos_neg_sc": 0.5979637033700109,
  "metrics_mrr": 0.08577869054183085,
  "metrics_hits@10": 0.14140737688028024,
  "metrics_mr": 10967.95688234082,
 

In [8]:
import pandas as pd

In [9]:
def highlight_min(s):
    """Return one CSS string per element of ``s``, bolding every minimum.

    Elements equal to ``s.min()`` map to ``'font-weight: bold'``; all others
    map to the empty string. Meant for ``DataFrame.style.apply``.
    """
    smallest = s.min()
    return ['font-weight: bold' if value == smallest else '' for value in s]

In [10]:
# One row per experiment, indexed by the (model, loss) pair it belongs to.
results_frame = pd.DataFrame(results)
df = results_frame.set_index(['model', 'loss'])

In [11]:
# Brier scores only, with the common column prefix stripped; the lowest value
# in each row is rendered in bold.
brier_columns = [name for name in df.columns if name.startswith('brier')]
bs = df[brier_columns]
bs.columns = [name[len("brier_score_"):] for name in bs.columns]
bs.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.0917006,0.0875217,0.0891202,0.0873843,0.0892299,0.0897936,0.443456
TransE,pairwise,0.208614,0.20043,0.201634,0.198374,0.201751,0.202342,0.492508
TransE,nll,0.0938784,0.0877837,0.0929506,0.0878685,0.0931915,0.0938824,0.222027
TransE,multiclass_nll,0.204041,0.188619,0.203742,0.188639,0.20403,0.204013,0.49254
DistMult,self_adversarial,0.213759,0.208319,0.213457,0.2079,0.213611,0.213783,0.488378
DistMult,pairwise,0.217393,0.21149,0.217111,0.210718,0.217282,0.217392,0.223721
DistMult,nll,0.224419,0.21378,0.22416,0.213445,0.224352,0.224476,0.469119
DistMult,multiclass_nll,0.212217,0.20536,0.21192,0.204893,0.212098,0.212214,0.262494
ComplEx,self_adversarial,0.23994,0.228413,0.239894,0.228215,0.24004,0.239956,0.489981
ComplEx,pairwise,0.212921,0.208193,0.212847,0.207901,0.213018,0.212895,0.22596


In [12]:
# Log losses only, with the common column prefix stripped; the lowest value
# in each row is rendered in bold.
log_loss_columns = [name for name in df.columns if name.startswith('log_loss')]
ll = df[log_loss_columns]
ll.columns = [name[len("log_loss_"):] for name in ll.columns]
ll.style.apply(highlight_min, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,probas_pos,probas_pos_iso,probas_pos_neg,probas_pos_neg_iso,probas_pos_neg_sc,probas_pos_sc,scores
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
TransE,self_adversarial,0.31107,0.295892,0.301613,0.29543,0.3017,0.304043,1.9591
TransE,pairwise,0.606402,0.589109,0.590702,0.585382,0.590642,0.591512,5.23391
TransE,nll,0.343691,0.300913,0.341729,0.298944,0.341975,0.343707,0.67009
TransE,multiclass_nll,0.599045,0.550538,0.598996,0.549596,0.599078,0.599014,7.66451
DistMult,self_adversarial,0.618347,0.601125,0.61821,0.603682,0.618237,0.618372,5.62464
DistMult,pairwise,0.621611,0.605723,0.621093,0.606544,0.621206,0.621645,0.635952
DistMult,nll,0.637803,0.607655,0.637868,0.611109,0.638026,0.637892,5.62026
DistMult,multiclass_nll,0.609136,0.591042,0.608781,0.587805,0.608875,0.609198,0.792607
ComplEx,self_adversarial,0.673685,0.650127,0.673559,0.650664,0.673705,0.673707,6.06107
ComplEx,pairwise,0.611003,0.595664,0.610913,0.59741,0.610996,0.610992,0.642548


In [13]:
# LaTeX table: Brier scores of the runs trained with the self-adversarial loss.
bs_self_adversarial = (
    bs.reset_index()
    .query("loss == 'self_adversarial' ")
    [['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
    .reset_index(drop=True)
    .round(3)
)
print(bs_self_adversarial.to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   0.443 &           0.089 &               0.087 &       0.092 &           0.088 \\
1 &  DistMult &   0.488 &           0.213 &               0.208 &       0.214 &           0.208 \\
2 &   ComplEx &   0.490 &           0.240 &               0.228 &       0.240 &           0.228 \\
\bottomrule
\end{tabular}



In [14]:
# LaTeX table: log losses of the runs trained with the self-adversarial loss.
ll_self_adversarial = (
    ll.reset_index()
    .query("loss == 'self_adversarial' ")
    [['model', 'scores', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
    .reset_index(drop=True)
    .round(3)
)
print(ll_self_adversarial.to_latex())

\begin{tabular}{llrrrrr}
\toprule
{} &     model &  scores &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &    TransE &   1.959 &           0.302 &               0.295 &       0.311 &           0.296 \\
1 &  DistMult &   5.625 &           0.618 &               0.604 &       0.618 &           0.601 \\
2 &   ComplEx &   6.061 &           0.674 &               0.651 &       0.674 &           0.650 \\
\bottomrule
\end{tabular}



In [18]:
# LaTeX table: Brier scores of the TransE runs, one row per loss.
bs_transe = (
    bs.reset_index()
    .query("model == 'TransE' ")
    [['loss',  'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
    .reset_index(drop=True)
    .round(3)
)
print(bs_transe.to_latex())

\begin{tabular}{llrrrr}
\toprule
{} &              loss &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &  self\_adversarial &           0.089 &               0.087 &       0.092 &           0.088 \\
1 &          pairwise &           0.202 &               0.198 &       0.209 &           0.200 \\
2 &               nll &           0.093 &               0.088 &       0.094 &           0.088 \\
3 &    multiclass\_nll &           0.204 &               0.189 &       0.204 &           0.189 \\
\bottomrule
\end{tabular}



In [19]:
# LaTeX table: log losses of the TransE runs, one row per loss.
ll_transe = (
    ll.reset_index()
    .query("model == 'TransE' ")
    [['loss', 'probas_pos_neg', 'probas_pos_neg_iso', 'probas_pos', 'probas_pos_iso']]
    .reset_index(drop=True)
    .round(3)
)
print(ll_transe.to_latex())

\begin{tabular}{llrrrr}
\toprule
{} &              loss &  probas\_pos\_neg &  probas\_pos\_neg\_iso &  probas\_pos &  probas\_pos\_iso \\
\midrule
0 &  self\_adversarial &           0.302 &               0.295 &       0.311 &           0.296 \\
1 &          pairwise &           0.591 &               0.585 &       0.606 &           0.589 \\
2 &               nll &           0.342 &               0.299 &       0.344 &           0.301 \\
3 &    multiclass\_nll &           0.599 &               0.550 &       0.599 &           0.551 \\
\bottomrule
\end{tabular}



In [25]:
# Build the accuracy table here: in notebook order `acc` is only defined by a
# cell further down, so on a fresh kernel (Restart & Run All) this cell used to
# fail with NameError. The construction mirrors that later cell exactly.
acc = df[[c for c in df.columns if c.startswith('accuracy')]]
acc.columns = [c[len("accuracy_"):] for c in acc.columns]

# LaTeX table: accuracies (in percent) of the runs trained with the
# self-adversarial loss.
print((acc*100).reset_index()
 .query("loss == 'self_adversarial' ")
 [['model', 'pos_neg', 'pos_neg_iso', 'pos', 'pos_iso', 'uncalib', 'per_relation']]
 .reset_index(drop=True)
  .round(1)
 .to_latex())

\begin{tabular}{llrrrrrr}
\toprule
{} &     model &  pos\_neg &  pos\_neg\_iso &   pos &  pos\_iso &  uncalib &  per\_relation \\
\midrule
0 &    TransE &     88.8 &         88.9 &  88.8 &     88.9 &     50.7 &          88.2 \\
1 &  DistMult &     66.5 &         67.2 &  66.1 &     67.1 &     50.8 &          67.2 \\
2 &   ComplEx &     60.6 &         62.4 &  59.8 &     62.4 &     50.8 &          59.6 \\
\bottomrule
\end{tabular}



In [20]:
# Ranking metrics only (columns prefixed with "metrics"), prefix stripped.
metric_columns = [name for name in df.columns if name.startswith('metrics')]
metrics = df[metric_columns]
metrics.columns = [name[len("metrics_"):] for name in metrics.columns]
metrics

Unnamed: 0_level_0,Unnamed: 1_level_0,hits@10,mr,mrr
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TransE,self_adversarial,0.308314,2284.527818,0.155107
TransE,pairwise,0.134247,7259.422007,0.057914
TransE,nll,0.226973,2516.159025,0.133237
TransE,multiclass_nll,0.235112,7436.874047,0.10849
DistMult,self_adversarial,0.08129,9999.899341,0.044532
DistMult,pairwise,0.092675,12092.100711,0.050672
DistMult,nll,0.092108,12594.587832,0.049014
DistMult,multiclass_nll,0.108902,11518.330311,0.060369
ComplEx,self_adversarial,0.093859,13814.839532,0.053883
ComplEx,pairwise,0.11395,11376.298578,0.067785


In [21]:
def highlight_max(s):
    """Return one CSS string per element of ``s``, bolding every maximum.

    Elements equal to ``s.max()`` map to ``'font-weight: bold'``; all others
    map to the empty string. Meant for ``DataFrame.style.apply``.
    """
    largest = s.max()
    return ['font-weight: bold' if value == largest else '' for value in s]

# Accuracies only, with the common column prefix stripped; the highest value
# in each row is rendered in bold.
accuracy_columns = [name for name in df.columns if name.startswith('accuracy')]
acc = df[accuracy_columns]
acc.columns = [name[len("accuracy_"):] for name in acc.columns]
acc.style.apply(highlight_max, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,per_relation,pos,pos_iso,pos_neg,pos_neg_iso,pos_neg_sc,pos_sc,uncalib
model,loss,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
TransE,self_adversarial,0.881711,0.88775,0.889374,0.888257,0.888613,0.888156,0.889222,0.50746
TransE,pairwise,0.705623,0.6905,0.68913,0.690551,0.695626,0.689282,0.689282,0.50746
TransE,nll,0.87958,0.878463,0.882523,0.879174,0.882574,0.878463,0.878463,0.50746
TransE,multiclass_nll,0.6973,0.678271,0.71496,0.683041,0.714858,0.67888,0.678017,0.50746
DistMult,self_adversarial,0.67213,0.661169,0.671217,0.664671,0.67208,0.664823,0.660865,0.508373
DistMult,pairwise,0.625343,0.638334,0.658581,0.641226,0.656805,0.638334,0.638232,0.601543
DistMult,nll,0.611235,0.617984,0.646808,0.623465,0.649295,0.619456,0.616969,0.521719
DistMult,multiclass_nll,0.639957,0.654369,0.671775,0.657921,0.671927,0.655587,0.654471,0.611337
ComplEx,self_adversarial,0.595605,0.597686,0.624378,0.606059,0.624328,0.599259,0.597026,0.508069
ComplEx,pairwise,0.63945,0.651426,0.665432,0.653253,0.663859,0.65036,0.651629,0.609002


In [22]:
# Spearman rank correlation between each accuracy column and selected log-loss columns.
spearman_vs_log_loss = df.corr(method='spearman').reset_index()
accuracy_vs_log_loss = spearman_vs_log_loss.query("index.str.startswith('accuracy')")
accuracy_vs_log_loss[['index', 'log_loss_probas_pos_neg', 'log_loss_probas_pos_neg_iso', 'log_loss_probas_pos', 'log_loss_probas_pos_iso']]

Unnamed: 0,index,log_loss_probas_pos_neg,log_loss_probas_pos_neg_iso,log_loss_probas_pos,log_loss_probas_pos_iso
0,accuracy_per_relation,-0.937063,-0.923077,-0.909091,-0.923077
1,accuracy_pos,-0.965035,-0.951049,-0.944056,-0.951049
2,accuracy_pos_iso,-0.972028,-0.986014,-0.972028,-0.986014
3,accuracy_pos_neg,-0.965035,-0.951049,-0.944056,-0.951049
4,accuracy_pos_neg_iso,-0.958042,-0.972028,-0.958042,-0.972028
5,accuracy_pos_neg_sc,-0.937063,-0.923077,-0.909091,-0.923077
6,accuracy_pos_sc,-0.965035,-0.951049,-0.944056,-0.951049
7,accuracy_uncalib,0.359488,0.359488,0.291861,0.359488


In [23]:
# Spearman rank correlation between each accuracy column and selected Brier-score columns.
spearman_vs_brier = df.corr(method='spearman').reset_index()
accuracy_vs_brier = spearman_vs_brier.query("index.str.startswith('accuracy')")
accuracy_vs_brier[['index', 'brier_score_probas_pos_neg', 'brier_score_probas_pos_neg_iso', 'brier_score_probas_pos', 'brier_score_probas_pos_iso']]

Unnamed: 0,index,brier_score_probas_pos_neg,brier_score_probas_pos_neg_iso,brier_score_probas_pos,brier_score_probas_pos_iso
0,accuracy_per_relation,-0.951049,-0.965035,-0.923077,-0.944056
1,accuracy_pos,-0.972028,-0.979021,-0.951049,-0.965035
2,accuracy_pos_iso,-0.986014,-1.0,-0.986014,-0.993007
3,accuracy_pos_neg,-0.972028,-0.979021,-0.951049,-0.965035
4,accuracy_pos_neg_iso,-0.972028,-0.993007,-0.972028,-0.979021
5,accuracy_pos_neg_sc,-0.951049,-0.965035,-0.923077,-0.944056
6,accuracy_pos_sc,-0.972028,-0.979021,-0.951049,-0.965035
7,accuracy_uncalib,0.427114,0.455588,0.359488,0.427114
