# Probability Calibration in KG Embedding
This experiemnt is to investigate which calibration technique is the most suitable one given a dataset and a KG Embedding model.

Within this experiment, we are going to see the performance of 4 typical calibration techniques for 4 KGE models in 3 datasets:
- calibration techniques:
  - Platt Scaling
  - Isotonic Regression
  - Histogram BInning
  - Beta Calibration
- KG Embedding models
  - TransE
  - ComplEx
  - DistMult
  - HoLE
- Datasets
  - FB13k
  - Win11
  - Yago39

In [1]:
import sys
# enable importing the modules from probcalkge
sys.path.append('../')
sys.path.append('../probcalkge')

In [2]:
import numpy as np
import pandas as pd

In [3]:
from ampligraph.latent_features import RandomBaseline, TransE
import probcalkge

from probcalkge import Experiment
from probcalkge import get_calibrators
from probcalkge import get_datasets, get_fb13, get_kgemodels
from probcalkge import brier_score, negative_log_loss

In [9]:
# ds = get_datasets()
cals = get_calibrators()
kges = get_kgemodels()
exp = Experiment(cals=[cals.uncal, cals.platt], datasets=[get_fb13()], kges=[TransE(), kges.hoLE], metrics=[brier_score, negative_log_loss])



In [5]:
# exp.run()

In [10]:
import probcalkge
import importlib
importlib.reload(probcalkge)

from probcalkge import ExperimentResult



df = pd.DataFrame([[1,2],[3,4]], index=['bs', 'nll'], columns=['uncal', 'platt'])

res = {
    'transE': {
        'fb13': df,
        'wn11':df
    },
    'DistMult': {
        'fb13': df,
        'wn11': df
    },
}

expres = ExperimentResult(exp, res)

[[array([[1, 2],
       [3, 4]]), array([[1, 2],
       [3, 4]])], [array([[1, 2],
       [3, 4]]), array([[1, 2],
       [3, 4]])]]
{'cal': ['UncalCalibtator', 'PlattCalibtator'], 'kge': ['TransE', 'HolE'], 'dataset': ['FB13k'], 'metric': ['brier_score', 'negative_log_loss']}


ValueError: conflicting sizes for dimension 'dataset': length 2 on the data but length 1 on coordinate 'dataset'

In [None]:
print(exp.datasets)

[<caldatasets.DatasetWrapper object at 0x7f5ff95f16d0>]


In [None]:
from probcalkge.calutils import get_cls_name
[get_cls_name(i) for i in exp.cals]

['BetaCalibtator']