In [1]:
from typing import Any, Dict, List, Optional, Union, Tuple

import requests
import torch
from lm_polygraph import estimate_uncertainty
from lm_polygraph.estimators import *
from lm_polygraph.model_adapters.visual_whitebox_model import VisualWhiteboxModel
from PIL import Image

# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and library edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
from transformers import (
    AutoModelForCausalLM,
    AutoModelForVision2Seq,
    AutoProcessor,
    AutoTokenizer,
)
from dataclasses import dataclass
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed the global torch and numpy RNGs before anything is sampled. Without a
# seed the sampling-based estimators further down are not repeatable: the two
# SemanticEntropy runs and the two LexicalSimilarity rougeL runs recorded in
# this notebook return different scores for the same prompt and model.
# NOTE(review): assumes lm_polygraph samples through these global generators;
# this only makes a full Restart Kernel -> Run All repeatable, not the re-run
# of a single cell — confirm.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# One checkpoint id shared by the model and its processor, so the two cannot
# drift apart when the checkpoint is changed.
MODEL_ID = "microsoft/kosmos-2-patch14-224"
base_model = AutoModelForVision2Seq.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Create whitebox model with image (the image is referenced by URL).
url = "https://huggingface.co/microsoft/kosmos-2-patch14-224/resolve/main/snowman.png"
model = VisualWhiteboxModel(base_model, processor, images=[url])

# Prompt reused by every estimator cell in this notebook.
input_text = ["<grounding>An image of"]

# First estimator. In the recorded output the result holds an array with one
# score per generated token (14 scores for 14 generation tokens).
estimator = MaximumTokenProbability()
uncertainty = estimate_uncertainty(model, estimator, input_text=input_text, image=url)
print("Uncertainty estimation:", uncertainty)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
`generation_config` default values have been modified to match model-specific defaults: {'no_repeat_ngram_size': 3, 'pad_token_id': 1, 'bos_token_id': 0, 'eos_token_id': 2}. If this is not desired, please set these values explicitly.


Uncertainty estimation: UncertaintyOutput(uncertainty=array([-0.2788393 , -0.9490852 , -0.39342225, -0.2518251 , -0.37809426,
       -0.47034308, -0.9533952 , -1.        , -0.21120858, -0.20057407,
       -0.9998684 , -0.17277305, -0.4382389 , -0.31373882], dtype=float32), input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MaximumTokenProbability')


In [3]:
# Score the shared prompt with MeanTokenEntropy; the bare call on the last line
# makes the notebook display the returned UncertaintyOutput.
# Unlike the first call in the setup cell, `image` is not passed here.
# NOTE(review): presumably the image given as `images=[url]` when the model was
# built is used instead — the recorded generation text is unchanged; confirm.
estimator = MeanTokenEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=2.3465352058410645, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MeanTokenEntropy')

In [4]:
# Score the shared prompt with MeanPointwiseMutualInformation.
# The recorded run warns that `return_dict` is not a valid argument for the
# processor and is ignored.
estimator = MeanPointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text=input_text)

Keyword argument `return_dict` is not a valid argument for this processor and will be ignored.


UncertaintyOutput(uncertainty=-8.027487311841105, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MeanPointwiseMutualInformation')

In [5]:
# Score the shared prompt with MeanConditionalPointwiseMutualInformation; the
# returned UncertaintyOutput is displayed as the cell result.
estimator = MeanConditionalPointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-23.274238443598207, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MeanConditionalPointwiseMutualInformation')

In [6]:
# Score the shared prompt with ClaimConditionedProbability (the result reports
# the estimator name as 'CCP').
# The recorded run initialises a DebertaForSequenceClassification from the
# microsoft/deberta-large-mnli checkpoint.
estimator = ClaimConditionedProbability()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=-0.003799581815792311, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='CCP')

In [7]:
# Score the shared prompt with MonteCarloNormalizedSequenceEntropy; the
# returned UncertaintyOutput is displayed as the cell result.
estimator = MonteCarloNormalizedSequenceEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=2.1621513140109903, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MonteCarloNormalizedSequenceEntropy')

In [8]:
# Score the shared prompt with MonteCarloSequenceEntropy (the counterpart of
# the MonteCarloNormalizedSequenceEntropy cell just before it).
estimator = MonteCarloSequenceEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=41.42206049134766, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MonteCarloSequenceEntropy')

In [9]:
# Score the shared prompt with LexicalSimilarity, metric "rouge1" — the first
# of four metric variants (rouge1, rouge2, rougeL, BLEU) run back to back.
estimator = LexicalSimilarity(metric="rouge1")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.24579640255254154, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='LexicalSimilarity_rouge1')

In [10]:
# LexicalSimilarity again, this time with metric "rouge2".
estimator = LexicalSimilarity(metric="rouge2")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.04654150486699788, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='LexicalSimilarity_rouge2')

In [11]:
# LexicalSimilarity with metric "rougeL".
# A later cell repeats this estimator and records a different score (about
# -0.26 here, about -0.35 there), so the result varies between runs.
estimator = LexicalSimilarity(metric="rougeL")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.2606535299532213, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='LexicalSimilarity_rougeL')

In [12]:
# LexicalSimilarity with metric "BLEU".
# The recorded run warns that the hypothesis has zero 2-, 3- and 4-gram
# overlaps, and the recorded score is about -5.4e-156, i.e. effectively zero.
estimator = LexicalSimilarity(metric="BLEU")
estimate_uncertainty(model, estimator, input_text=input_text)

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


UncertaintyOutput(uncertainty=-5.42820029253991e-156, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='LexicalSimilarity_BLEU')

In [13]:
# Score the shared prompt with NumSemSets.
# The recorded run initialises the microsoft/deberta-large-mnli classifier
# again; that load message recurs in several later cells as well.
estimator = NumSemSets()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=1.0, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='NumSemSets')

In [14]:
# EigValLaplacian configured with the "NLI_score" similarity and the "entail"
# affinity; the recorded run loads microsoft/deberta-large-mnli.
estimator = EigValLaplacian(similarity_score="NLI_score", affinity="entail")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=5.151281267404556, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='EigValLaplacian_NLI_score_entail')

In [15]:
# EigValLaplacian with the "NLI_score" similarity, switched to the "contra"
# affinity; the recorded run loads microsoft/deberta-large-mnli.
estimator = EigValLaplacian(similarity_score="NLI_score", affinity="contra")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=1.5288874531972327, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='EigValLaplacian_NLI_score_contra')

In [16]:
# EigValLaplacian with the "Jaccard_score" similarity (no affinity argument).
# No NLI checkpoint load message appears in the recorded output of this cell.
estimator = EigValLaplacian(similarity_score="Jaccard_score")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=7.512683177452175, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='EigValLaplacian_Jaccard_score')

In [17]:
# DegMat configured with the "NLI_score" similarity and the "entail" affinity;
# the recorded run loads microsoft/deberta-large-mnli.
estimator = DegMat(similarity_score="NLI_score", affinity="entail")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=0.8299521636962891, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='DegMat_NLI_score_entail')

In [18]:
# Eccentricity configured with the "NLI_score" similarity and the "entail"
# affinity; the recorded run loads microsoft/deberta-large-mnli.
estimator = Eccentricity(similarity_score="NLI_score", affinity="entail")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=2.8285619015965726, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='Eccentricity_NLI_score_entail')

In [19]:
# Eccentricity with the "NLI_score" similarity, switched to the "contra"
# affinity; the recorded run loads microsoft/deberta-large-mnli.
estimator = Eccentricity(similarity_score="NLI_score", affinity="contra")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=1.0016187133523438, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='Eccentricity_NLI_score_contra')

In [20]:
# Eccentricity with the "Jaccard_score" similarity (no affinity argument).
# No NLI checkpoint load message appears in the recorded output of this cell.
estimator = Eccentricity(similarity_score="Jaccard_score")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=3.0000000000000004, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='Eccentricity_Jaccard_score')

In [21]:
# Score the shared prompt with SemanticEntropy; the recorded run loads
# microsoft/deberta-large-mnli.
# The same estimator is run again near the end of the notebook and records a
# different score (about 54.4 here, about 59.6 there).
estimator = SemanticEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=54.3795428112353, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='SemanticEntropy')

In [22]:
# Score the shared prompt with SAR — the first of three related cells
# (SAR, TokenSAR, SentenceSAR).
estimator = SAR()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-5.580256035871347, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='SAR')

In [23]:
# Score the shared prompt with TokenSAR; the returned UncertaintyOutput is
# displayed as the cell result.
estimator = TokenSAR()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.9435051679611206, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='TokenSAR')

In [24]:
# Score the shared prompt with SentenceSAR; the returned UncertaintyOutput is
# displayed as the cell result.
estimator = SentenceSAR()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=13.684095689642689, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='SentenceSAR')

In [25]:
# Score the shared prompt with RenyiNeg, constructed with its default
# arguments.
estimator = RenyiNeg()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-20.704326629638672, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='RenyiNeg')

In [26]:
# Score the shared prompt with FisherRao, constructed with its default
# arguments.
estimator = FisherRao()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.606591522693634, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='FisherRao')

In [27]:
# Score the shared prompt with KernelLanguageEntropy; the recorded run loads
# microsoft/deberta-large-mnli.
# NOTE(review): the recorded output also contains a fragment of a warning that
# points at an `np.linalg.eig(...).eigenvalues.astype(` line, but the warning
# message itself is cut off — re-run to see what is being reported.
estimator = KernelLanguageEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  eigvs = np.linalg.eig(K + jitter * np.eye(K.shape[0])).eigenvalues.astype(


UncertaintyOutput(uncertainty=0.8768951916704488, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='KernelLanguageEntropy')

In [28]:
# Score the shared prompt with LUQ; the recorded run loads
# microsoft/deberta-large-mnli.
estimator = LUQ()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=0.5801714062690735, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='LUQ')

In [29]:
# Score the shared prompt with EigenScore; the returned UncertaintyOutput is
# displayed as the cell result.
estimator = EigenScore()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=13.314866285650002, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='EigenScore')

In [31]:
# Score the shared prompt with PTrue.
# NOTE(review): the recorded execution counter jumps from 29 to 31 at this
# cell, so the saved outputs do not come from one clean top-to-bottom run —
# Restart Kernel -> Run All before relying on them.
estimator = PTrue()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=11.790192604064941, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='PTrue')

In [32]:
# Score the shared prompt with PTrueSampling.
# NOTE(review): the recorded score (11.790192604064941) is identical to the
# one recorded for PTrue in the preceding cell — confirm that the sampling
# variant really behaves differently with this model.
estimator = PTrueSampling()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=11.790192604064941, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='PTrueSampling')

In [33]:
# Score the shared prompt with Perplexity; the returned UncertaintyOutput is
# displayed as the cell result.
estimator = Perplexity()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.8815739750862122, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='Perplexity')

In [34]:
# Score the shared prompt with MaximumSequenceProbability; the recorded result
# is a single scalar for the whole generation.
estimator = MaximumSequenceProbability()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=13.223609924316406, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='MaximumSequenceProbability')

In [35]:
# Repeat of the earlier LexicalSimilarity rougeL run. The metric is named
# explicitly so the call reads like the other LexicalSimilarity cells.
estimator = LexicalSimilarity(metric="rougeL")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.34552215232850136, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='LexicalSimilarity_rougeL')

In [36]:
# Second SemanticEntropy run on the same prompt and model. Its recorded score
# (about 59.6) differs from the earlier SemanticEntropy cell (about 54.4), so
# the estimate is not repeatable between runs as the notebook stands.
estimator = SemanticEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=59.61873066311618, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='SemanticEntropy')

In [37]:
# Score the shared prompt with PointwiseMutualInformation; the returned
# UncertaintyOutput is displayed as the cell result.
estimator = PointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=1.2771196365356445, input_text=['<grounding>An image of'], generation_text='Snowman in<phrase> a hat</phrase><object><patch_index_0145><patch_index_0246></object> in the snow', generation_tokens=[6709, 581, 12, 64007, 10, 3958, 64008, 64009, 64158, 64259, 64010, 12, 5, 1842], model_path=None, estimator='PointwiseMutualInformation')