In [1]:
from typing import Any, Dict, List, Optional, Union, Tuple

import requests
import torch
from lm_polygraph import estimate_uncertainty
from lm_polygraph.estimators import *
from lm_polygraph.model_adapters.whitebox_visual import VisualWhiteboxModel
from PIL import Image

%load_ext autoreload
%autoreload 2
from transformers import (
    AutoModelForCausalLM,
    AutoModelForVision2Seq,
    AutoProcessor,
    AutoTokenizer,
)
from dataclasses import dataclass
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Setup: load the vision-language checkpoint and run a first estimator ---
# The checkpoint id is used for both the model and its processor, so it is
# named once to keep the two loads in sync.
MODEL_NAME = "microsoft/kosmos-2-patch14-224"
SEED = 42

# Seed the RNGs before any generation. Several later cells rerun the same
# estimator on the same prompt and record different scores, so without a seed
# the notebook is not repeatable under Restart & Run All.
# NOTE(review): assumes the library samples through torch / numpy RNGs — confirm.
torch.manual_seed(SEED)
np.random.seed(SEED)

base_model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME)
processor = AutoProcessor.from_pretrained(MODEL_NAME)

# Create whitebox model with image
url = "https://huggingface.co/microsoft/kosmos-2-patch14-224/resolve/main/snowman.png"
model = VisualWhiteboxModel(base_model, processor, image_urls=[url])

# Prompt shared by every estimator cell below (a one-element batch).
input_text = ["<grounding>An image of"]

# First estimator: the recorded output for this one is an array of scores,
# whereas the later cells record a single scalar.
estimator = MaximumTokenProbability()
uncertainty = estimate_uncertainty(model, estimator, input_text=input_text)
print("Uncertainty estimation:", uncertainty)

Keyword argument `return_dict` is not a valid argument for this processor and will be ignored.


Uncertainty estimation: UncertaintyOutput(uncertainty=array([-0.43950686, -0.55753523, -0.7364028 , -0.63372934, -1.        ,
       -0.96391106, -0.89820856, -0.9997953 , -0.34002206, -0.40260765,
       -0.31840694, -0.872435  , -0.85644513, -0.47168002, -0.9281716 ,
       -1.        , -0.08342137, -0.12618384, -0.99650866], dtype=float32), input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MaximumTokenProbability')


In [4]:
# Score the shared prompt with MeanTokenEntropy, reusing `model` and
# `input_text` from the setup cell. The bare last expression is the cell output.
estimator = MeanTokenEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=1.3938682079315186, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MeanTokenEntropy')

In [4]:
# Same prompt and model, scored with MeanPointwiseMutualInformation.
# The recorded result is a single negative scalar.
estimator = MeanPointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-10.939052410294972, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MeanPointwiseMutualInformation')

In [5]:
# Same prompt and model, scored with MeanConditionalPointwiseMutualInformation.
# The recorded result is a single negative scalar.
estimator = MeanConditionalPointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-30.574629399421177, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MeanConditionalPointwiseMutualInformation')

In [6]:
# ClaimConditionedProbability on the shared prompt. The recorded stderr shows
# a microsoft/deberta-large-mnli checkpoint being loaded during this cell, and
# the result labels the estimator as 'CCP'.
estimator = ClaimConditionedProbability()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=-0.0024776310887388783, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='CCP')

In [7]:
# MonteCarloNormalizedSequenceEntropy on the shared prompt, reusing `model`
# and `input_text` from the setup cell.
estimator = MonteCarloNormalizedSequenceEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=1.4882061961428483, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MonteCarloNormalizedSequenceEntropy')

In [8]:
# MonteCarloSequenceEntropy on the shared prompt, reusing `model` and
# `input_text` from the setup cell.
estimator = MonteCarloSequenceEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=38.14663201745516, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MonteCarloSequenceEntropy')

In [9]:
# LexicalSimilarity configured with metric="rouge1". The result labels it
# 'LexicalSimilarity_rouge1' and the recorded score is negative.
estimator = LexicalSimilarity(metric="rouge1")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.4925284515074195, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='LexicalSimilarity_rouge1')

In [10]:
# LexicalSimilarity configured with metric="rouge2"; otherwise identical to
# the rouge1 cell.
estimator = LexicalSimilarity(metric="rouge2")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.2044491397666343, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='LexicalSimilarity_rouge2')

In [None]:
# LexicalSimilarity configured with metric="rougeL".
# NOTE(review): the same estimator is run again near the end of the notebook
# and records a different score, so this value is not repeatable as-is.
estimator = LexicalSimilarity(metric="rougeL")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.37681065721924395, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='LexicalSimilarity_rougeL')

In [None]:
# LexicalSimilarity configured with metric="BLEU". The recorded run emits
# warnings about zero 2-, 3- and 4-gram overlaps, which suggest using a lower
# n-gram order or a smoothing function.
estimator = LexicalSimilarity(metric="BLEU")
estimate_uncertainty(model, estimator, input_text=input_text)

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


UncertaintyOutput(uncertainty=-0.0023143153537747807, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='LexicalSimilarity_BLEU')

In [None]:
# NumSemSets on the shared prompt. The recorded stderr shows a
# microsoft/deberta-large-mnli checkpoint being loaded during this cell.
estimator = NumSemSets()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=4.0, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='NumSemSets')

In [None]:
# EigValLaplacian with similarity_score="NLI_score" and affinity="entail".
# The recorded stderr shows microsoft/deberta-large-mnli being loaded.
estimator = EigValLaplacian(similarity_score="NLI_score", affinity="entail")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=4.2550045549869555, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='EigValLaplacian_NLI_score_entail')

In [None]:
# EigValLaplacian with similarity_score="NLI_score" and affinity="contra".
# NOTE(review): the recorded run logged "Error during conversion:
# ChunkedEncodingError(...)" yet still returned a score; confirm the result
# is valid despite the failed transfer.
estimator = EigValLaplacian(similarity_score="NLI_score", affinity="contra")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))


UncertaintyOutput(uncertainty=1.5227961449172152, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='EigValLaplacian_NLI_score_contra')

In [None]:
# EigValLaplacian with similarity_score="Jaccard_score" and no affinity
# argument. The recorded output for this variant shows no NLI checkpoint load.
estimator = EigValLaplacian(similarity_score="Jaccard_score")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=6.165138721581687, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='EigValLaplacian_Jaccard_score')

In [None]:
# DegMat with similarity_score="NLI_score" and affinity="entail".
# The recorded stderr shows microsoft/deberta-large-mnli being loaded.
estimator = DegMat(similarity_score="NLI_score", affinity="entail")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=0.751336669921875, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='DegMat_NLI_score_entail')

In [None]:
# Eccentricity with similarity_score="NLI_score" and affinity="entail".
# The recorded stderr shows microsoft/deberta-large-mnli being loaded.
estimator = Eccentricity(similarity_score="NLI_score", affinity="entail")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=2.64618190942312, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='Eccentricity_NLI_score_entail')

In [None]:
# Eccentricity with similarity_score="NLI_score" and affinity="contra".
# The recorded stderr shows microsoft/deberta-large-mnli being loaded.
estimator = Eccentricity(similarity_score="NLI_score", affinity="contra")
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=1.7340330921535638, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='Eccentricity_NLI_score_contra')

In [None]:
# Eccentricity with similarity_score="Jaccard_score" and no affinity
# argument. The recorded output for this variant shows no NLI checkpoint load.
estimator = Eccentricity(similarity_score="Jaccard_score")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=3.0, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='Eccentricity_Jaccard_score')

In [None]:
# SemanticEntropy on the shared prompt. The recorded stderr shows
# microsoft/deberta-large-mnli being loaded.
# NOTE(review): this run logged "Error during conversion:
# ChunkedEncodingError(...)", and a later rerun of the same estimator records
# a different score; treat the value as non-repeatable until seeded.
estimator = SemanticEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))


UncertaintyOutput(uncertainty=42.6698436271064, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='SemanticEntropy')

In [None]:
# SAR on the shared prompt, reusing `model` and `input_text` from the setup
# cell. The recorded score is negative.
estimator = SAR()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-7.110739614758488, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='SAR')

In [None]:
# TokenSAR on the shared prompt, reusing `model` and `input_text` from the
# setup cell.
estimator = TokenSAR()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.619343638420105, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='TokenSAR')

In [None]:
# SentenceSAR on the shared prompt, reusing `model` and `input_text` from the
# setup cell.
estimator = SentenceSAR()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=7.410428514598019, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='SentenceSAR')

In [None]:
# RenyiNeg on the shared prompt, constructed with its default arguments.
# The recorded score is negative.
estimator = RenyiNeg()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-20.160396575927734, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='RenyiNeg')

In [None]:
# FisherRao on the shared prompt, constructed with its default arguments.
estimator = FisherRao()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.6960842609405518, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='FisherRao')

In [None]:
# KernelLanguageEntropy on the shared prompt. The recorded stderr shows
# microsoft/deberta-large-mnli being loaded, a ChunkedEncodingError message,
# and a truncated warning that points at an `np.linalg.eig(...)` call.
# NOTE(review): the warning text itself was cut off; rerun to see what it says.
estimator = KernelLanguageEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))
  eigvs = np.linalg.eig(K + jitter * np.eye(K.shape[0])).eigenvalues.astype(


UncertaintyOutput(uncertainty=0.928356676367408, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='KernelLanguageEntropy')

In [None]:
# LUQ on the shared prompt. The recorded stderr shows
# microsoft/deberta-large-mnli being loaded during this cell.
estimator = LUQ()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=0.3486354947090149, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='LUQ')

In [None]:
# EigenScore on the shared prompt, reusing `model` and `input_text` from the
# setup cell.
estimator = EigenScore()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=13.459794337654325, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='EigenScore')

In [None]:
# Focus needs the checkpoint id both as an argument and to locate its token-IDF
# cache on disk. Read it through the config's public `name_or_path` accessor
# rather than the private `_name_or_path` field.
model_name = model.model.config.name_or_path

# The IDF statistics path is relative to the notebook's working directory.
# NOTE(review): whether the file is created on first use or must already exist
# is not visible here — confirm before running on a fresh checkout.
estimator = Focus(
    model_name=model_name,
    path=f"../focus_data/{model_name}/token_idf.pkl",
    gamma=0.9,
    p=0.01,
    idf_dataset="togethercomputer/RedPajama-Data-1T-Sample",
    trust_remote_code=True,  # allows dataset-provided code to run; keep only for trusted sources
    idf_seed=42,
    idf_dataset_size=1000,
    spacy_path="en_core_web_sm",
)
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=8.79899084502623, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='Focus (gamma=0.9)')

In [37]:
# Author's marker: "Blackbox!!!".
# NOTE(review): it is not clear from this notebook what the marker applies to
# (this cell still passes the same whitebox `model`); confirm the intent and
# replace it with a markdown heading.
estimator = PTrue()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=12.144603729248047, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='PTrue')

In [38]:
# PTrueSampling on the shared prompt.
# NOTE(review): the recorded uncertainty (12.144603729248047) is identical to
# the preceding PTrue cell's; confirm this is expected and not a stale output.
estimator = PTrueSampling()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=12.144603729248047, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='PTrueSampling')

In [39]:
# Perplexity on the shared prompt, reusing `model` and `input_text` from the
# setup cell.
estimator = Perplexity()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.6065594553947449, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='Perplexity')

In [40]:
# MaximumSequenceProbability on the shared prompt. The recorded result is a
# single scalar, unlike the array recorded for MaximumTokenProbability in the
# setup cell.
estimator = MaximumSequenceProbability()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=12.131189346313477, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='MaximumSequenceProbability')

In [41]:
# Second rougeL run of LexicalSimilarity. The metric is passed by keyword so
# this cell matches the other LexicalSimilarity cells in the notebook.
# NOTE(review): the recorded score differs from the earlier rougeL cell, so the
# estimate is not repeatable between runs as-is.
estimator = LexicalSimilarity(metric="rougeL")
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=-0.4269182630510808, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='LexicalSimilarity_rougeL')

In [42]:
# Second SemanticEntropy run on the same prompt and model.
# NOTE(review): this repeats an earlier cell and records a different score;
# keep one copy, or seed the RNG if the goal is to compare runs.
estimator = SemanticEntropy()
estimate_uncertainty(model, estimator, input_text=input_text)

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


UncertaintyOutput(uncertainty=42.05250200109869, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='SemanticEntropy')

In [43]:
# PointwiseMutualInformation on the shared prompt. This is a separate class
# from the MeanPointwiseMutualInformation used earlier, and the recorded
# result here is a single positive scalar.
estimator = PointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text=input_text)

UncertaintyOutput(uncertainty=0.8221019506454468, input_text=['<grounding>An image of'], generation_text='<phrase> A snowman</phrase><object><patch_index_0044><patch_index_0863></object> is sitting by<phrase> a campfire</phrase><object><patch_index_0005><patch_index_1007></object> in', generation_tokens=[64007, 95, 43867, 64008, 64009, 64057, 64876, 64010, 17, 1280, 32, 64007, 10, 30879, 64008, 64009, 64018, 65020, 64010], model_path=None, estimator='PointwiseMutualInformation')