In [1]:
import os

from lm_polygraph.utils.model import WhiteboxModel
from lm_polygraph.estimators import MaximumClaimProbability, ClaimConditionedProbabilityClaim
from lm_polygraph.stat_calculators import *
from lm_polygraph.utils.openai_chat import OpenAIChat
from lm_polygraph.utils.deberta import Deberta

In [2]:
# Load the "bigscience/bloomz-560m" checkpoint as a WhiteboxModel; this `model`
# object is passed to every stat calculator in the cells below.
# NOTE(review): presumably fetches weights from the Hugging Face Hub on first run — confirm.
model = WhiteboxModel.from_pretrained("bigscience/bloomz-560m")

In [3]:
# Single prompt to generate from; `stat` accumulates every calculator's output.
texts = ["Tell me a bio of Albert Einstein."]
stat = {}

# Credential for the OpenAI-backed claim extraction below (presumably read by
# OpenAIChat — confirm against lm_polygraph). setdefault keeps a key that is
# already exported in the environment instead of overwriting it with the
# placeholder; never commit a real token in this cell.
os.environ.setdefault("OPENAI_KEY", "MY_OPENAI_TOKEN")

# Run the calculators in order: each one receives the statistics gathered so
# far, and its results are merged back into `stat` before the next one runs.
for calculator in [
    GreedyProbsCalculator(),
    ClaimsExtractor(OpenAIChat("gpt-4")),
]:
    stat.update(calculator(stat, texts, model))

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [4]:
# Show the greedy generation for the first prompt, followed by a blank line.
print("Output:", stat["greedy_texts"][0], end="\n\n")

# List every claim extracted from that generation together with the token
# positions it was aligned to; one blank line separates consecutive claims.
for claim in stat["claims"][0]:
    print("claim:", claim.claim_text)
    print("aligned tokens:", claim.aligned_tokens, end="\n\n")

Output:  Albert Einstein (born August 1, 1905 in Berlin, Germany) was a German physicist and mathematician. He was the first person to observe the existence of gravity.

claim: Albert Einstein was born on August 1, 1905.
aligned tokens: [0, 1, 3, 4, 5, 7]

claim: Albert Einstein was born in Berlin, Germany.
aligned tokens: [0, 1, 3, 9, 11]

claim: Albert Einstein was a German physicist.
aligned tokens: [0, 1, 13, 14, 15, 16, 17]

claim: Albert Einstein was a mathematician.
aligned tokens: [0, 1, 13, 14, 19, 20]

claim: He was the first person to observe something.
aligned tokens: [22, 23, 24, 25, 26, 27, 28]

claim: What he observed was the existence of gravity.
aligned tokens: [22, 28, 29, 30, 31, 32]



In [5]:
# Claim-level estimator applied to the statistics collected in `stat`.
max_prob = MaximumClaimProbability()
# Last expression of the cell, so the resulting array is displayed: one
# uncertainty value per extracted claim (higher value = less certain).
max_prob(stat)  # Uncertainty for each claim, the higher, the less certain

array([9.063859 , 3.237115 , 2.468878 , 3.382793 , 7.7747087, 7.9294844],
      dtype=float32)

In [6]:
# Add the NLI-based statistics (computed with a Deberta model) to `stat` before
# the CCP estimator is called.
# NOTE(review): presumably CCP depends on these statistics — confirm.
calculator = GreedyAlternativesNLICalculator(Deberta())
stat.update(calculator(stat, texts, model))

# Claim-Conditioned Probability estimator at the claim level; the bare call is
# the cell's last expression, so the per-claim array is displayed.
ccp = ClaimConditionedProbabilityClaim()
ccp(stat)  # Uncertainty for each claim, the higher, the less certain

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


array([-0.00411375, -0.40427966, -0.20528004, -0.12898072, -0.56441526,
       -0.41163102])