In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cuda:0"
batch_size = 2
seed = 42

In [3]:
import torch
torch.cuda.empty_cache()

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    load_in_8bit=True,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tokenizer.pad_token = tokenizer.eos_token

generation_config = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
messages = [
    [
        {
            "role": "user", 
            "content": "Tell me a bio of Albert Einstein."
        }
    ],
    [
        {
            "role": "user",
            "content": "Tell me a bio of Alla Pugacheva."
        }
    ],
    [
        {
            "role": "user",
            "content": "Tell me a bio of Paul McCartney"
        }
    ]
]

chat_messages = [tokenizer.apply_chat_template(m, tokenize=False) for m in messages]

In [6]:
import os

from lm_polygraph.model_adapters import WhiteBoxModelBasic
from lm_polygraph.estimators import ClaimConditionedProbabilityClaim
from lm_polygraph.stat_calculators import *
from lm_polygraph.utils.openai_chat import OpenAIChat
from lm_polygraph.utils.deberta import Deberta


model_adapter = WhiteBoxModelBasic(model, tokenizer)

calc_infer_llm = InferCausalLMCalculator(tokenize=False)

os.environ["OPENAI_KEY"] = "<Your key>"
calc_claim_extractor = ClaimsExtractor(OpenAIChat("gpt-4"))

calc_claim_nli = GreedyAlternativesNLICalculator(Deberta())

estimator = ClaimConditionedProbabilityClaim()

  _torch_pytree._register_pytree_node(
2024-04-17 20:42:50.888094: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-17 20:42:51.212555: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-17 20:42:51.212596: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-17 20:42:51.214386: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-17 20:42:51.390250: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.


In [7]:
from torch.utils.data import DataLoader


args_generate = {"generation_config" : generation_config,
                 "max_new_tokens": 100}

data_loader = DataLoader(chat_messages, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)
for batch in data_loader:
    encoded = tokenizer(batch, padding=True, return_tensors="pt")

    deps = {"model_inputs": encoded}
    deps.update(calc_infer_llm(
        deps, texts=batch, model=model_adapter, args_generate=args_generate))
    deps.update({"greedy_texts" : tokenizer.batch_decode(deps['greedy_tokens'])})
    deps.update(calc_claim_extractor(deps, texts=batch, model=model_adapter))
    deps.update(calc_claim_nli(deps, texts=None, model=model_adapter))

    uncertianty_scores = estimator(deps)

    for text, claims, ue_score in zip(deps["greedy_texts"], deps['claims'], uncertianty_scores):
        print("Output:", text)
        
        for claim, ue in zip(claims, ue_score):
            print("claim:", claim.claim_text)
            print("aligned tokens:", claim.aligned_tokens)
            print("UE score:", ue)

        print()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Output: Albert Einstein (14 March 1879 – 18 April 1955) was a German-born theoretical physicist who is widely recognized as one of the most influential scientists in history. He is best known for developing the theory of general relativity, one of the two pillars of modern physics (alongside quantum mechanics). He is also known for his mass–energy equivalence formula E = mc², which has been dubbed "the world
claim: Albert Einstein was born on 14 March 1879.
aligned tokens: [0, 1, 3, 4, 5]
UE score: -0.5041567570910105
claim: Albert Einstein died on 18 April 1955.
aligned tokens: [0, 1]
UE score: -0.9999800427529746
claim: He is best known for developing the theory of general relativity.
aligned tokens: [45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56]
UE score: -0.9967017976298952
claim: The theory of general relativity is one of the two pillars of modern physics.
aligned tokens: [50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66]
UE score: -0.9981486797756218
claim: The tw