In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cuda:0"
batch_size = 2

In [3]:
import torch
torch.cuda.empty_cache()

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    load_in_8bit=True,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tokenizer.pad_token = tokenizer.eos_token

generation_config = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
messages = [
    [
        {
            "role": "user", 
            "content": "Tell me a bio of Albert Einstein."
        }
    ],
    [
        {
            "role": "user",
            "content": "Tell me a bio of Alla Pugacheva."
        }
    ],
    [
        {
            "role": "user",
            "content": "Tell me a bio of Paul McCartney"
        }
    ]
]

chat_messages = [tokenizer.apply_chat_template(m, tokenize=False) for m in messages]

In [6]:
import os

from lm_polygraph.model_adapters import WhiteboxModelBasic
from lm_polygraph.estimators import ClaimConditionedProbabilityClaim
from lm_polygraph.stat_calculators import *
from lm_polygraph.utils.openai_chat import OpenAIChat
from lm_polygraph.utils.deberta import Deberta


model_adapter = WhiteboxModelBasic(model, tokenizer)

calc_infer_llm = InferCausalLMCalculator(tokenize=False)

os.environ["OPENAI_API_KEY"] = "YOUR_API_KEY"
calc_claim_extractor = ClaimsExtractor(OpenAIChat("gpt-4"))

calc_claim_nli = GreedyAlternativesNLICalculator(Deberta())

estimator = ClaimConditionedProbabilityClaim()

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
from torch.utils.data import DataLoader


args_generate = {"generation_config" : generation_config,
                 "max_new_tokens": 100}

data_loader = DataLoader(chat_messages, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)
for batch in data_loader:
    encoded = tokenizer(batch, padding=True, return_tensors="pt")

    deps = {"model_inputs": encoded}
    deps.update(calc_infer_llm(
        deps, texts=batch, model=model_adapter, args_generate=args_generate))
    deps.update({"greedy_texts" : tokenizer.batch_decode(deps['greedy_tokens'])})
    deps.update(calc_claim_extractor(deps, texts=batch, model=model_adapter))
    deps.update(calc_claim_nli(deps, texts=None, model=model_adapter))

    uncertianty_scores = estimator(deps)

    for text, claims, ue_score in zip(deps["greedy_texts"], deps['claims'], uncertianty_scores):
        print("Output:", text)
        
        for claim, ue in zip(claims, ue_score):
            print("claim:", claim.claim_text)
            print("aligned tokens:", claim.aligned_token_ids)
            print("UE score:", ue)

        print()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Output: Albert Einstein (March 14, 1879 – April 18, 1955) was a German-born theoretical physicist who is widely recognized as one of the most influential scientists in history. He is best known for developing the theory of relativity, which revolutionized our understanding of space, time, and gravity.

Einstein was born in Ulm, Württemberg, Germany, to Hermann and Pauline Einstein
claim: Albert Einstein was born on March 14, 1879.
aligned tokens: [0, 1, 3, 4, 6, 7, 10, 11, 12, 13]
UE score: -0.6843191534332486
claim: Albert Einstein died on April 18, 1955.
aligned tokens: [0, 1, 15, 17, 18, 21, 22, 23, 24]
UE score: -0.9999582851741198
claim: Albert Einstein was a German-born theoretical physicist.
aligned tokens: [0, 1, 26, 27, 28, 29, 30, 31, 32, 33, 34]
UE score: -0.6499102674047008
claim: Albert Einstein is widely recognized as one of the most influential scientists in history.
aligned tokens: [0, 1, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
UE score: -0.8114264568106548
clai