# Low-Level Examples 
Here we present low-level examples of integrating LM-Polygraph into LLM inference with the Hugging Face Transformers library.

## Initialize

In [1]:
# Load IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# --- Model ---------------------------------------------------------------
# Checkpoint identifier handed to the `from_pretrained` calls below.
model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.2"
# Device the model is mapped to and the tokenized batches are moved to.
device = "cuda:0"

# --- Data ----------------------------------------------------------------
# Number of prompts per DataLoader batch.
batch_size = 2
# Dataset identifier; not referenced by the cells shown in this part of the notebook.
dataset_name = "LM-Polygraph/triviaqa"

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig

# Quantize the weights to 4 bits at load time.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True
)

# NOTE(review): trust_remote_code=True allows the checkpoint repository to run its
# own Python code while loading; keep it only for checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    quantization_config=quantization_config,
    device_map=device,
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Prompts are padded and generated in batches later in the notebook. A decoder-only
# model continues from the last position of each row, so padding must go on the left;
# set it explicitly instead of relying on the tokenizer's default.
tokenizer.padding_side = "left"
generation_config = GenerationConfig.from_pretrained(model_name_or_path)

## Sequence-Level Examples

In [4]:
# One single-turn conversation per question.
prompts = [
    "How many fingers on a coala's foot?",
    "Who sang a song Yesterday?",
    "Кто спел песню Кукла Колдуна?",
    "Translate into French: 'I want a small cup of coffee'",
]
messages = [[{"role": "user", "content": prompt}] for prompt in prompts]

# Render each conversation to a prompt string. add_generation_prompt=True appends the
# assistant-turn header for chat templates that define one, so the model answers
# instead of continuing the user turn; templates without such a header ignore it.
chat_messages = [
    tokenizer.apply_chat_template(m, tokenize=False, add_generation_prompt=True)
    for m in messages
]

In [5]:
from lm_polygraph.estimators.claim_conditioned_probability import ClaimConditionedProbability
from lm_polygraph.model_adapters import WhiteboxModelBasic
from lm_polygraph.stat_calculators.greedy_alternatives_nli import GreedyAlternativesNLICalculator
from lm_polygraph.stat_calculators.infer_causal_lm_calculator import InferCausalLMCalculator
from lm_polygraph.utils.deberta import Deberta


# Generation settings: sample with temperature 0.9, at most 50 new tokens per prompt.
generation_config.do_sample = True
generation_config.temperature = 0.9
max_new_tokens = 50

# Adapter that bundles the HF model, tokenizer and generation settings for LM-Polygraph.
model_adapter = WhiteboxModelBasic(
    model,
    tokenizer,
    tokenizer_args={},
    generation_parameters=generation_config,
)

# NLI model required by the greedy-alternatives calculator; set up explicitly here.
nli_model = Deberta(device=device)
nli_model.setup()

# Statistic calculators, applied in this order inside the inference loop.
# tokenize=False: presumably the calculator then expects pre-tokenized `model_inputs`
# rather than raw texts (TODO confirm against the calculator's implementation).
calc_infer_llm = InferCausalLMCalculator(tokenize=False)
calc_nli = GreedyAlternativesNLICalculator(nli_model=nli_model)

# Estimator that turns the collected statistics into one score per generated sequence.
estimator = ClaimConditionedProbability()

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
from torch.utils.data import DataLoader


# Identity collate_fn: every batch stays a plain list of prompt strings.
data_loader = DataLoader(chat_messages, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)
for batch in data_loader:
    # The prompts were already rendered by the chat template, which is responsible for
    # emitting the special tokens it needs. Tokenizing with the default
    # add_special_tokens=True would add them a second time (e.g. a duplicated BOS).
    encoded = tokenizer(
        batch, padding=True, add_special_tokens=False, return_tensors="pt"
    ).to(device)

    # `deps` accumulates the statistics produced by each calculator in turn.
    deps = {"model_inputs": encoded}
    deps.update(calc_infer_llm(
        deps, texts=batch, model=model_adapter, max_new_tokens=max_new_tokens))
    deps.update(calc_nli(deps, texts=None, model=model_adapter))

    uncertainty_scores = estimator(deps)
    # Decoded only for display, so drop special tokens such as the end-of-sequence marker.
    generated_texts = tokenizer.batch_decode(deps['greedy_tokens'], skip_special_tokens=True)

    for text, ue_score in zip(generated_texts, uncertainty_scores):
        print("Output:", text)
        print("Uncertainty score:", ue_score)
        print()

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Output: A koala's paws do not have an opposable thumb or digits similar to a human hand. Instead, they have five non-opposable, round pads with sharp claws. These pads help the koala grip
Uncertainty score: -0.04141769390181651

Output: The song "Yesterday" was written and performed by the English band The Beatles. However, it was originally sung solely by Paul McCartney during the recording sessions. The song was released as a solo composition credited to McCartney because
Uncertainty score: -0.005671583063792256

Output: I'm assuming you're asking who sang the song "Kukla Koldun" (Doll by Koldun). Koldun is a Russian singer, and he performed this song at the Eurovision Song Contest 2
Uncertainty score: -0.019087387991862634

Output: In French, the sentence "I want a small cup of coffee" can be translated as "Je veux une tasse petite de café." This sentence structure closely follows the English original, making it easy to remember. "Je veux"
Uncertainty score: -0.00017718172726055112


## Claim-Level Examples
Here we split the generated text into atomic claims and quantify the uncertainty of each individual claim.

In [7]:
# One single-turn conversation per biography request.
prompts = [
    "Tell me a bio of Albert Einstein.",
    "Tell me a bio of Alla Pugacheva.",
    "Tell me a bio of Paul McCartney.",
]
messages = [[{"role": "user", "content": prompt}] for prompt in prompts]

# Render each conversation to a prompt string. add_generation_prompt=True appends the
# assistant-turn header for chat templates that define one, so the model answers
# instead of continuing the user turn; templates without such a header ignore it.
chat_messages = [
    tokenizer.apply_chat_template(m, tokenize=False, add_generation_prompt=True)
    for m in messages
]

In [None]:
import getpass
import os

from lm_polygraph.estimators import ClaimConditionedProbabilityClaim
from lm_polygraph.model_adapters import WhiteboxModelBasic
# Explicit imports instead of `import *`, so it is clear where each name comes from.
from lm_polygraph.stat_calculators import ClaimsExtractor
from lm_polygraph.stat_calculators.greedy_alternatives_nli import GreedyAlternativesNLICalculator
from lm_polygraph.stat_calculators.infer_causal_lm_calculator import InferCausalLMCalculator
from lm_polygraph.utils.deberta import Deberta
from lm_polygraph.utils.openai_chat import OpenAIChat


# Generation settings: sample with temperature 0.9, at most 50 new tokens per prompt.
max_new_tokens = 50
generation_config.temperature = 0.9
generation_config.do_sample = True

# Adapter that bundles the HF model, tokenizer and generation settings for LM-Polygraph.
model_adapter = WhiteboxModelBasic(model, tokenizer, tokenizer_args={}, generation_parameters=generation_config)

calc_infer_llm = InferCausalLMCalculator(tokenize=False)

# Use the key already present in the environment; prompt for it only when it is
# missing, so a real key is never overwritten and never has to be typed into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
# Claims are extracted from the generated text with an OpenAI chat model.
calc_claim_extractor = ClaimsExtractor(OpenAIChat("gpt-4o"))

# Same keyword form as the sequence-level example.
calc_claim_nli = GreedyAlternativesNLICalculator(nli_model=Deberta(device=device))

# Estimator that produces one uncertainty score per extracted claim.
estimator = ClaimConditionedProbabilityClaim()

Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
from torch.utils.data import DataLoader


# Identity collate_fn: every batch stays a plain list of prompt strings.
data_loader = DataLoader(chat_messages, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)
for batch in data_loader:
    # The prompts were already rendered by the chat template, which is responsible for
    # emitting the special tokens it needs. Tokenizing with the default
    # add_special_tokens=True would add them a second time (e.g. a duplicated BOS).
    encoded = tokenizer(
        batch, padding=True, add_special_tokens=False, return_tensors="pt"
    ).to(device)

    # `deps` accumulates the statistics produced by each calculator in turn.
    deps = {"model_inputs": encoded}
    deps.update(calc_infer_llm(
        deps, texts=batch, model=model_adapter, max_new_tokens=max_new_tokens))
    # Decoding is left exactly as before: the claim extractor consumes these texts
    # together with the token ids, so the two must keep corresponding to each other.
    deps["greedy_texts"] = tokenizer.batch_decode(deps['greedy_tokens'])
    deps.update(calc_claim_extractor(deps, texts=batch, model=model_adapter))
    deps.update(calc_claim_nli(deps, texts=None, model=model_adapter))

    uncertainty_scores = estimator(deps)

    # One generated text, its extracted claims and one score per claim.
    for text, claims, ue_score in zip(deps["greedy_texts"], deps['claims'], uncertainty_scores):
        print("Output:", text)

        for claim, ue in zip(claims, ue_score):
            print("claim:", claim.claim_text)
            print("aligned tokens:", claim.aligned_token_ids)
            print("UE score:", ue)

        print()

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Output: Albert Einstein (March 14, 1879 – April 18, 1955) was a theoretical physicist who is widely regarded as one of the greatest scientists in history. Born in Ulm,
claim: Albert Einstein was born on March 14, 1879.
aligned tokens: [0, 1, 3, 4, 6, 7, 10, 11, 12, 13]
UE score: -0.821775516672531
claim: Albert Einstein died on April 18, 1955.
aligned tokens: [0, 1, 15, 17, 18, 21, 22, 23, 24]
UE score: -0.9998953235981506
claim: Albert Einstein was a theoretical physicist.
aligned tokens: [0, 1, 26, 27, 28, 29, 30, 31]
UE score: -0.4184386514851561
claim: Albert Einstein is widely regarded as one of the greatest scientists in history.
aligned tokens: [0, 1, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43]
UE score: -0.7276474721926438

Output: Alla Pugacheva, born on November 17, 1949, in the Soviet Union, is a legendary Russian singer, songwriter, and television personality. Her full name is Alla Iosifovna P
claim: Alla Pugacheva was born on November 17, 1949.
aligned tokens: [0, 1, 2, 3,