# Low-Level Examples 
Here we present low-level examples of integrating LM-Polygraph into LLM inference using HF library.

## Initialize

In [None]:
%load_ext autoreload
%autoreload 2

In [14]:
model_name_or_path = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cuda:0"
dataset_name = "../workdir/data/triviaqa.csv"
batch_size = 2

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    quantization_config=quantization_config,
    device_map=device,
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token
generation_config = GenerationConfig.from_pretrained(model_name_or_path)

## Sequence-Level Examples

In [16]:
messages = [
    [
        {
            "role": "user", 
            "content": "How many fingers on a coala's foot?"
        }
    ],
    [
        {
            "role": "user",
            "content": "Who sang a song Yesterday?"
        }
    ],
    [
        {
            "role": "user",
            "content": "Кто спел песню Кукла Колдуна?"
        }
    ],
    [
        {
            "role": "user",
            "content": "Translate into French: 'I want a small cup of coffee'"
        }
    ]
]

chat_messages = [tokenizer.apply_chat_template(m, tokenize=False) for m in messages]

In [None]:
from lm_polygraph.stat_calculators.infer_causal_lm_calculator import InferCausalLMCalculator
from lm_polygraph.stat_calculators.greedy_alternatives_nli import GreedyAlternativesNLICalculator
from lm_polygraph.estimators.claim_conditioned_probability import ClaimConditionedProbability
from lm_polygraph.utils.deberta import Deberta
from lm_polygraph.model_adapters import WhiteboxModelBasic


model_adapter = WhiteboxModelBasic(model, tokenizer, tokenizer_args={})

calc_infer_llm = InferCausalLMCalculator(tokenize=False)
nli_model = Deberta(device=device)
nli_model.setup()
calc_nli = GreedyAlternativesNLICalculator(nli_model=nli_model)

estimator = ClaimConditionedProbability()

In [18]:
from torch.utils.data import DataLoader


args_generate = {"generation_config" : generation_config,
                 "max_new_tokens": 50}

data_loader = DataLoader(chat_messages, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)
for batch in data_loader:
    encoded = tokenizer(batch, padding=True, return_tensors="pt").to(device)

    deps = {"model_inputs": encoded}
    deps.update(calc_infer_llm(
        deps, texts=batch, model=model_adapter, args_generate=args_generate))
    deps.update(calc_nli(deps, texts=None, model=model_adapter))

    uncertainty_scores = estimator(deps)
    generated_texts = tokenizer.batch_decode(deps['greedy_tokens'])
    
    for text, ue_score in zip(generated_texts, uncertainty_scores):
        print("Output:", text)
        print("Uncertainty score:", ue_score)
        print()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Output: A koala's paws do not have an opposable thumb or any other digit that resembles a human finger. Instead, they have five non-opposable, rough pads on each of their two hind feet and four on
Uncertainty score: -0.030883181827615604

Output: The song "Yesterday" was written and performed by the English singer-songwriter Paul McCartney, but it was originally performed by The Beatles. The song was released as a single in the United Kingdom on June 18,
Uncertainty score: -0.26628770133840346

Output: I'm assuming you're asking who sang the Russian folk song "Kukla Koldun," which is also known as "The Doll of Koldun." This song is a traditional Russian folk tune, and there are many different
Uncertainty score: -0.003979121489207858

Output: In French, the sentence "I want a small cup of coffee" can be translated as "Je veux une tasse petite de café" or "Je vais avoir une tasse de café petite". Both translations convey the same
Uncertainty score: -0.056271856615366156



## Claim-Level Examples
Here we split text into actomic claims and quantify uncertainty of individual claims

In [19]:
messages = [
    [
        {
            "role": "user", 
            "content": "Tell me a bio of Albert Einstein."
        }
    ],
    [
        {
            "role": "user",
            "content": "Tell me a bio of Alla Pugacheva."
        }
    ],
    [
        {
            "role": "user",
            "content": "Tell me a bio of Paul McCartney."
        }
    ]
]

chat_messages = [tokenizer.apply_chat_template(m, tokenize=False) for m in messages]

In [None]:
import os

from lm_polygraph.model_adapters import WhiteboxModelBasic
from lm_polygraph.estimators import ClaimConditionedProbabilityClaim
from lm_polygraph.stat_calculators import *
from lm_polygraph.utils.openai_chat import OpenAIChat
from lm_polygraph.utils.deberta import Deberta


model_adapter = WhiteboxModelBasic(model, tokenizer, tokenizer_args={})

calc_infer_llm = InferCausalLMCalculator(tokenize=False)

os.environ["OPENAI_API_KEY"] = "<Your OpenAI API key>"
calc_claim_extractor = ClaimsExtractor(OpenAIChat("gpt-4o"))

calc_claim_nli = GreedyAlternativesNLICalculator(Deberta(device=device))

estimator = ClaimConditionedProbabilityClaim()

In [21]:
from torch.utils.data import DataLoader


args_generate = {"generation_config" : generation_config,
                 "max_new_tokens": 100}

data_loader = DataLoader(chat_messages, batch_size=batch_size, shuffle=False, collate_fn=lambda x: x)
for batch in data_loader:
    encoded = tokenizer(batch, padding=True, return_tensors="pt").to(device)

    deps = {"model_inputs": encoded}
    deps.update(calc_infer_llm(
        deps, texts=batch, model=model_adapter, args_generate=args_generate))
    deps.update({"greedy_texts" : tokenizer.batch_decode(deps['greedy_tokens'])})
    deps.update(calc_claim_extractor(deps, texts=batch, model=model_adapter))
    deps.update(calc_claim_nli(deps, texts=None, model=model_adapter))

    uncertainty_scores = estimator(deps)

    for text, claims, ue_score in zip(deps["greedy_texts"], deps['claims'], uncertainty_scores):
        print("Output:", text)
        
        for claim, ue in zip(claims, ue_score):
            print("claim:", claim.claim_text)
            print("aligned tokens:", claim.aligned_token_ids)
            print("UE score:", ue)

        print()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Output: Albert Einstein (March 14, 1879 – April 18, 1955) was a German-born theoretical physicist who is widely regarded as one of the most influential scientists in history. He is best known for developing the theory of general relativity, one of the two pillars of modern physics (alongside quantum mechanics). His work is also known for its influence on the philosophy of science. He is best known to the general public
claim: Albert Einstein was born on March 14, 1879.
aligned tokens: [0, 1, 3, 4, 6, 7, 10, 11, 12, 13]
UE score: -0.92425569683011
claim: Albert Einstein died on April 18, 1955.
aligned tokens: [0, 1, 15, 17, 18, 21, 22, 23, 24]
UE score: -0.9995946172894377
claim: Albert Einstein was a German-born theoretical physicist.
aligned tokens: [0, 1, 26, 27, 28, 29, 30, 31, 32, 33, 34]
UE score: -0.7058116064427322
claim: Albert Einstein is widely regarded as one of the most influential scientists in history.
aligned tokens: [0, 1, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]