In [1]:
%load_ext autoreload
%autoreload 2

from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
from lm_polygraph.utils.model import WhiteboxModel, BlackboxModel
from lm_polygraph import estimate_uncertainty
from lm_polygraph.estimators import MaximumTokenProbability, LexicalSimilarity, SemanticEntropy, PointwiseMutualInformation, EigValLaplacian, SAR
import torch

In [8]:
from datasets import load_dataset

# Load the 'trivia_qa' dataset with the 'rc.nocontext' subset
dataset = load_dataset("trivia_qa", "rc.nocontext")

# Check available splits
print(dataset)

# Example: Access the first sample from the training set
print(dataset["train"][0])


Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'question_id', 'question_source', 'entity_pages', 'search_results', 'answer'],
        num_rows: 138384
    })
    validation: Dataset({
        features: ['question', 'question_id', 'question_source', 'entity_pages', 'search_results', 'answer'],
        num_rows: 17944
    })
    test: Dataset({
        features: ['question', 'question_id', 'question_source', 'entity_pages', 'search_results', 'answer'],
        num_rows: 17210
    })
})
{'question': 'Which American-born Sinclair won the Nobel Prize for Literature in 1930?', 'question_id': 'tc_1', 'question_source': 'http://www.triviacountry.com/', 'entity_pages': {'doc_source': [], 'filename': [], 'title': [], 'wiki_context': []}, 'search_results': {'description': [], 'filename': [], 'rank': [], 'title': [], 'url': [], 'search_context': []}, 'answer': {'aliases': ['(Harry) Sinclair Lewis', 'Harry Sinclair Lewis', 'Lewis, (Harry) Sinclair', 'Grace Hegger', 'Sinclair Lewi

In [7]:
print(dataset["train"][0]['answers'].keys())


dict_keys(['input_text', 'answer_start', 'answer_end'])


### Initialize model

In [None]:
base_model = LlamaForCausalLM.from_pretrained(
    'meta-llama/Meta-Llama-3-8B-Instruct',
    device_map='cuda',
    torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B-Instruct')
base_model.config.pad_token_id = 128001
base_model.generation_config.pad_token_id = 128001
tokenizer.pad_token = tokenizer.eos_token
model = WhiteboxModel(base_model, tokenizer)

### Token level UE

In [None]:
estimator = MaximumTokenProbability()
estimate_uncertainty(model, estimator, input_text='Who is George Bush?')

### Sequence level UE

In [None]:
estimator = LexicalSimilarity('rougeL')
estimate_uncertainty(model, estimator, input_text='Who is George Bush?')

In [None]:
estimator = SemanticEntropy()
estimate_uncertainty(model, estimator, input_text='Who is George Bush?')

In [None]:
estimator = PointwiseMutualInformation()
estimate_uncertainty(model, estimator, input_text='Once upon a time there was a little girl who liked to')

In [None]:
estimator = SAR()
estimate_uncertainty(model, estimator, input_text='Give your answer in a concise sentence. Who is George Bush?')

### BlackBox UE

In [None]:
model = BlackboxModel(
    'YOUR_OPENAI_TOKEN',
    'gpt-3.5-turbo'
)
estimator = EigValLaplacian(verbose=True)
estimate_uncertainty(model, estimator, input_text='When did Albert Einstein die?')

In [None]:
API_TOKEN = 'YOUR_API_TOKEN'
# for example let's take google/t5-small-ssm-nq model
MODEL_ID = 'google/t5-large-ssm-nqo'

model = BlackboxModel.from_huggingface(hf_api_token=API_TOKEN, hf_model_id=MODEL_ID, openai_api_key = None, openai_model_path = None)
ue_method = LexicalSimilarity()
input_text = "Who is George Bush?"
estimate_uncertainty(model, ue_method, input_text=input_text)

In [None]:
# for example let's take bigscience/bloomz-560m model
MODEL_ID = 'bigscience/bloomz-560m'

model = BlackboxModel.from_huggingface(hf_api_token=API_TOKEN, hf_model_id=MODEL_ID, openai_api_key = None, openai_model_path = None)
ue_method = LexicalSimilarity()
input_text = "Who is George Bush?"
estimate_uncertainty(model, ue_method, input_text=input_text)