In [1]:
#!pip install transformers datasets torch


In [2]:
use_auth_token = "TOKEN"

In [3]:
import torch
import torch.nn.functional as F

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn.functional as F
from collections import Counter
import random
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

def set_seed(seed):
    """
    Set the random seed for reproducibility.
    
    :param seed: The seed value to set.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


# Load the pre-trained model and tokenizer from Hugging Face
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cpu"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure model is in evaluation mode
model.eval()
model.to(device)



# Set pad_token_id to eos_token_id
model.config.pad_token_id = model.config.eos_token_id

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Label prob. method

In [4]:
def label_prob_method(model, tokenizer, question, device='cpu', n_samples=10):
    """
    Method for estimating the probability of the most likely answer to a question using the Label Prob. method.
    
    :param model: The model used for generating answers.
    :param tokenizer: Tokenizer for converting text to tokens.
    :param question: The question to be answered.
    :param device: The device (CPU or GPU) on which the model and tokenizer are running.
    :param n_samples: The number of samples to generate for probability estimation.
    :param seed: The random seed for reproducibility.
    :return: The most likely answer and its average probability.
    """

        
        
    # Formulate the question template
    prompt = f"Provide your best guess for the following question. Give ONLY the guess, no other words or explanation.\n\nFor example:\n\nGuess: <most likely guess, as short as possible; not a complete sentence, just the guess!>\n\nThe question is: {question}"

    # Tokenize the question
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_length = inputs.input_ids.shape[1]  # Length of input tokens

    # Generate n_samples answers to the question
    answers = []
    log_probabilities = []
    for j in range(n_samples):
        with torch.no_grad():
            set_seed(seed=j)
            outputs = model.generate(
                **inputs,
                max_length=input_length + 50,  # Allow room for the generated answer
                output_scores=True,
                return_dict_in_generate=True,
                pad_token_id=tokenizer.eos_token_id
            )
        
        # Extract generated tokens and logits
        generated_tokens = outputs.sequences[0][input_length:]  # Remove input tokens
        scores = outputs.scores[input_length-1:]  # Remove scores corresponding to input tokens
        
        # Вычисление логарифмов вероятностей для каждого токена
        token_log_probabilities = []
        for i, score in enumerate(scores[input_length-1:]):  # Начало с логитов после входных токенов
            token_logits = score[0]
            token_prob = F.softmax(token_logits, dim=-1)
            token_log_prob = torch.log(token_prob[generated_tokens[i]])
            token_log_probabilities.append(token_log_prob.item())
            
            # Прерывание, если встречен токен конца предложения
            if generated_tokens[i] == tokenizer.eos_token_id:
                break
        
        # Вычисление логарифма вероятности всего ответа
        sequence_log_probability = sum(token_log_probabilities)
        log_probabilities.append(sequence_log_probability)
        
        # Декодирование сгенерированного ответа
        answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
        answers.append(answer.strip())

    # Подсчет наиболее частого ответа
    most_common_answer, most_common_count = Counter(answers).most_common(1)[0]

    # Вычисление средней логарифмической вероятности наиболее частого ответа
    most_common_log_probabilities = [log_prob for ans, log_prob in zip(answers, log_probabilities) if ans == most_common_answer]
    average_log_probability = sum(most_common_log_probabilities) / len(most_common_log_probabilities)
    
    # Convert average log probability back to probability
    average_probability = torch.exp(torch.tensor(average_log_probability)).item()

    return most_common_answer, average_probability


In [5]:
# Пример использования
question = "What is the capital of Russia?"
most_common_answer, average_probability = label_prob_method(model, tokenizer, question, n_samples=2)
print(f"Most common answer: {most_common_answer}")
print(f"Average probability: {average_probability}")

Most common answer: Guess: Happiness, love, self-realization, or spiritual growth.
Average probability: 1.0


# is True

In [None]:
def generate_answer(model, tokenizer, question):
    prompt = f"Question: {question}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=50)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer.split("Answer:")[-1].strip()

def is_true_prob_prompt(model, tokenizer, question):
    answer = generate_answer(model, tokenizer, question)
    prompt = f"""
    Question: {question}
    Proposed Answer: {answer}
    Is the proposed answer:
      (A) True or
      (B) False?
    
    The proposed answer is:
    """
    return prompt

def get_probabilities(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    
    logits = outputs.logits
    probs = torch.softmax(logits, dim=-1)
    
    true_token_id = tokenizer.convert_tokens_to_ids("True")
    false_token_id = tokenizer.convert_tokens_to_ids("False")
    
    true_prob = probs[0, -1, true_token_id].item()
    false_prob = probs[0, -1, false_token_id].item()
    
    return {"True": true_prob, "False": false_prob}

In [6]:
# Example usage
question = "What is the capital of Russia?"
prompt = is_true_prob_prompt(model, tokenizer, question)
probabilities = get_probabilities(model, tokenizer, prompt)
print(probabilities)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


{'True': 1.030938747703658e-07, 'False': 9.919518539902583e-10}


# Verbalised probability methods

In [9]:
data_path = "survey-results.csv"
# taken from https://waf.cs.illinois.edu/visualizations/Perception-of-Probability-Words/

words_probs = pd.read_csv(data_path, delimiter=",")
words_probs.rename(columns=lambda x: x.replace('"', ''), inplace=True)
words_probs_probs = words_probs.loc[:,'Almost Certain':'Chances are Slight'].median(axis=0).to_dict()

In [74]:
words_probs_probs = {'Almost Certain': 95.0,
 'Highly Likely': 90.0,
 'Very Good Chance': 80.0,
 'Probable': 70.0,
 'Likely': 70.0,
 'We Believe': 75.0,
 'Probably': 70.0,
 'Better than Even': 60.0,
 'About Even': 50.0,
 'We Doubt': 20.0,
 'Improbable': 10.0,
 'Unlikely': 20.0,
 'Probably Not': 25.0,
 'Little Chance': 10.0,
 'Almost No Chance': 2.0,
 'Highly Unlikely': 5.0,
 'Chances are Slight': 10.0}

In [59]:

# Установка pad_token в eos_token
tokenizer.pad_token = tokenizer.eos_token

def generate_text(prompt, max_length=800):
    # Токенизация с указанием attention_mask
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
    
    # Установка pad_token_id в eos_token_id
    model.config.pad_token_id = model.config.eos_token_id
    
    # Генерация текста с использованием attention_mask
    outputs = model.generate(
        inputs.input_ids, 
        attention_mask=inputs.attention_mask, 
        max_length=max_length, 
        num_return_sequences=1,
        pad_token_id=model.config.eos_token_id
    )
    
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# 3. Verbalized 1-Stage Top-1 (`Verb. 1S top-1`)
def verb_1s_top1_prompt(question):
    prompt = f"""
    Provide your best guess and the probability that it is correct (0.0 to 1.0) for the following question. Give ONLY the guess and probability, no other words or explanation.
    
    For example:
    
    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    Probability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>
    
    The question is: {question}
    """
    return generate_text(prompt)




# 4. Verbalized 1-Stage Top-k (`Verb. 1S top-k`)
def verb_1s_topk_prompt(question, k):
    prompt = f"Provide your {k} best guesses and the probability that each is correct (0.0 to 1.0) for the following question. Give ONLY the guesses and probabilities, no other words or explanation.\n\nFor example:\n\n"
    for i in range(1, k+1):
        prompt += f"G{i}: <{i}-th most likely guess, as short as possible; not a complete sentence, just the guess!>\nP{i}: <the probability between 0.0 and 1.0 that G{i} is correct, without any extra commentary whatsoever; just the probability!>\n\n"
    prompt += f"The question is: {question}"
    return generate_text(prompt)

# 3. Verbalized 1-Stage Top-1 (`Verb. 1S top-1`)
def verb_2s_top1_prompt(question):
    prompt = f"""
    Provide your best guess for the following question. Give ONLY the guess, no other words or explanation.
    
    For example:
    
    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    The question is: {question}
    Provide the probability that your guess is correct. Give ONLY the probability, no
    other words or explanation.
    
    For example:
    
    Probability: <the probability between
    0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever;
    just the probability!>
    
    
    """
    return generate_text(prompt)

# 4. Verbalized 2-Stage Top-k (`Verb. 2S top-k`)
def verb_2s_topk_prompt(question, k):
    prompt = f"Provide your {k} best guess for the following question. Give ONLY the guess, no other words or explanation.\n\nFor example:\n\n"
    for i in range(1, k+1):
        prompt += f"G{i}: <{i}-th most likely guess, as short as possible; not a complete sentence, just the guess!>\n"
        prompt += f"P{i}: <the probability between 0.0 and 1.0 that G{i} is correct, without any extra commentary whatsoever; just the probability!>\n\n"
    prompt += f"The question is: {question}\n"
    prompt += "Provide the probability that your guess is correct. Give ONLY the probability, no other words or explanation.\n\nFor example:\n\n"
    for i in range(1, k+1):
        prompt += f"P{i}: <the probability between 0.0 and 1.0 that G{i} is correct, without any extra commentary whatsoever; just the probability!>\n"
    
    return generate_text(prompt)

# 6. Verbalized 2-Stage Chain-of-Thought (`Verb. 2S CoT`)
def verb_2s_cot_prompt(question):
    prompt = f"""
    Provide your best guess for the following question. Before giving your answer, provide a step-by-step explanation of your thought process. Then on a new line give the guess with no other words or explanation.
    
    For example:
    
    Explanation:<one sentence step-by-step explanation of your thought process>

    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    
    The question is: {question}

    Provide the probability that your previous guess is correct (0.0 to 1.0). Give ONLY the probability, no other words or explanation.
    
    For example:
    
    Probability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>
    """
    return generate_text(prompt)




# 7. Linguistic 1-Stage Human (`Ling. 1S-human`)
def ling_1s_human_prompt(question):
    prompt = f"""
    Provide your best guess and the likelihood that it is correct for the following question, using one of the following expressions: {{'Almost Certain', 'Highly Likely', 'Very Good Chance', 'Probable', 'Likely', 'We Believe', 'Probably', 'Better than Even', 'About Even', 'We Doubt', 'Improbable', 'Unlikely', 'Probably Not', 'Little Chance', 'Almost No Chance', 'Highly Unlikely', 'Chances are Slight'}}. Give ONLY the guess and likelihood, no other words or explanation.
    
    For example:
    
    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    Likelihood: <one of the expressions from the list>
    
    The question is: {question}
    """
    return generate_text(prompt)




In [60]:
# Example usage
question = "What is the capital of Russia?"
print("Sample Question:", question)

Sample Question: What is the capital of Russia?


In [61]:
print("\nVerbalized 1-Stage Top-1 Method Output:")
print(verb_1s_top1_prompt(question))



Verbalized 1-Stage Top-1 Method Output:

    Provide your best guess and the probability that it is correct (0.0 to 1.0) for the following question. Give ONLY the guess and probability, no other words or explanation.
    
    For example:
    
    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    Probability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>
    
    The question is: What is the capital of Russia?
    
    Guess: Moscow
    Probability: 1.0


In [62]:
print("\nVerbalized 1-Stage Top-2 Method Output:")
print(verb_1s_topk_prompt(question, 2))



Verbalized 1-Stage Top-2 Method Output:
Provide your 2 best guesses and the probability that each is correct (0.0 to 1.0) for the following question. Give ONLY the guesses and probabilities, no other words or explanation.

For example:

G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

The question is: What is the capital of Russia?

G1: Kiev
P1: 0.0

G2: Moscow
P2: 1.0


In [63]:
print("\nVerbalized 1-Stage Top-4 Method Output:")
print(verb_1s_topk_prompt(question, 4))


Verbalized 1-Stage Top-4 Method Output:
Provide your 4 best guesses and the probability that each is correct (0.0 to 1.0) for the following question. Give ONLY the guesses and probabilities, no other words or explanation.

For example:

G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

G3: <3-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P3: <the probability between 0.0 and 1.0 that G3 is correct, without any extra commentary whatsoever; just the probability!>

G4: <4-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P4: <the pro

In [64]:
print("\nVerbalized 2-Stage Top-1 Method Output:")
print(verb_2s_topk_prompt(question, 1))
#print("Guesses:", guesses)
#print("Probabilities:", probabilities)


Verbalized 2-Stage Top-1 Method Output:
Provide your 1 best guess for the following question. Give ONLY the guess, no other words or explanation.

For example:

G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

The question is: What is the capital of Russia?
Provide the probability that your guess is correct. Give ONLY the probability, no other words or explanation.

For example:

P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>
G1: Moscow
P1: 1.0

Answer:

P1: 1.0
G1: Moscow


In [65]:
print("\nVerbalized 2-Stage Top-2 Method Output:")
print(verb_2s_topk_prompt(question, 2))
#print("Guesses:", guesses)
#print("Probabilities:", probabilities)


Verbalized 2-Stage Top-2 Method Output:
Provide your 2 best guess for the following question. Give ONLY the guess, no other words or explanation.

For example:

G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

The question is: What is the capital of Russia?
Provide the probability that your guess is correct. Give ONLY the probability, no other words or explanation.

For example:

P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; 

In [69]:
print("\nVerbalized 2-Stage Top-4 Method Output:")
print(verb_2s_topk_prompt(question, 4))


Verbalized 2-Stage Top-4 Method Output:
Provide your 4 best guess for the following question. Give ONLY the guess, no other words or explanation.

For example:

G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

G3: <3-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P3: <the probability between 0.0 and 1.0 that G3 is correct, without any extra commentary whatsoever; just the probability!>

G4: <4-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P4: <the probability between 0.0 and 1.0 that G4 is correct, without any extra commentar

In [70]:
print("\nVerbalized 2-Stage Chain-of-Thought Method Output:")
print(verb_2s_cot_prompt(question))



Verbalized 2-Stage Chain-of-Thought Method Output:

    Provide your best guess for the following question. Before giving your answer, provide a step-by-step explanation of your thought process. Then on a new line give the guess with no other words or explanation.
    
    For example:
    
    Explanation:<one sentence step-by-step explanation of your thought process>

    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    
    The question is: What is the capital of Russia?

    Provide the probability that your previous guess is correct (0.0 to 1.0). Give ONLY the probability, no other words or explanation.
    
    For example:
    
    Probability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>
    
    Explanation: Russia is located in Europe and Asia, so its capital could be Moscow or St. Petersburg, both of which are major cities in Russia. However, Mos

In [71]:

print("\nLinguistic 1-Stage Human Method Output:")

output_ling_1s_human = ling_1s_human_prompt(question)
print(output_ling_1s_human)


Linguistic 1-Stage Human Method Output:



    Provide your best guess and the likelihood that it is correct for the following question, using one of the following expressions: {'Almost Certain', 'Highly Likely', 'Very Good Chance', 'Probable', 'Likely', 'We Believe', 'Probably', 'Better than Even', 'About Even', 'We Doubt', 'Improbable', 'Unlikely', 'Probably Not', 'Little Chance', 'Almost No Chance', 'Highly Unlikely', 'Chances are Slight'}. Give ONLY the guess and likelihood, no other words or explanation.
    
    For example:
    
    Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    Likelihood: <one of the expressions from the list>
    
    The question is: What is the capital of Russia?
    
    Guess: Moscow
    Likelihood: Almost Certain


In [72]:
output_ling_1s_human_list = output_ling_1s_human.split("\n")
answer = output_ling_1s_human_list[-2].split(": ")[1]
proba = words_probs_probs[output_ling_1s_human_list[-1].split(": ")[1]] / 100

In [73]:
answer, proba

('Moscow', 0.95)