In [14]:
#!pip install transformers datasets torch


In [15]:
use_auth_token = "TOKEN"

In [16]:
import torch
import torch.nn.functional as F

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import torch.nn.functional as F
from collections import Counter
import random
import numpy as np
import pandas as pd
from collections import Counter
import re

import warnings
warnings.filterwarnings("ignore")

def set_seed(seed):
    """
    Set the random seed for reproducibility.
    
    :param seed: The seed value to set.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


# Load the pre-trained model and tokenizer from Hugging Face
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cpu"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Ensure model is in evaluation mode
model.eval()
model.to(device)



# Set pad_token_id to eos_token_id
model.config.pad_token_id = model.config.eos_token_id

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Label prob. method

In [17]:
def generate_answers(question, num_samples=10):

    chat = [
        {"role": "user", "content":  "Provide your best guess for the following question. Give ONLY the guess, no other words or explanation.\n\n"},
        {"role": "assistant", "content": "Okey, please, give me an example of Guess."},
        {"role": "user", "content": "For example:\n\nGuess: <most likely guess, as short as possible; not a complete sentence, just the guess!>\n\n"},
        {"role": "assistant", "content": "Thank you, now please give me the question."},
        {"role": "user", "content": f"The question is: {question}"},
        ]
    
    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    # tokenisation and answer generation
    inputs = tokenizer(prompt, return_tensors="pt")
    answers = []
    for _ in range(num_samples):
        outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True, temperature=1.0, top_p=1.0)
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extracting the part of the answer after "Guess:"
        guess = answer.split("Guess:")[-1].strip()
        answers.append(guess)
    
    return answers



def most_frequent_share(answers):
    if not answers:
        return 0
    counter = Counter(answers)
    most_common = counter.most_common(1)[0]
    most_common_count = most_common[1]
    total_count = len(answers)
    share = most_common_count / total_count
    return share

In [18]:
# Usage example
question = "What is the oldest country?"
answers = generate_answers(question, num_samples=10)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [19]:
answers

['Egypt.',
 'Egypt (based on some historical records suggesting Egypt as one of the oldest civilizations)',
 'Egypt (based on some theories suggesting civilization in Egypt dates back to around 3100 BCE)',
 'Egypt. (Based on the common belief that Egypt is one of the oldest civilizations in the world.)',
 'Egypt or Sumeria.',
 'Egypt or Sumeria.',
 'Egypt.',
 'Egypt or Sumeria.',
 'Egypt.',
 'Egypt or Sumer (Mesopotamia)']

In [22]:
# Загрузка модели и токенизатора для семантической схожести гипотез модели
tokenizer_semantic_similarity = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model_semantic_similarity = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

def get_sentence_embedding(sentence):
    inputs = tokenizer_semantic_similarity(sentence, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model_semantic_similarity(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze()

def check_equivalence(answers, threshold=0.5):
    embeddings = [get_sentence_embedding(answer) for answer in answers]
    embeddings = torch.stack(embeddings).numpy()
    
    # Вычисление косинусного сходства между всеми парами ответов
    cosine_matrix = cosine_similarity(embeddings)
    
    # Порог для определения эквивалентности (может быть настроен)
    
    equivalence = []
    for i in range(len(answers)):
        for j in range(i + 1, len(answers)):
            if cosine_matrix[i][j] > threshold:
                equivalence.append((answers[i], answers[j]))
    
    return equivalence



equivalent_answers = check_equivalence(answers)
print(equivalent_answers)

[('Egypt.', 'Egypt (based on some historical records suggesting Egypt as one of the oldest civilizations)'), ('Egypt.', 'Egypt. (Based on the common belief that Egypt is one of the oldest civilizations in the world.)'), ('Egypt.', 'Egypt or Sumeria.'), ('Egypt.', 'Egypt or Sumeria.'), ('Egypt.', 'Egypt.'), ('Egypt.', 'Egypt or Sumeria.'), ('Egypt.', 'Egypt.'), ('Egypt.', 'Egypt or Sumer (Mesopotamia)'), ('Egypt (based on some historical records suggesting Egypt as one of the oldest civilizations)', 'Egypt (based on some theories suggesting civilization in Egypt dates back to around 3100 BCE)'), ('Egypt (based on some historical records suggesting Egypt as one of the oldest civilizations)', 'Egypt. (Based on the common belief that Egypt is one of the oldest civilizations in the world.)'), ('Egypt (based on some historical records suggesting Egypt as one of the oldest civilizations)', 'Egypt or Sumeria.'), ('Egypt (based on some historical records suggesting Egypt as one of the oldest ci

In [23]:
def unite_tuples(tuples_list):
    result = []
    while tuples_list:
        current = tuples_list.pop(0)
        merged = list(current)
        i = 0
        while i < len(tuples_list):
            if any(item in merged for item in tuples_list[i]):
                merged.extend(tuples_list.pop(i))
                merged = list(set(merged))  # Удаляем дубликаты
                i = 0  # Начинаем проверку заново
            else:
                i += 1
        result.append(tuple(merged))
    return result

# Пример использования
same_semantics_tuples = unite_tuples(equivalent_answers)
print(same_semantics_tuples)
counts = [len(x) for x in same_semantics_tuples]
label_prob = max(counts) / sum(counts)
label_prob

[('Egypt. (Based on the common belief that Egypt is one of the oldest civilizations in the world.)', 'Egypt (based on some theories suggesting civilization in Egypt dates back to around 3100 BCE)', 'Egypt.', 'Egypt or Sumeria.', 'Egypt or Sumer (Mesopotamia)', 'Egypt (based on some historical records suggesting Egypt as one of the oldest civilizations)')]


1.0

# is True

In [25]:
def check_answer(question, answer):

    chat = [
        {"role": "user", "content":  "Question: {question}\n"},
        {"role": "assistant", "content": "Okey, please, give me a proposed answer."},
        {"role": "user", "content": "Proposed Answer: {answer}\n"},
        {"role": "assistant", "content": "Thank you, now please give me exact inctruction how to evaluate the proposed answer."},
        {"role": "user", "content": "Is the proposed answer:\n\t(A) True\n or\n\t(B) False\n\n Give ONLY the '(A) True' or '(B) False', no other words or explanation.\n"},
        {"role": "assistant", "content": "The proposed answer is:\n\n"},
        ]
    
    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    
    # tokenisation and generation
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=50)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    final_result = result.split("The proposed answer is:")[-1].strip().split("\n")[0].strip()
    
    return final_result

def is_true(question, answers, num_samples=10):
    results = [check_answer(question, answer) for answer in answers]
    return results

def normalize_answer(answer):
    # drop everything after '(A) True' or '(B) False'
    match = re.match(r"\((A|B)\) (True|False)", answer)
    if match:
        return f"({match.group(1)}) {match.group(2)}"
    return answer

def count_true_share(results):
    # answer normalization
    normalized_results = [normalize_answer(result) for result in results]
    
    # '(A) True' count
    true_count = normalized_results.count('(A) True')
    
    # '(A) True' share count
    total_count = len(normalized_results)
    true_share = true_count / total_count if total_count > 0 else 0
    
    return true_share

In [26]:
# Usage example
question = "What is the oldest country?"
result = is_true(question, answers, num_samples=10)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [27]:
result

['(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.',
 '(A) True if the question is clear and the answer is correct, based on the provided context.']

In [30]:
final_is_true_result = [normalize_answer(x) for x in result]
final_is_true_result

['(A) True',
 '(A) True',
 '(A) True',
 '(A) True',
 '(A) True',
 '(A) True',
 '(A) True',
 '(A) True',
 '(A) True',
 '(A) True']

In [32]:
true_share = count_true_share(final_is_true_result)
true_share

1.0

# Verbalised probability methods

In [9]:
data_path = "survey-results.csv"
# taken from https://waf.cs.illinois.edu/visualizations/Perception-of-Probability-Words/

words_probs = pd.read_csv(data_path, delimiter=",")
words_probs.rename(columns=lambda x: x.replace('"', ''), inplace=True)
words_probs_probs = words_probs.loc[:,'Almost Certain':'Chances are Slight'].median(axis=0).to_dict()

In [42]:
words_probs_probs = {'Almost Certain': 95.0,
 'Highly Likely': 90.0,
 'Very Good Chance': 80.0,
 'Probable': 70.0,
 'Likely': 70.0,
 'We Believe': 75.0,
 'Probably': 70.0,
 'Better than Even': 60.0,
 'About Even': 50.0,
 'We Doubt': 20.0,
 'Improbable': 10.0,
 'Unlikely': 20.0,
 'Probably Not': 25.0,
 'Little Chance': 10.0,
 'Almost No Chance': 2.0,
 'Highly Unlikely': 5.0,
 'Chances are Slight': 10.0}

In [44]:

# Установка pad_token в eos_token
tokenizer.pad_token = tokenizer.eos_token

def generate_text(prompt, max_length=800):
    # Токенизация с указанием attention_mask
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
    
    # Установка pad_token_id в eos_token_id
    model.config.pad_token_id = model.config.eos_token_id
    
    # Генерация текста с использованием attention_mask
    outputs = model.generate(
        inputs.input_ids, 
        attention_mask=inputs.attention_mask, 
        max_length=max_length, 
        num_return_sequences=1,
        pad_token_id=model.config.eos_token_id
    )
    
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# 3. Verbalized 1-Stage Top-1 (`Verb. 1S top-1`)
def verb_1s_top1_prompt(question):

    chat = [
        {"role": "user", "content":  "Provide your best guess and the probability that it is correct (0.0 to 1.0) for the following question. Give ONLY the guess and probability, no other words or explanation.\n\n"},
        {"role": "assistant", "content": "Okey, please, give me an example."},
        {"role": "user", "content": "Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>\nProbability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>"},
        {"role": "assistant", "content": "Thank you, now please give me the question."},
        {"role": "user", "content": f"The question is: {question}\n"},
        ]
    
    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    return generate_text(prompt)




# 4. Verbalized 1-Stage Top-k (`Verb. 1S top-k`)
def verb_1s_topk_prompt(question, k):
    
    
    content_1_user = f"Provide your {k} best guesses and the probability that each is correct (0.0 to 1.0) for the following question. Give ONLY the guesses and probabilities, no other words or explanation.\n\nFor example:\n\n"

    content_2_user = "" 
    for i in range(1, k+1):
        content_2_user += f"G{i}: <{i}-th most likely guess, as short as possible; not a complete sentence, just the guess!>\nP{i}: <the probability between 0.0 and 1.0 that G{i} is correct, without any extra commentary whatsoever; just the probability!>\n\n"
    
    content_3_user = f"The question is: {question}"

    chat = [
        {"role": "user", "content": content_1_user},
        {"role": "assistant", "content": "Okey, please, give me an example."},
        {"role": "user", "content": content_2_user},
        {"role": "assistant", "content": "Thank you, now please give me the question."},
        {"role": "user", "content": content_3_user},
        ]


    prompt = tokenizer.apply_chat_template(chat, tokenize=False)
    
    return generate_text(prompt)

# 3. Verbalized 1-Stage Top-1 (`Verb. 1S top-1`)
def verb_2s_top1_prompt(question):
    # prompt = f"""
    
    
    # For example:
    
    # Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
    # The question is: {question}
    # Provide the probability that your guess is correct. Give ONLY the probability, no
    # other words or explanation.
    
    # For example:
    
    # Probability: <the probability between
    # 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever;
    # just the probability!>
    
    
    # """

    content_1_user = f"Provide your best guess for the following question. Give ONLY the guess, no other words or explanation.\n\n"

    content_2_user = f"For example:\n\n\nGuess: <most likely guess, as short as possible; not a complete sentence, just the guess!>\nThe question is: {question}\n\nProvide the probability that your guess is correct. Give ONLY the probability, no other words or explanation." 
    
    content_3_user = "For example:\n\n\nProbability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>"

    chat = [
        {"role": "user", "content": content_1_user},
        {"role": "assistant", "content": "Okey, please, give me an example."},
        {"role": "user", "content": content_2_user},
        {"role": "assistant", "content": "Thank you, please give me the instructions about probability."},
        {"role": "user", "content": content_3_user},
        ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    return generate_text(prompt)

# 4. Verbalized 2-Stage Top-k (`Verb. 2S top-k`)
def verb_2s_topk_prompt(question, k):
    content_1_user = f"Provide your {k} best guess for the following question. Give ONLY the guess, no other words or explanation.\n\nFor example:\n\n"

    content_2_user = ""
    for i in range(1, k+1):
        content_2_user += f"G{i}: <{i}-th most likely guess, as short as possible; not a complete sentence, just the guess!>\n"
        content_2_user += f"P{i}: <the probability between 0.0 and 1.0 that G{i} is correct, without any extra commentary whatsoever; just the probability!>\n\n"
    
    content_3_user = f"The question is: {question}\n"

    content_4_user = "Provide the probability that your guess is correct. Give ONLY the probability, no other words or explanation.\n\nFor example:\n\n"
    for i in range(1, k+1):
        content_4_user += f"P{i}: <the probability between 0.0 and 1.0 that G{i} is correct, without any extra commentary whatsoever; just the probability!>\n"
    
    chat = [
        {"role": "user", "content": content_1_user},
        {"role": "assistant", "content": "Okey, please, give me an example."},
        {"role": "user", "content": content_2_user},
        {"role": "assistant", "content": "Thank you, please give me the question."},
        {"role": "user", "content": content_3_user},
        {"role": "assistant", "content": "Thank you, please give me the instructions about probability."},
        {"role": "user", "content": content_4_user},
        ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    return generate_text(prompt)

# 6. Verbalized 2-Stage Chain-of-Thought (`Verb. 2S CoT`)
def verb_2s_cot_prompt(question):

    content_1_user = "Provide your best guess for the following question. Before giving your answer, provide a step-by-step explanation of your thought process. Then on a new line give the guess with no other words or explanation."
    content_2_user = "For example:\n\nExplanation:<one sentence step-by-step explanation of your thought process>\n\nGuess: <most likely guess, as short as possible; not a complete sentence, just the guess!>"
    content_3_user = f"The question is: {question}"
    content_4_user = "Provide the probability that your previous guess is correct (0.0 to 1.0). Give ONLY the probability, no other words or explanation.\n\nFor example:\n\nProbability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!>"
    
    chat = [
        {"role": "user", "content": content_1_user},
        {"role": "assistant", "content": "Okey, please, give me an example."},
        {"role": "user", "content": content_2_user},
        {"role": "assistant", "content": "Thank you, please give me the question."},
        {"role": "user", "content": content_3_user},
        {"role": "assistant", "content": "Thank you, please give me the instructions about probability."},
        {"role": "user", "content": content_4_user},
        ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    return generate_text(prompt)




# 7. Linguistic 1-Stage Human (`Ling. 1S-human`)
def ling_1s_human_prompt(question):
    
    content_1_user = "Provide your best guess and the likelihood that it is correct for the following question, using one of the following expressions: {{'Almost Certain', 'Highly Likely', 'Very Good Chance', 'Probable', 'Likely', 'We Believe', 'Probably', 'Better than Even', 'About Even', 'We Doubt', 'Improbable', 'Unlikely', 'Probably Not', 'Little Chance', 'Almost No Chance', 'Highly Unlikely', 'Chances are Slight'}}. Give ONLY the guess and likelihood, no other words or explanation."
    content_2_user = "For example:\n\nGuess: <most likely guess, as short as possible; not a complete sentence, just the guess!>\nLikelihood: <one of the expressions from the list>"
    content_3_user = f"The question is: {question}"

    chat = [
        {"role": "user", "content": content_1_user},
        {"role": "assistant", "content": "Okey, please, give me an example."},
        {"role": "user", "content": content_2_user},
        {"role": "assistant", "content": "Thank you, please give me the question."},
        {"role": "user", "content": content_3_user},
        ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False)

    return generate_text(prompt)




In [40]:
# Example usage
question = "What is the capital of Russia?"
print("Sample Question:", question)

Sample Question: What is the capital of Russia?


In [41]:
print("\nVerbalized 1-Stage Top-1 Method Output:")
print(verb_1s_top1_prompt(question))



Verbalized 1-Stage Top-1 Method Output:
[INST] Provide your best guess and the probability that it is correct (0.0 to 1.0) for the following question. Give ONLY the guess and probability, no other words or explanation.

 [/INST] Okey, please, give me an example. [INST] Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
Probability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!> [/INST] Thank you, now please give me the question. [INST] The question is: What is the capital of Russia?
 [/INST] Guess: Moscow
Probability: 1.0


In [45]:
print("\nVerbalized 1-Stage Top-2 Method Output:")
print(verb_1s_topk_prompt(question, 2))



Verbalized 1-Stage Top-2 Method Output:
[INST] Provide your 2 best guesses and the probability that each is correct (0.0 to 1.0) for the following question. Give ONLY the guesses and probabilities, no other words or explanation.

For example:

 [/INST] Okey, please, give me an example. [INST] G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

 [/INST] Thank you, now please give me the question. [INST] The question is: What is the capital of Russia? [/INST] G1: Moscow
P1: 0.95

G2: Saint Petersburg
P2: 0.05


In [46]:
print("\nVerbalized 1-Stage Top-4 Method Output:")
print(verb_1s_topk_prompt(question, 4))


Verbalized 1-Stage Top-4 Method Output:
[INST] Provide your 4 best guesses and the probability that each is correct (0.0 to 1.0) for the following question. Give ONLY the guesses and probabilities, no other words or explanation.

For example:

 [/INST] Okey, please, give me an example. [INST] G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

G3: <3-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P3: <the probability between 0.0 and 1.0 that G3 is correct, without any extra commentary whatsoever; just the probability!>

G4: <4-th most likely guess, as short as possibl

In [47]:
print("\nVerbalized 2-Stage Top-1 Method Output:")
print(verb_2s_topk_prompt(question, 1))
#print("Guesses:", guesses)
#print("Probabilities:", probabilities)


Verbalized 2-Stage Top-1 Method Output:
[INST] Provide your 1 best guess for the following question. Give ONLY the guess, no other words or explanation.

For example:

 [/INST] Okey, please, give me an example. [INST] G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

 [/INST] Thank you, please give me the question. [INST] The question is: What is the capital of Russia?
 [/INST] Thank you, please give me the instructions about probability. [INST] Provide the probability that your guess is correct. Give ONLY the probability, no other words or explanation.

For example:

P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>
 [/INST] G1: Moscow
 P1: 1.0

(Note: The probability 1.0 indicates a certain answer, in this case, the capital of Russia is Moscow

In [48]:
print("\nVerbalized 2-Stage Top-2 Method Output:")
print(verb_2s_topk_prompt(question, 2))
#print("Guesses:", guesses)
#print("Probabilities:", probabilities)


Verbalized 2-Stage Top-2 Method Output:


[INST] Provide your 2 best guess for the following question. Give ONLY the guess, no other words or explanation.

For example:

 [/INST] Okey, please, give me an example. [INST] G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

 [/INST] Thank you, please give me the question. [INST] The question is: What is the capital of Russia?
 [/INST] Thank you, please give me the instructions about probability. [INST] Provide the probability that your guess is correct. Give ONLY the probability, no other words or explanation.

For example:

P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extr

In [49]:
print("\nVerbalized 2-Stage Top-4 Method Output:")
print(verb_2s_topk_prompt(question, 4))


Verbalized 2-Stage Top-4 Method Output:
[INST] Provide your 4 best guess for the following question. Give ONLY the guess, no other words or explanation.

For example:

 [/INST] Okey, please, give me an example. [INST] G1: <1-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P1: <the probability between 0.0 and 1.0 that G1 is correct, without any extra commentary whatsoever; just the probability!>

G2: <2-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P2: <the probability between 0.0 and 1.0 that G2 is correct, without any extra commentary whatsoever; just the probability!>

G3: <3-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P3: <the probability between 0.0 and 1.0 that G3 is correct, without any extra commentary whatsoever; just the probability!>

G4: <4-th most likely guess, as short as possible; not a complete sentence, just the guess!>
P4: <the probability between 0.

In [50]:
print("\nVerbalized 2-Stage Chain-of-Thought Method Output:")
print(verb_2s_cot_prompt(question))



Verbalized 2-Stage Chain-of-Thought Method Output:
[INST] Provide your best guess for the following question. Before giving your answer, provide a step-by-step explanation of your thought process. Then on a new line give the guess with no other words or explanation. [/INST] Okey, please, give me an example. [INST] For example:

Explanation:<one sentence step-by-step explanation of your thought process>

Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!> [/INST] Thank you, please give me the question. [INST] The question is: What is the capital of Russia? [/INST] Thank you, please give me the instructions about probability. [INST] Provide the probability that your previous guess is correct (0.0 to 1.0). Give ONLY the probability, no other words or explanation.

For example:

Probability: <the probability between 0.0 and 1.0 that your guess is correct, without any extra commentary whatsoever; just the probability!> [/INST] Probability: 1.0

Explan

In [51]:

print("\nLinguistic 1-Stage Human Method Output:")

output_ling_1s_human = ling_1s_human_prompt(question)
print(output_ling_1s_human)


Linguistic 1-Stage Human Method Output:
[INST] Provide your best guess and the likelihood that it is correct for the following question, using one of the following expressions: {{'Almost Certain', 'Highly Likely', 'Very Good Chance', 'Probable', 'Likely', 'We Believe', 'Probably', 'Better than Even', 'About Even', 'We Doubt', 'Improbable', 'Unlikely', 'Probably Not', 'Little Chance', 'Almost No Chance', 'Highly Unlikely', 'Chances are Slight'}}. Give ONLY the guess and likelihood, no other words or explanation. [/INST] Okey, please, give me an example. [INST] For example:

Guess: <most likely guess, as short as possible; not a complete sentence, just the guess!>
Likelihood: <one of the expressions from the list> [/INST] Thank you, please give me the question. [INST] The question is: What is the capital of Russia? [/INST] Guess: Moscow
Likelihood: Almost Certain


In [58]:
output_ling_1s_human_list = output_ling_1s_human.split("\n")
answer = output_ling_1s_human_list[-1].split("Likelihood:")[-1]
proba = words_probs_probs[output_ling_1s_human_list[-1].split(": ")[1]] / 100

In [59]:
answer, proba

(' Almost Certain', 0.95)