In [2]:
import json
import os

import torch
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer

# Causal LM whose next-token distributions are analysed in the cells below.
model_path = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_path)
base_model = AutoModelForCausalLM.from_pretrained(model_path)

# NLI cross-encoder used to label the relation between pairs of token strings.
mnli_checkpoint = "cross-encoder/nli-deberta-v3-xsmall"
mnli_tokenizer = AutoTokenizer.from_pretrained(mnli_checkpoint)
mnli_model = AutoModelForSequenceClassification.from_pretrained(mnli_checkpoint)

# Location of the validation split, relative to the notebook's working directory.
data_dir = "data/val"
data_file = "mushroom.en-val.v2.jsonl"
data_path = os.path.join(data_dir, data_file)

# The file holds one JSON object per line. The encoding is pinned so that loading
# does not depend on the platform default, and blank lines (e.g. a trailing
# newline at the end of the file) are skipped instead of crashing json.loads.
with open(data_path, "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.32s/it]


{'id': 'val-en-3', 'lang': 'EN', 'model_input': 'Do all arthropods have antennae?', 'model_output_text': 'Yes, all arachnids have antennas. However, not all of them are visible to the naked eye.', 'model_id': 'tiiuae/falcon-7b-instruct', 'soft_labels': [{'start': 0, 'prob': 0.6, 'end': 3}, {'start': 3, 'prob': 0.4, 'end': 8}, {'start': 8, 'prob': 0.2, 'end': 9}, {'start': 9, 'prob': 0.6, 'end': 18}, {'start': 18, 'prob': 0.3, 'end': 24}, {'start': 24, 'prob': 0.4, 'end': 32}, {'start': 32, 'prob': 0.1, 'end': 34}, {'start': 34, 'prob': 0.4, 'end': 43}, {'start': 43, 'prob': 0.5, 'end': 63}, {'start': 63, 'prob': 0.7, 'end': 70}, {'start': 70, 'prob': 0.5, 'end': 78}, {'start': 78, 'prob': 0.7, 'end': 87}], 'hard_labels': [[0, 3], [9, 18], [63, 70], [78, 87]], 'model_output_logits': [-4.8190689087, -16.5279369354, -10.1344690323, -9.9476661682, -7.045940876, -12.0485277176, -14.586689949, -12.6738815308, -7.2347912788, -8.1060018539, -7.1135058403, -15.4829864502, -6.7274436951, -7.6964

In [8]:
# Index of the validation record examined by the cells below.
sentence_idx = 0

# Show the record without its raw logits list: that field is a long array of
# floats that drowns out the fields a reader actually wants to inspect.
print({key: value for key, value in data[sentence_idx].items() if key != "model_output_logits"})

{'id': 'val-en-22', 'lang': 'EN', 'model_input': "Where do Hamilton Academical F.C.'s play their home matches?", 'model_output_text': ' Hamilton Academical Football Club plays their home matches at New Douglas Park, which is located in Hamilton, South Lanarkshire, Scotland. The stadium has a capacity of around 6,000 and has been the home ground of Hamilton Academical since 1878, making it one of the oldest football grounds in the world still in use by a single club.', 'model_id': 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF', 'soft_labels': [{'start': 60, 'prob': 0.1, 'end': 79}, {'start': 98, 'prob': 0.2, 'end': 138}, {'start': 177, 'prob': 0.1, 'end': 182}, {'start': 215, 'prob': 0.1, 'end': 234}, {'start': 235, 'prob': 0.1, 'end': 241}, {'start': 241, 'prob': 0.8, 'end': 245}, {'start': 245, 'prob': 0.5, 'end': 247}, {'start': 247, 'prob': 0.6, 'end': 253}, {'start': 253, 'prob': 0.5, 'end': 254}, {'start': 254, 'prob': 0.6, 'end': 260}, {'start': 260, 'prob': 0.5, 'end': 268}, {'start':

In [14]:
# Show the model's answer together with the spans annotated as hallucinated.
answer_text = data[sentence_idx]["model_output_text"]
print("Answer:", answer_text)

# Each hard label is a [start, end) pair of character offsets into the answer text.
hallucination_boundaries = data[sentence_idx]["hard_labels"]
print("Hallucination boundaries:", hallucination_boundaries)
for boundary in hallucination_boundaries:
    span_start, span_end = boundary[0], boundary[1]
    print("Hallucination:", answer_text[span_start:span_end])

Answer:  Hamilton Academical Football Club plays their home matches at New Douglas Park, which is located in Hamilton, South Lanarkshire, Scotland. The stadium has a capacity of around 6,000 and has been the home ground of Hamilton Academical since 1878, making it one of the oldest football grounds in the world still in use by a single club.
Hallucination boundaries: [[241, 245], [247, 253], [254, 260], [268, 283], [284, 294], [299, 304]]
Hallucination: 1878
Hallucination: making
Hallucination: it one
Hallucination: oldest football
Hallucination: grounds in
Hallucination: world


In [15]:
def get_mnli_label(sentence_1, sentence_2, model, tokenizer):
    """Return the label the classifier assigns to a pair of texts.

    Args:
        sentence_1: First text of the pair (first segment given to the tokenizer).
        sentence_2: Second text of the pair (second segment given to the tokenizer).
        model: Classifier called with the tokenizer output as keyword arguments;
            its result must expose ``.logits`` and the model ``.config.id2label``.
        tokenizer: Callable that encodes the pair with ``return_tensors="pt"``.

    Returns:
        The ``model.config.id2label`` entry for the highest-scoring logit.
    """
    inputs = tokenizer(sentence_1, sentence_2, return_tensors="pt")
    # Inference only: disabling autograd avoids building a graph on every call,
    # which matters because this function is invoked many times per token.
    with torch.no_grad():
        outputs = model(**inputs)
    # argmax() runs over the flattened logits; this equals the class index as
    # long as a single pair is scored per call (assumes one row of logits).
    predicted_class_idx = outputs.logits.argmax().item()
    return model.config.id2label[predicted_class_idx]

In [17]:
# get the probability distribution for each token generated by the model

# Score the question followed by the model's answer in one forward pass, so that
# every position yields logits over the vocabulary for the token that follows it.
# The text is held in `full_text` rather than `input` to avoid shadowing the builtin.
full_text = data[sentence_idx]['model_input'] + " " + data[sentence_idx]['model_output_text']
input_ids = tokenizer.encode(full_text, return_tensors="pt")
print(input_ids.shape)
# Only the logits are analysed; running without autograd keeps the forward pass
# of the large model from retaining activations for a backward pass that never happens.
with torch.no_grad():
    output = base_model(input_ids, return_dict=True)
print(output.logits.shape)

torch.Size([1, 93])
torch.Size([1, 93, 32000])


In [18]:
# Turn the logits into next-token probabilities and keep the top-k candidates per position.
probabilities = output.logits.softmax(dim=-1)

topk = 10
topk_probabilities, topk_indices = probabilities.topk(topk, dim=-1)

# Number of tokens in the prompt alone, used to skip prompt positions when scoring.
# It must come from the same record that was fed through the model, so index with
# sentence_idx (previously hard-coded to data[0], which breaks for any other record).
# NOTE(review): assumes the prompt tokenizes identically on its own and as a prefix
# of the joint text — confirm the boundary lines up.
input_token_length = len(tokenizer.encode(data[sentence_idx]['model_input']))

In [20]:
print(f"Full sentence: {tokenizer.decode(input_ids[0])}")
print("\n")

token_ids = input_ids[0]
# Visit answer positions only: prompt positions are skipped, and the last position
# is excluded because there is no following token to compare its candidates with.
for i in range(input_token_length, len(token_ids) - 1):
    positive_influence = 0
    total_influence = 0
    # Print the token at this position and the one that actually follows it.
    print(f"Actual token: {tokenizer.decode(token_ids[i].item())}")
    next_generated_token = tokenizer.decode(token_ids[i + 1].item())
    print(f"Next generated token: {next_generated_token}")
    for j in range(topk):
        token_id = topk_indices[0][i][j].item()
        token_prob = topk_probabilities[0][i][j].item()
        token = tokenizer.decode(token_id)
        relatedness = get_mnli_label(next_generated_token, token, mnli_model, mnli_tokenizer)
        # Entailed candidates count as support; contradictions only add to the
        # total; neutral candidates are ignored entirely.
        if relatedness == "entailment":
            positive_influence += token_prob
            total_influence += token_prob
        elif relatedness == "contradiction":
            total_influence += token_prob
        print(f"Token: {token}, Relateness: {relatedness}, Probability: {token_prob}")
    # If every candidate was neutral there is no probability mass to normalise by;
    # report NaN instead of raising ZeroDivisionError and aborting the whole loop.
    if total_influence > 0:
        hallucination_score = 1 - (positive_influence / total_influence)
    else:
        hallucination_score = float("nan")
    print(f"Hallucination Score: {hallucination_score}")
    print("\n")
    

Full sentence: <s> Where do Hamilton Academical F.C.'s play their home matches?  Hamilton Academical Football Club plays their home matches at New Douglas Park, which is located in Hamilton, South Lanarkshire, Scotland. The stadium has a capacity of around 6,000 and has been the home ground of Hamilton Academical since 1878, making it one of the oldest football grounds in the world still in use by a single club.


Actual token: ?
Next generated token: 
Token: 
, Relateness: entailment, Probability: 0.963034451007843
Token: Acc, Relateness: contradiction, Probability: 0.008610885590314865
Token: Acc, Relateness: contradiction, Probability: 0.008565135300159454
Token: New, Relateness: neutral, Probability: 0.00592818483710289
Token: New, Relateness: neutral, Probability: 0.0032623973675072193
Token: Dal, Relateness: neutral, Probability: 0.002606522524729371
Token: What, Relateness: contradiction, Probability: 0.0012952847173437476
Token: The, Relateness: entailment, Probability: 0.00095

In [None]:
# Quick check of the label the NLI classifier returns for a simple word pair.
sentence_1, sentence_2 = "gold", "silver"

get_mnli_label(sentence_1, sentence_2, model=mnli_model, tokenizer=mnli_tokenizer)

In [None]:
# Display the loaded NLI model (the notebook echoes the cell's last expression).
mnli_model