In [5]:
import os
from dotenv import load_dotenv
from collections import Counter
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [6]:
load_dotenv()

# Load GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2-xl")
model = GPT2LMHeadModel.from_pretrained("gpt2-xl")

# Load CommonsenseQA subset
dataset = load_dataset("commonsense_qa", split="validation[:10]")

def format_prompt(question, choices):
    choice_str = "\n".join([f"{label}. {text}" for label, text in zip(choices['label'], choices['text'])])
    return f"Question: {question}\nChoices:\n{choice_str}\nAnswer with the correct letter."

def get_response(prompt, max_length=100, temperature=0.7):
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_length=max_length,
        temperature=temperature,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

def self_consistent_answer(question, choices, n_samples=5):
    prompt = format_prompt(question, choices)
    answers = [get_response(prompt) for _ in range(n_samples)]
    print("Prompt:")
    print(prompt)
    print("Answers:")
    for answer in answers:
        print("------")
        print(answer)
        print("------")
    
    return Counter(answers).most_common(1)[0][0]

# Evaluate self-consistency
correct = 0
for item in dataset:
    prediction = self_consistent_answer(item["question"], item["choices"])
    gold = item["answerKey"]
    if gold in prediction.upper():
        correct += 1

print(f"Self-consistency accuracy over 10 examples: {correct}/10")

Generating train split: 100%|██████████| 9741/9741 [00:00<00:00, 51466.28 examples/s]
Generating validation split: 100%|██████████| 1221/1221 [00:00<00:00, 373895.39 examples/s]
Generating test split: 100%|██████████| 1140/1140 [00:00<00:00, 502470.21 examples/s]
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Prompt:
Question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?
Choices:
A. bank
B. library
C. department store
D. mall
E. new york
Answer with the correct letter.
Answers:
------
Question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?
Choices:
A. bank
B. library
C. department store
D. mall
E. new york
Answer with the correct letter.
Question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?
Choices:
A. bank
B. library
C. department store
------
------
Question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?
Choices:
A. bank
B. library
C. department store
D. mall
E. new york
Answer with the correct letter.
Question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?
Choices:
A. bank
B.