In [1]:
import pickle
from tqdm import tqdm

In [2]:
from datasets import load_dataset

ds = load_dataset("tau/commonsense_qa")

README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/160k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/151k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9741 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1221 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1140 [00:00<?, ? examples/s]

In [7]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_lm(model_name_or_path: str, device: str | None = None, dtype=None):
    """
    model_name_or_path can be:
      - Hugging Face hub id: "gpt2", "EleutherAI/gpt-j-6B", etc.
      - local folder created by model.save_pretrained(...), containing config.json + weights
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if dtype is None:
        # good default on modern NVIDIA GPUs
        dtype = torch.bfloat16 if (device == "cuda" and torch.cuda.is_bf16_supported()) else torch.float16 if device == "cuda" else torch.float32

    tok = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    # GPT-2-like models often don't have pad_token; set to eos for batching
    if tok.pad_token is None and tok.eos_token is not None:
        tok.pad_token = tok.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=dtype,
        device_map="auto" if device == "cuda" else None,
    )

    # If on CPU, explicitly move
    if device != "cuda":
        model.to(device)

    model.eval()
    return tok, model

# Example
tokenizer, model = load_lm("gpt2")  # or "/path/to/your/checkpoint_dir"




In [21]:
ds['train'][1]

{'id': '61fe6e879ff18686d7552425a36344c8',
 'question': 'Sammy wanted to go to where the people were.  Where might he go?',
 'question_concept': 'people',
 'choices': {'label': ['A', 'B', 'C', 'D', 'E'],
  'text': ['race track',
   'populated areas',
   'the desert',
   'apartment',
   'roadblock']},
 'answerKey': 'B'}

In [18]:
question = ds['train'][1]['question']
choices = "\n".join(f"{l} {t}" for l, t in zip(ds['train'][1]['choices']["label"], ds['train'][1]['choices']["text"]))
    

In [19]:
prompt_fmt = """Answer the given question.
{question}

{choices}
"""

In [20]:
prompt = prompt_fmt.format(question=question, choices=choices)
inputs = tokenizer(prompt, return_tensors="pt")
inputs = {k: v.to(model.device) for k, v in inputs.items()}

with torch.no_grad():
    out = model.generate(
        **inputs,
        max_new_tokens=80,
        do_sample=True,
        temperature=0.8,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

print(tokenizer.decode(out[0], skip_special_tokens=True))


Answer the given question.
Sammy wanted to go to where the people were.  Where might he go?

A race track
B populated areas
C the desert
D apartment
E roadblock
F the forest

G a place to run

H a place to sleep

I am not very good at answering all of these questions, especially when you don't know what to ask yourself. For some reason, though, my brain seems to respond to more questions than answers. 

When I ask my question in these situations, I immediately hear the answer.  The answer


In [27]:
import transformers, tokenizers
print("transformers:", transformers.__version__)
print("tokenizers:", tokenizers.__version__)

transformers: 4.44.2
tokenizers: 0.19.1
