In [1]:
from datasets import load_dataset
import torch
import os
import pandas as pd
from utils import preprocess_qa, RestrictToValidTokens
import torch
import random
import numpy as np

# Fix all seeds
seed = 42
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
pd.options.display.max_colwidth = None
hf_auth_token = os.getenv("HF_AUTH_TOKEN")
ds = load_dataset("tau/commonsense_qa")

In [2]:
trained_folder = "./fine-tuned/llama-qa-lora_16"

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(trained_folder)

# Ensure the pad token is consistent
if tokenizer.pad_token_id is None:
    tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|pad|>")

# Load the base model
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,  
    llm_int8_threshold=6.0  
)

base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    cache_dir="/fs03/yu60/kojitanaka/model_cache",
    quantization_config=bnb_config
)

base_model.resize_token_embeddings(len(tokenizer))

# Load the LoRA adapter correctly
model = PeftModel.from_pretrained(
    base_model,
    trained_folder,
    is_trainable=True
)

model.print_trainable_parameters()

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


trainable params: 6,815,744 || all params: 8,037,085,184 || trainable%: 0.0848


In [4]:
from transformers import LogitsProcessorList, LogitsProcessor

tokenized_ds = ds.map(preprocess_qa)

valid_tokens = [tokenizer.convert_tokens_to_ids(tk) for tk in ['A', 'B', 'C', 'D', 'E']]  
logits_processor = LogitsProcessorList([RestrictToValidTokens(valid_tokens)])

model.eval()  # switch to eval mode
sample_texts = tokenized_ds['train']['text'][:16]
ground_truths = tokenized_ds['train']['target_text'][:16]

correct_predictions = 0

for i, (sample_text, ground_truth) in enumerate(zip(sample_texts, ground_truths)):
    inputs = tokenizer(sample_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1,
            eos_token_id=tokenizer.eos_token_id,
            do_sample=False,       # Greedy decoding
            temperature=0.0,
            # logits_processor=logits_processor
        )
    generated_tokens = outputs[0][inputs['input_ids'].shape[1]:]
    prediction_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    print(f"prediction_text repr: {repr(prediction_text)}")
    print(f"ground_truth repr:   {repr(ground_truth.strip())}")

    if prediction_text == ground_truth.strip():
        correct_predictions += 1
        
    print(f"Progress: {i + 1}/{len(sample_texts)}\n")

accuracy = correct_predictions / len(sample_texts) * 100
print(f"\nAccuracy: {accuracy:.2f}% on the first 100 training samples.")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'A'
ground_truth repr:   'A'
Progress: 1/16

prediction_text repr: 'B'
ground_truth repr:   'B'
Progress: 2/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'A'
ground_truth repr:   'A'
Progress: 3/16

prediction_text repr: 'D'
ground_truth repr:   'D'
Progress: 4/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'C'
ground_truth repr:   'C'
Progress: 5/16

prediction_text repr: 'D'
ground_truth repr:   'D'
Progress: 6/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'E'
ground_truth repr:   'E'
Progress: 7/16

prediction_text repr: 'D'
ground_truth repr:   'B'
Progress: 8/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'E'
ground_truth repr:   'E'
Progress: 9/16

prediction_text repr: 'D'
ground_truth repr:   'D'
Progress: 10/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'A'
ground_truth repr:   'B'
Progress: 11/16

prediction_text repr: 'A'
ground_truth repr:   'C'
Progress: 12/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'A'
ground_truth repr:   'C'
Progress: 13/16

prediction_text repr: 'A'
ground_truth repr:   'A'
Progress: 14/16



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


prediction_text repr: 'C'
ground_truth repr:   'C'
Progress: 15/16

prediction_text repr: 'D'
ground_truth repr:   'D'
Progress: 16/16


Accuracy: 75.00% on the first 100 training samples.


In [5]:
# Ensure deterministic behavior
import torch
torch.manual_seed(42)
max_length=256
def create_tokenized_ds_for_finetune(example):
    prompt_text = preprocess_qa(example)['text']

    tokenized_prompt = tokenizer(prompt_text, truncation=True, padding="max_length", return_tensors="pt", max_length=max_length)

    answer_token = tokenizer(example['answerKey'].strip(), return_tensors="pt", add_special_tokens=False)

    input_ids = tokenized_prompt["input_ids"].squeeze(0)
    attention_mask = tokenized_prompt["attention_mask"].squeeze(0)
    answer_input_ids = answer_token["input_ids"].squeeze(0)

    labels = torch.full_like(input_ids, -100)
    if answer_input_ids.numel() == 1:
        next_pos = input_ids.ne(tokenizer.pad_token_id).sum()
        labels[next_pos] = answer_input_ids.item()
    else:
        start_pos = input_ids.ne(tokenizer.pad_token_id).sum()
        labels[start_pos : start_pos + answer_input_ids.size(0)] = answer_input_ids

    pad_length = max_length - input_ids.shape[0]
    
    if pad_length > 0:
        input_ids = torch.cat([input_ids, torch.full((pad_length,), tokenizer.pad_token_id)])
        attention_mask = torch.cat([attention_mask, torch.zeros(pad_length, dtype=torch.long)])
        labels = torch.cat([labels, torch.full((pad_length,), -100)])
    else:
        input_ids = input_ids[:max_length]
        attention_mask = attention_mask[:max_length]
        labels = labels[:max_length]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }

# Prepare a single test sample from your dataset
sample = ds['train'][0]
tokens_info = create_tokenized_ds_for_finetune(sample)
inputs = {
    "input_ids": tokens_info["input_ids"].unsqueeze(0).to(model.device),
    "attention_mask": tokens_info["attention_mask"].unsqueeze(0).to(model.device),
}

ground_truth = sample['answerKey'].strip()
ground_truth_token_id = tokenizer.convert_tokens_to_ids(ground_truth)

# Perform inference and check logits difference
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits[:, -1, :]  # Logits for the last token
    predicted_token_id = torch.argmax(logits, dim=-1).item()

    # Logit comparison
    logit_predicted = logits[0, predicted_token_id].item()
    logit_ground_truth = logits[0, ground_truth_token_id].item()


    print("Decoded input with special characters:")
    print(repr(tokenizer.decode(tokens_info["input_ids"])))
    print(f"\nPredicted Token ID: {predicted_token_id} -> {tokenizer.decode([predicted_token_id])}")
    print(f"Ground Truth Token ID: {ground_truth_token_id} -> {tokenizer.decode([ground_truth_token_id])}")
    print(f"Logit Difference: {logit_predicted - logit_ground_truth:.6f}")
    print(f"Logits for predicted token: {logit_predicted}")
    print(f"Logits for ground truth token: {logit_ground_truth}")
    print(f"\nPrediction: {tokenizer.decode([predicted_token_id])}, Ground Truth: {ground_truth}")

Decoded input with special characters:
'<|begin_of_text|>Question: The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?. Options: A: ignore, B: enforce, C: authoritarian, D: yell at, E: avoid,. Return only the letter corresponding to the correct answer (A, B, C, D, or E). The answer is <|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|

In [6]:
import torch
import torch.nn.functional as F
# Define the token IDs for A, B, C, D, E
target_tokens = [',', ':', 'A', 'B', 'C', 'D', 'E']
target_token_ids = [tokenizer.convert_tokens_to_ids(token) for token in target_tokens]

with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits[:, -1, :]  # Logits for the last token (before softmax)
    
    # Apply softmax to convert logits to probabilities
    probabilities = F.softmax(logits, dim=-1)[0]  # Apply softmax across the last dimension

    # Get the predicted token and ground truth token
    predicted_token_id = torch.argmax(logits, dim=-1).item()
    logit_predicted = logits[0, predicted_token_id].item()
    logit_ground_truth = logits[0, ground_truth_token_id].item()

    # Print token probabilities for A, B, C, D, E
    print("\nProbabilities for A, B, C, D, E:")
    for token, token_id in zip(target_tokens, target_token_ids):
        prob = probabilities[token_id].item()
        print(f"Token: {token} (ID: {token_id}) -> Probability: {prob:.6f}")

    # Print decoded results and probabilities for the predicted token
    print("\nDecoded input with special characters:")
    print(repr(tokenizer.decode(tokens_info["input_ids"])))
    print(f"\nPredicted Token ID: {predicted_token_id} -> {tokenizer.decode([predicted_token_id])}")
    print(f"Ground Truth Token ID: {ground_truth_token_id} -> {tokenizer.decode([ground_truth_token_id])}")
    print(f"Logit Difference: {logit_predicted - logit_ground_truth:.6f}")
    print(f"Logits for predicted token: {logit_predicted}")
    print(f"Logits for ground truth token: {logit_ground_truth}")
    print(f"\nPrediction: {tokenizer.decode([predicted_token_id])}, Ground Truth: {ground_truth}")


Probabilities for A, B, C, D, E:
Token: , (ID: 11) -> Probability: 0.002972
Token: : (ID: 25) -> Probability: 0.054352
Token: A (ID: 32) -> Probability: 0.000028
Token: B (ID: 33) -> Probability: 0.000014
Token: C (ID: 34) -> Probability: 0.000007
Token: D (ID: 35) -> Probability: 0.000003
Token: E (ID: 36) -> Probability: 0.000014

Decoded input with special characters:
'<|begin_of_text|>Question: The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?. Options: A: ignore, B: enforce, C: authoritarian, D: yell at, E: avoid,. Return only the letter corresponding to the correct answer (A, B, C, D, or E). The answer is <|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|><|pad|