In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, PeftModel, TaskType
import os
import bitsandbytes as bnb  # For 8-bit quantization
from evaluate import load
from tqdm import tqdm
import torch
import pandas as pd
import re
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
from utils import preprocess_qa, RestrictToValidTokens
pd.options.display.max_colwidth = None
# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
hf_auth_token = os.getenv("HF_AUTH_TOKEN")
ds = load_dataset("tau/commonsense_qa")

In [2]:
trained_folder = "./llama-qa-lora_overfit"

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(trained_folder)

# Ensure the pad token is consistent
if tokenizer.pad_token_id is None:
    tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|pad|>")

# Load the base model
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,  
    llm_int8_threshold=6.0  
)

base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    cache_dir="/fs03/yu60/kojitanaka/model_cache",
    quantization_config=bnb_config
)

base_model.resize_token_embeddings(len(tokenizer))

# Load the LoRA adapter correctly
model = PeftModel.from_pretrained(
    base_model,
    trained_folder,
    is_trainable=True
)

model.print_trainable_parameters()

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


trainable params: 3,407,872 || all params: 8,033,677,312 || trainable%: 0.0424


In [4]:
from transformers import LogitsProcessorList, LogitsProcessor

tokenized_ds = ds.map(preprocess_qa)
# Convert valid tokens to token IDs
valid_tokens = [tokenizer.convert_tokens_to_ids(token) for token in ['A', 'B', 'C', 'D', 'E']]

# Instantiate the logits processor with valid tokens
logits_processor = LogitsProcessorList([RestrictToValidTokens(valid_tokens)])

correct_predictions = 0
sample_texts = tokenized_ds['train']['text']
ground_truths = tokenized_ds['train']['target_text']

size = 16
for i in range(0, size):
    sample_text = sample_texts[i]
    ground_truth = ground_truths[i].strip()

    inputs = tokenizer(sample_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, 
                                 max_new_tokens=1, 
                                 eos_token_id=tokenizer.eos_token_id, 
                                 top_k=5,
                                 logits_processor=logits_processor
                                 )
        generated_tokens = outputs[0][inputs['input_ids'].shape[1]:]
        prediction_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
        if prediction_text == ground_truth:
            correct_predictions += 1
        
        print(f"Prediction: {prediction_text}, Ground Truth: {ground_truth}")
        print(f"Progress: {i + 1}/{size}")

accuracy = correct_predictions / size * 100
print(f"\nAccuracy: {accuracy:.2f}%")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


NameError: name 'torch' is not defined