In [1]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os
import time
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model

# Load foundation model and tokenizer
model_name = "bigscience/bloomz-560m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)

# Load dataset (10% sample)
data = load_dataset("Abirate/english_quotes", split="train[:10%]")
print(f"Number of samples loaded: {len(data)}")

# Tokenization
def tokenize_function(samples):
    tokenized= tokenizer(samples["quote"], truncation=True, padding="longest", return_tensors="pt")

    input_ids = tokenized["input_ids"]
    # Convert to tensor for easier checking
    input_ids_tensor = torch.tensor(input_ids)
    
    if torch.isnan(input_ids_tensor).any():
        print("NaNs found in tokenized input_ids!")
    if (input_ids_tensor < 0).any():
        print("Negative values found in tokenized input_ids!")
    else:
        print("Token IDs look valid (no negatives or NaNs).")

    return tokenized

data = data.map(tokenize_function, batched=True)

# Show a small sample
train_sample = data.select(range(5))
print(train_sample)

# Configure LoRA
lora_config = LoraConfig(
    r=1,
    lora_alpha=16,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Inject LoRA layers into foundation model
peft_model = get_peft_model(foundation_model, lora_config)
peft_model.print_trainable_parameters()

# Set device
device = "mps" if torch.backends.mps.is_available() else "cpu"
peft_model.to(device)

# Training configuration
output_directory = os.path.join("../cache/working", "peft_lab_outputs")

training_args = TrainingArguments(
    report_to="none",
    output_dir=output_directory,
    per_device_train_batch_size=1,
    learning_rate= 5e-5, # Higher learning rate than full fine-tuning.
    num_train_epochs=6
)

from transformers import TrainerCallback

import torch

class DebugCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        inputs = kwargs.get("inputs")
        outputs = kwargs.get("outputs")

        if inputs is not None:
            input_ids = inputs.get("input_ids")
            labels = inputs.get("labels")

            if input_ids is not None:
                if torch.isnan(input_ids).any():
                    print("NaNs found in input_ids!")
                if (input_ids < 0).any():
                    print("Negative values found in input_ids!")
                print("Input IDs sample:", input_ids[0][:10])

            if labels is not None:
                if torch.isnan(labels).any():
                    print("NaNs found in labels!")
                if (labels < 0).any():
                    print("Negative values found in labels!")
                print("Labels sample:", labels[0][:10])

        if outputs is not None:
            loss = getattr(outputs, "loss", None)
            logits = getattr(outputs, "logits", None)

            if loss is not None:
                print(f"Loss at step {state.global_step}: {loss.item()}")

            if logits is not None:
                if torch.isnan(logits).any():
                    print("NaNs found in logits!")
                if torch.isinf(logits).any():
                    print("Infs found in logits!")
                print("Logits shape:", logits.shape)
                print("Logits sample:", logits[0, 0, :5])

        #if state.global_step >= 10:
        #    control.should_training_stop = True



# Trainer setup
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=data,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    callbacks=[DebugCallback()]
)

# Train model
trainer.train()

# Save the fine-tuned model
peft_model_path = os.path.join(output_directory, f"peft_model_{int(time.time())}")
trainer.model.save_pretrained(peft_model_path)

# Inference
inputs = tokenizer("Two things are infinite: ", return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    logits = peft_model(**inputs).logits
    print("NaNs in logits?", torch.isnan(logits).any().item())
    print("Infs in logits?", torch.isinf(logits).any().item())
    print("Negative values?", (logits < 0).any().item())


with torch.no_grad():
    outputs = peft_model.generate(
        **inputs,
        max_length=50,
        do_sample=True,
        top_k=50,
        temperature=0.7
    )

print(tokenizer.batch_decode(outputs, skip_special_tokens=True))


  from .autonotebook import tqdm as notebook_tqdm


Number of samples loaded: 251
Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 5
})
trainable params: 98,304 || all params: 559,312,896 || trainable%: 0.01757585078102687


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
500,3.1446
1000,2.8592
1500,2.7521




NaNs in logits? False
Infs in logits? False
Negative values? True
['Two things are infinite:  time and space. Time, because we cannot stop living, and space, because we cannot stop travelling. If one of these is not right, then the other is wrong, and the two have nothing to do with each']
