In [1]:
!pip install -U peft transformers datasets accelerate bitsandbytes
!pip install -q trl



In [2]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "bigscience/bloomz-560m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)

dataset = load_dataset("Abirate/english_quotes", split="train[:10%]")
data = dataset.map(lambda samples: tokenizer(samples["quote"]), batched=True)
train_sample = data.select(range(50))
display(train_sample)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 10
})

In [3]:
import peft
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=1,
    lora_alpha=1, # a scaling factor that adjusts the magnitude of the weight matrix. Usually set to 1
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none", # this specifies if the bias parameter should be trained.
    task_type="CAUSAL_LM"
)

peft_model = get_peft_model(foundation_model, lora_config)

# Set requires_grad to True for LoRA parameters
for name, param in peft_model.named_parameters():
    if "lora_" in name:
        param.requires_grad = True

print(peft_model.print_trainable_parameters())

trainable params: 98,304 || all params: 559,312,896 || trainable%: 0.0176
None


In [4]:
!pip install evaluate



In [5]:
import numpy as np
import evaluate

# Charge la métrique accuracy
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    mask = labels != -100
    preds = predictions[mask]
    labs = labels[mask]

    return accuracy_metric.compute(predictions=preds, references=labs)

In [6]:
for name, param in peft_model.named_parameters():
    print(f"{name}: requires_grad={param.requires_grad}")

base_model.model.transformer.word_embeddings.weight: requires_grad=False
base_model.model.transformer.word_embeddings_layernorm.weight: requires_grad=False
base_model.model.transformer.word_embeddings_layernorm.bias: requires_grad=False
base_model.model.transformer.h.0.input_layernorm.weight: requires_grad=False
base_model.model.transformer.h.0.input_layernorm.bias: requires_grad=False
base_model.model.transformer.h.0.self_attention.query_key_value.base_layer.weight: requires_grad=False
base_model.model.transformer.h.0.self_attention.query_key_value.base_layer.bias: requires_grad=False
base_model.model.transformer.h.0.self_attention.query_key_value.lora_A.default.weight: requires_grad=True
base_model.model.transformer.h.0.self_attention.query_key_value.lora_B.default.weight: requires_grad=True
base_model.model.transformer.h.0.self_attention.dense.weight: requires_grad=False
base_model.model.transformer.h.0.self_attention.dense.bias: requires_grad=False
base_model.model.transformer.h.0.

In [8]:
import transformers
from transformers import TrainingArguments, Trainer
import os

output_directory = os.path.join("../cache/working", "peft_lab_outputs")
training_args = TrainingArguments(
    report_to="none",
    output_dir=output_directory,
    auto_find_batch_size=True,
    learning_rate= 3e-2, # Higher learning rate than full fine-tuning.
    num_train_epochs=5,
    metric_for_best_model="accuracy",
    use_cpu=True
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    compute_metrics=compute_metrics
)
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


TrainOutput(global_step=10, training_loss=13.447837829589844, metrics={'train_runtime': 103.9263, 'train_samples_per_second': 0.481, 'train_steps_per_second': 0.096, 'total_flos': 3919256616960.0, 'train_loss': 13.447837829589844, 'epoch': 5.0})

In [9]:
from peft import PeftModel, PeftConfig
import time

time_now = time.time()

peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")

trainer.model.save_pretrained(peft_model_path)

loaded_model = PeftModel.from_pretrained(foundation_model, peft_model_path, is_trainable=False)

inputs = tokenizer("Two things are infinite: ", return_tensors="pt")


outputs = peft_model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=10,
    eos_token_id=tokenizer.eos_token_id
    )

print(tokenizer.batch_decode(outputs, skip_special_tokens=True))




['Two things are infinite:  once once once once once once once once once once']
