How To Finetune LLMs With LoRA

In [None]:
import os
os.makedirs("cache", exist_ok=True)

In [1]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Initialisation du modèle pour génération de texte
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


# Si le tokenizer n'a pas de pad_token, vous pouvez en définir un, souvent c'est l'eos_token
if tokenizer.pad_token is None:
   tokenizer.pad_token = tokenizer.eos_token  # Ou utilisez '[PAD]' si nécessaire

print(tokenizer.pad_token)  # Cela doit afficher un token valide

<|endoftext|>


In [3]:
# Dataset
data = load_dataset("Abirate/english_quotes", split="train[:10%]")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True) # + nouvelles colonnes input_ids et attention_mask
train_sample = data.select(range(5))
display(train_sample)

Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 5
})

In [4]:
# Vérifier les modules d'attention dans la première couche d'attention
print(model.transformer.h[0].attn)

GPT2Attention(
  (c_attn): Conv1D(nf=2304, nx=768)
  (c_proj): Conv1D(nf=768, nx=768)
  (attn_dropout): Dropout(p=0.1, inplace=False)
  (resid_dropout): Dropout(p=0.1, inplace=False)
)


In [5]:
import peft
from peft import LoraConfig, get_peft_model

# LoRA
lora_config = LoraConfig(
    r=1,
    lora_alpha=1.0, # a scaling factor that adjusts the magnitude of the weight matrix. Usually set to 1
    target_modules=["c_attn", "c_proj"],
    lora_dropout=0.1,
    bias="none", # this specifies if the bias parameter should be trained.
    task_type="CAUSAL_LM"
)

# Ajouter les couches d'adaptation à notre modèle à entrainer 

peft_model = get_peft_model(model, lora_config)
print(peft_model.print_trainable_parameters())

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'
trainable params: 101,376 || all params: 124,541,184 || trainable%: 0.08139957943550626
None


In [6]:
# Trainer class
import transformers
from transformers import TrainingArguments, Trainer
import os

output_directory = os.path.join("/Users/patash/PSTB/Week_6_LLM/cache/working", "peft_lab_outputs")
training_args = TrainingArguments(
    report_to="none",
    output_dir=output_directory,
    auto_find_batch_size=8,
    learning_rate= 3e-2, # Higher learning rate than full fine-tuning.
    num_train_epochs=3,
    use_cpu=True
)

# Initialiser Trainer 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
trainer.train()


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


TrainOutput(global_step=96, training_loss=3.043827692667643, metrics={'train_runtime': 271.2308, 'train_samples_per_second': 2.776, 'train_steps_per_second': 0.354, 'total_flos': 40716125872128.0, 'train_loss': 3.043827692667643, 'epoch': 3.0})

In [7]:
import time

time_now = time.strftime("%Y%m%d-%H%M%S")
peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")
trainer.model.save_pretrained(peft_model_path)

# Generate output tokens
inputs = tokenizer("Two things are infinite: ", padding=True, truncation=True, return_tensors="pt")
outputs = peft_model.generate(
    input_ids=inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_length=50,
    temperature=1.0,
    )
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


["Two things are infinite: \xa0Life is infinite.”If you don't have to live forever, you can live forever.”Life is infinite.”Life is infinite.”Life is infinite.”Life is infinite"]
