In [1]:
pip install peft==0.4.0 transformers datasets accelerate


Collecting peft==0.4.0
  Downloading peft-0.4.0-py3-none-any.whl.metadata (21 kB)
Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft==0.4.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft==0.4.0)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft==0.4.0)
  Downloading nvidia_cuda_cupti_cu12-12

2. Chargement du modèle et du tokenizer

In [2]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub checkpoint shared by the base model and its tokenizer.
model_name = "bigscience/bloomz-560m"

# Base causal language model (wrapped with LoRA adapters in a later cell)
# and the tokenizer that matches the same checkpoint.
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

3. Chargement et prétraitement du dataset

In [3]:
def _tokenize_quotes(samples):
    """Tokenize the "quote" column of a batch of examples."""
    return tokenizer(samples["quote"])


# Load the training split and shuffle it with a fixed seed.
dataset = load_dataset("Abirate/english_quotes", split="train")
dataset = dataset.shuffle(seed=42)

# Keep the first 10% of the shuffled examples.
subset_indices = range(int(0.1 * len(dataset)))
data = dataset.select(subset_indices)

# Tokenize the quotes batch by batch.
data = data.map(_tokenize_quotes, batched=True)

# Sanity check on the first five tokenized samples.
train_sample = data.select(range(5))
print(train_sample)


README.md:   0%|          | 0.00/5.55k [00:00<?, ?B/s]

quotes.jsonl:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2508 [00:00<?, ? examples/s]

Map:   0%|          | 0/250 [00:00<?, ? examples/s]

Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 5
})


4. Configuration de LoRA

In [4]:
import peft
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters for the adapters added to the base model.
lora_config = LoraConfig(
    r=8,  # rank of the low-rank update matrices
    lora_alpha=16,  # scaling factor applied to the LoRA update
    target_modules=["query_key_value"],  # layers that receive LoRA adapters
    lora_dropout=0.05,  # dropout on the LoRA layers to limit overfitting
    bias="none",  # bias terms are not fine-tuned
    task_type="CAUSAL_LM",
)

# Wrap the base model with the LoRA adapters.
peft_model = get_peft_model(foundation_model, lora_config)

# print_trainable_parameters() prints the summary itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
peft_model.print_trainable_parameters()


trainable params: 786,432 || all params: 560,001,024 || trainable%: 0.14043402892063284
None


5. Configuration de l'entraînement

In [None]:
import transformers
from transformers import TrainingArguments, Trainer
import os

# Directory handed to the Trainer as its output location.
output_directory = "../cache/working/peft_lab_outputs"

training_args = TrainingArguments(
    output_dir=output_directory,
    num_train_epochs=3,  # number of passes over the training subset
    learning_rate=3e-4,  # higher learning rate than for classic full fine-tuning
    auto_find_batch_size=True,  # let the Trainer pick a workable batch size automatically
    use_cpu=True,
    report_to="none",
)

# mlm=False selects causal (non-masked) language-modeling collation.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    args=training_args,
    model=peft_model,
    data_collator=causal_lm_collator,
    train_dataset=data,
)

# Run the fine-tuning loop.
trainer.train()


Step,Training Loss


6. Sauvegarde du modèle fine-tuné

In [None]:
import time

# Current Unix time in whole seconds, used to name the save directory.
time_now = f"{int(time.time())}"
peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")

# Write the trained model held by the Trainer to the timestamped directory.
fine_tuned_model = trainer.model
fine_tuned_model.save_pretrained(peft_model_path)


7. Chargement du modèle et génération de texte

In [None]:
from peft import PeftModel

# Load the fine-tuned adapter onto a freshly loaded base model.
# get_peft_model() modifies the model it receives in place, so `foundation_model`
# already contains the LoRA layers from training and is not a clean base for a reload.
# NOTE: this keeps a second copy of the base model in memory.
base_model = AutoModelForCausalLM.from_pretrained(model_name)
peft_model = PeftModel.from_pretrained(base_model, peft_model_path, is_trainable=False)

# Generate a continuation for a sample prompt.
inputs = tokenizer("Two things are infinite: ", return_tensors="pt")
outputs = peft_model.generate(
    **inputs,
    max_length=50,
    num_return_sequences=1,
    do_sample=True,  # temperature only takes effect when sampling is enabled
    temperature=0.7,
)

print(tokenizer.batch_decode(outputs, skip_special_tokens=True))


 Résumé des modifications :

    Remplissage des valeurs de LoraConfig :
        r=8, lora_alpha=16, target_modules=["query_key_value"], lora_dropout=0.05

    Ajout des paramètres d'entraînement :
        auto_find_batch_size=True, learning_rate=3e-4, num_train_epochs=3

    Chargement et prétraitement du dataset :
        Sélection de 10% des données, puis tokenisation.

    Entraînement et sauvegarde du modèle.

    Rechargement du modèle pour l’inférence.