In [None]:
# Installation des dépendances si nécessaire
!pip install transformers peft accelerate datasets torch evaluate scikit-learn codecarbon

**FINE-TUNING AVEC LoRA**

In [None]:
import os
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import evaluate
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from codecarbon import EmissionsTracker

# Make sure the CodeCarbon output directory exists.
os.makedirs("./codecarbon_logs", exist_ok=True)

# Carbon-footprint tracking.
# A recorded run of this cell logged "Another instance of codecarbon is
# probably running as we find `/tmp/.codecarbon.lock`" and the tracker exited,
# so nothing was measured. allow_multiple_runs=True is the option that error
# message itself recommends for starting the tracker in that situation.
tracker = EmissionsTracker(
    project_name="lora-fine-tuning-modernbert",
    output_dir="./codecarbon_logs",
    allow_multiple_runs=True,
)
tracker.start()

# Select the compute device: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Utilisation de l'appareil : {device}")

# Load the IMDB dataset.
dataset = load_dataset("imdb")

# Base checkpoint used for both the tokenizer and the classifier.
model_name = "answerdotai/ModernBERT-base"

# Tokenizer and model.
print("Chargement du modèle et du tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Label mappings: index -> name, and the inverse name -> index.
id2label = dict(enumerate(("neg", "pos")))
label2id = {label: index for index, label in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# LoRA configuration: rank-8 adapters on the attention and MLP projections
# listed in target_modules, for a sequence-classification task.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["attn.Wqkv", "attn.Wo", "mlp.Wi", "mlp.Wo"]
)

model = get_peft_model(model, peft_config)

# Enable gradient checkpointing (optional).
model.gradient_checkpointing_enable()

# With adapters on a frozen base model, the embedding output does not require
# grad on its own; make it do so, so that gradients can flow back through the
# checkpointed layers to the LoRA weights.
# NOTE(review): confirm on the installed transformers/PEFT versions that the
# adapters receive gradients (e.g. non-None .grad after one training step).
model.enable_input_require_grads()

# Freeze every parameter except those whose name contains "lora" or "classifier".
for name, param in model.named_parameters():
    param.requires_grad = any(tag in name for tag in ("lora", "classifier"))
    if param.requires_grad:
        print(f"Fine-tuning : {name}")

# List the parameters that are left trainable.
print("\nParamètres entraînables :")
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(name)

# Tokenization
def tokenize_function(examples):
    """Tokenize a batch of examples and carry its labels over.

    Args:
        examples: Batch mapping; its 'text' entry is passed to the tokenizer
            and its "label" entry is copied into the result unchanged.

    Returns:
        The tokenizer output (padding="max_length", truncation enabled,
        max_length=256) with an added "label" entry.
    """
    encoded = tokenizer(
        examples['text'],
        padding="max_length",
        truncation=True,
        max_length=256,
    )
    encoded["label"] = examples["label"]
    return encoded

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Train / evaluation split of the IMDB "train" split.
# A fixed seed makes the 90/10 split reproducible, so that runs (and the
# LoRA vs. full fine-tuning comparison) are scored on the same held-out examples.
split = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)
train_dataset = split["train"]
eval_dataset = split["test"]

# Metric handles from the `evaluate` library.
# NOTE(review): these four objects are not referenced elsewhere in the visible
# notebook (compute_metrics relies on scikit-learn) — confirm before removing.
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

# Class names, indexed by label id, used to name the per-class metrics.
class_names = ["neg", "pos"]

# Metric computation
def compute_metrics(eval_pred):
    """Compute accuracy plus global and per-class precision/recall/F1.

    Args:
        eval_pred: Pair ``(logits, labels)``; predictions are the argmax of
            the logits over the last dimension.

    Returns:
        Dict with "accuracy", the unweighted means of the per-class scores
        ("precision_global", "recall_global", "f1_macro"), then
        "precision_<name>", "recall_<name>" and "f1_<name>" for each class,
        where <name> comes from the module-level ``class_names`` list.
    """
    logits, labels = eval_pred
    predictions = torch.tensor(logits).argmax(dim=-1)

    overall_accuracy = accuracy_score(labels, predictions)
    # average=None yields one score per class; zero_division=1 is passed through as-is.
    class_precision, class_recall, class_f1, _ = precision_recall_fscore_support(
        labels, predictions, average=None, zero_division=1
    )

    metrics = dict(
        accuracy=overall_accuracy,
        precision_global=class_precision.mean(),
        recall_global=class_recall.mean(),
        f1_macro=class_f1.mean(),
    )

    # Per-class entries, in class-index order.
    n_classes = min(len(class_precision), len(class_recall), len(class_f1))
    for idx in range(n_classes):
        label = class_names[idx]
        metrics.update({
            f"precision_{label}": class_precision[idx],
            f"recall_{label}": class_recall[idx],
            f"f1_{label}": class_f1[idx],
        })

    return metrics

# Training configuration
training_args = TrainingArguments(
    output_dir="./results",
    # `eval_strategy` replaces the deprecated `evaluation_strategy` argument.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    learning_rate=2e-4,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    run_name="lora-modernbert-wandb",
    overwrite_output_dir=True,
    disable_tqdm=False,
    # A recorded run warned "No label_names provided for model class
    # `PeftModelForSequenceClassification`"; state the label key explicitly
    # rather than relying on the Trainer's inference.
    label_names=["labels"],
)

# Build the Trainer from the model, the training arguments, both dataset
# splits and the metric function.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

try:
    # Fine-tuning
    print("Début du fine-tuning avec LoRA...")
    trainer.train()

    # Final evaluation
    print("Évaluation du modèle après l'entraînement...")
    eval_results = trainer.evaluate()
    print(f"Résultats de l'évaluation : {eval_results}")
finally:
    # Stop CodeCarbon tracking even when training or evaluation raises or is
    # interrupted, so the tracker is never left running.
    tracker.stop()


[codecarbon ERROR @ 14:03:31] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Utilisation de l'appareil : cuda
Chargement du modèle et du tokenizer...


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fine-tuning : base_model.model.model.layers.0.attn.Wqkv.lora_A.default.weight
Fine-tuning : base_model.model.model.layers.0.attn.Wqkv.lora_B.default.weight
Fine-tuning : base_model.model.model.layers.0.attn.Wo.lora_A.default.weight
Fine-tuning : base_model.model.model.layers.0.attn.Wo.lora_B.default.weight
Fine-tuning : base_model.model.model.layers.0.mlp.Wi.lora_A.default.weight
Fine-tuning : base_model.model.model.layers.0.mlp.Wi.lora_B.default.weight
Fine-tuning : base_model.model.model.layers.0.mlp.Wo.lora_A.default.weight
Fine-tuning : base_model.model.model.layers.0.mlp.Wo.lora_B.default.weight
Fine-tuning : base_model.model.model.layers.1.attn.Wqkv.lora_A.default.weight
Fine-tuning : base_model.model.model.layers.1.attn.Wqkv.lora_B.default.weight
Fine-tuning : base_model.model.model.layers.1.attn.Wo.lora_A.default.weight
Fine-tuning : base_model.model.model.layers.1.attn.Wo.lora_B.default.weight
Fine-tuning : base_model.model.model.layers.1.mlp.Wi.lora_A.default.weight
Fine-tuni

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[codecarbon ERROR @ 14:03:38] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Début du fine-tuning avec LoRA...




Epoch,Training Loss,Validation Loss,Accuracy,Precision Global,Recall Global,F1 Macro,Precision Neg,Recall Neg,F1 Neg,Precision Pos,Recall Pos,F1 Pos
1,0.4622,0.441329,0.7956,0.808083,0.79616,0.793715,0.744652,0.896219,0.813436,0.871514,0.696102,0.773994
2,0.3396,0.403245,0.8272,0.828484,0.827374,0.827079,0.8065,0.858407,0.831645,0.850467,0.79634,0.822514
3,0.4579,0.394334,0.8304,0.830409,0.830417,0.8304,0.826816,0.833467,0.830128,0.834002,0.827367,0.830671
4,0.3583,0.390067,0.8348,0.835947,0.834963,0.834702,0.814871,0.864039,0.838735,0.857022,0.805887,0.830668
5,0.3135,0.389587,0.8344,0.836145,0.834601,0.834239,0.810487,0.870475,0.83941,0.861803,0.798727,0.829067




Évaluation du modèle après l'entraînement...




Résultats de l'évaluation : {'eval_loss': 0.3895866572856903, 'eval_accuracy': 0.8344, 'eval_precision_global': 0.8361447332465319, 'eval_recall_global': 0.8346008930840072, 'eval_f1_macro': 0.834238641262951, 'eval_precision_neg': 0.8104868913857678, 'eval_recall_neg': 0.8704746580852776, 'eval_f1_neg': 0.839410395655547, 'eval_precision_pos': 0.8618025751072962, 'eval_recall_pos': 0.7987271280827367, 'eval_f1_pos': 0.8290668868703551, 'eval_runtime': 56.8691, 'eval_samples_per_second': 43.961, 'eval_steps_per_second': 5.504, 'epoch': 5.0}


**FINE-TUNING SANS LoRA**

In [None]:
#SANS LORA
import os
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset
import evaluate
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from codecarbon import EmissionsTracker

# Make sure the CodeCarbon output directory exists.
os.makedirs("./codecarbon_logs", exist_ok=True)

# Carbon-footprint tracking.
# A recorded run of this cell logged "Another instance of codecarbon is
# probably running as we find `/tmp/.codecarbon.lock`" and the tracker exited,
# so nothing was measured. allow_multiple_runs=True is the option that error
# message itself recommends for starting the tracker in that situation.
tracker = EmissionsTracker(
    project_name="finetuning-modernbert",
    output_dir="./codecarbon_logs",
    allow_multiple_runs=True,
)
tracker.start()

# Select the compute device: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Utilisation de l'appareil : {device}")

# Load the IMDB dataset.
dataset = load_dataset("imdb")

# Base checkpoint used for both the tokenizer and the classifier.
model_name = "answerdotai/ModernBERT-base"

# Tokenizer and model.
print("Chargement du modèle et du tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Label mappings: index -> name, and the inverse name -> index.
id2label = dict(enumerate(("neg", "pos")))
label2id = {label: index for index, label in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# List the names of all parameters that currently require gradients.
print("\nParamètres entraînables :")
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(name)

# Tokenization
def tokenize_function(examples):
    """Tokenize a batch of examples and carry its labels over.

    Args:
        examples: Batch mapping; its 'text' entry is passed to the tokenizer
            and its "label" entry is copied into the result unchanged.

    Returns:
        The tokenizer output (padding="max_length", truncation enabled,
        max_length=256) with an added "label" entry.
    """
    encoded = tokenizer(
        examples['text'],
        padding="max_length",
        truncation=True,
        max_length=256,
    )
    encoded["label"] = examples["label"]
    return encoded

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Train / evaluation split of the IMDB "train" split.
# A fixed seed makes the 90/10 split reproducible, so that runs (and the
# LoRA vs. full fine-tuning comparison) are scored on the same held-out examples.
split = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)
train_dataset = split["train"]
eval_dataset = split["test"]

# Metric handles from the `evaluate` library.
# NOTE(review): these four objects are not referenced elsewhere in the visible
# notebook (compute_metrics relies on scikit-learn) — confirm before removing.
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

# Class names, indexed by label id, used to name the per-class metrics.
class_names = ["neg", "pos"]

# Metric computation
def compute_metrics(eval_pred):
    """Compute accuracy plus global and per-class precision/recall/F1.

    Args:
        eval_pred: Pair ``(logits, labels)``; predictions are the argmax of
            the logits over the last dimension.

    Returns:
        Dict with "accuracy", the unweighted means of the per-class scores
        ("precision_global", "recall_global", "f1_macro"), then
        "precision_<name>", "recall_<name>" and "f1_<name>" for each class,
        where <name> comes from the module-level ``class_names`` list.
    """
    logits, labels = eval_pred
    predictions = torch.tensor(logits).argmax(dim=-1)

    overall_accuracy = accuracy_score(labels, predictions)
    # average=None yields one score per class; zero_division=1 is passed through as-is.
    class_precision, class_recall, class_f1, _ = precision_recall_fscore_support(
        labels, predictions, average=None, zero_division=1
    )

    metrics = dict(
        accuracy=overall_accuracy,
        precision_global=class_precision.mean(),
        recall_global=class_recall.mean(),
        f1_macro=class_f1.mean(),
    )

    # Per-class entries, in class-index order.
    n_classes = min(len(class_precision), len(class_recall), len(class_f1))
    for idx in range(n_classes):
        label = class_names[idx]
        metrics.update({
            f"precision_{label}": class_precision[idx],
            f"recall_{label}": class_recall[idx],
            f"f1_{label}": class_f1[idx],
        })

    return metrics

# Training configuration
# NOTE(review): in the recorded run the validation loss rose from 0.336 to
# 0.482 over the three epochs while the training loss fell to 0.0072, which
# looks like overfitting — consider a lower learning rate for full fine-tuning
# and/or keeping the best checkpoint; values are left unchanged here.
training_args = TrainingArguments(
    output_dir="./results",
    # `eval_strategy` replaces the deprecated `evaluation_strategy` argument.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    learning_rate=2e-4,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    run_name="finetuning-modernbert",
    overwrite_output_dir=True,
    disable_tqdm=False
)

# Build the Trainer from the model (moved to the selected device), the
# training arguments, both dataset splits and the metric function.
trainer = Trainer(
    model=model.to(device),
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

try:
    # Fine-tuning
    print("Début du fine-tuning sans LoRA...")
    trainer.train()

    # Final evaluation
    print("Évaluation du modèle après l'entraînement...")
    eval_results = trainer.evaluate()
    print(f"Résultats de l'évaluation : {eval_results}")
finally:
    # Stop CodeCarbon tracking even when training or evaluation raises or is
    # interrupted, so the tracker is never left running.
    tracker.stop()


[codecarbon ERROR @ 21:03:18] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Utilisation de l'appareil : cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Chargement du modèle et du tokenizer...


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Paramètres entraînables :
model.embeddings.tok_embeddings.weight
model.embeddings.norm.weight
model.layers.0.attn.Wqkv.weight
model.layers.0.attn.Wo.weight
model.layers.0.mlp_norm.weight
model.layers.0.mlp.Wi.weight
model.layers.0.mlp.Wo.weight
model.layers.1.attn_norm.weight
model.layers.1.attn.Wqkv.weight
model.layers.1.attn.Wo.weight
model.layers.1.mlp_norm.weight
model.layers.1.mlp.Wi.weight
model.layers.1.mlp.Wo.weight
model.layers.2.attn_norm.weight
model.layers.2.attn.Wqkv.weight
model.layers.2.attn.Wo.weight
model.layers.2.mlp_norm.weight
model.layers.2.mlp.Wi.weight
model.layers.2.mlp.Wo.weight
model.layers.3.attn_norm.weight
model.layers.3.attn.Wqkv.weight
model.layers.3.attn.Wo.weight
model.layers.3.mlp_norm.weight
model.layers.3.mlp.Wi.weight
model.layers.3.mlp.Wo.weight
model.layers.4.attn_norm.weight
model.layers.4.attn.Wqkv.weight
model.layers.4.attn.Wo.weight
model.layers.4.mlp_norm.weight
model.layers.4.mlp.Wi.weight
model.layers.4.mlp.Wo.weight
model.layers.5.attn_no

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

[codecarbon ERROR @ 21:04:06] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Début du fine-tuning sans LoRA...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malex-lochain[0m ([33malex-lochain-le-mans-universit-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss,Accuracy,Precision Global,Recall Global,F1 Macro,Precision Neg,Recall Neg,F1 Neg,Precision Pos,Recall Pos,F1 Pos
1,0.3389,0.335505,0.872,0.872028,0.8722,0.871988,0.883721,0.86296,0.873217,0.860335,0.881439,0.870759
2,0.1723,0.412705,0.87,0.869984,0.870155,0.869983,0.880192,0.86296,0.871491,0.859776,0.877351,0.868474
3,0.0072,0.482037,0.8684,0.868438,0.868607,0.868389,0.880417,0.859045,0.8696,0.856459,0.878168,0.867178


W0409 21:31:02.617000 2413 torch/_inductor/utils.py:1137] [1/1] Not enough SMs to use max_autotune_gemm mode


Évaluation du modèle après l'entraînement...




Résultats de l'évaluation : {'eval_loss': 0.48203736543655396, 'eval_accuracy': 0.8684, 'eval_precision_global': 0.8684383328085279, 'eval_recall_global': 0.8686065370659335, 'eval_f1_macro': 0.8683888604331471, 'eval_precision_neg': 0.8804173354735152, 'eval_recall_neg': 0.8590446358653093, 'eval_f1_neg': 0.869599682917162, 'eval_precision_pos': 0.8564593301435407, 'eval_recall_pos': 0.8781684382665577, 'eval_f1_pos': 0.8671780379491321, 'eval_runtime': 57.632, 'eval_samples_per_second': 43.379, 'eval_steps_per_second': 5.431, 'epoch': 3.0}


In [None]:
# Print the name of every submodule of `model`, to find the ones to target with LoRA.
# NOTE(review): `model` is not defined in this cell; it relies on the object
# left in the kernel by a previously executed cell.
for name, module in model.named_modules():
    print(name)



model
model.embeddings
model.embeddings.tok_embeddings
model.embeddings.norm
model.embeddings.drop
model.layers
model.layers.0
model.layers.0.attn_norm
model.layers.0.attn
model.layers.0.attn.Wqkv
model.layers.0.attn.rotary_emb
model.layers.0.attn.Wo
model.layers.0.attn.out_drop
model.layers.0.mlp_norm
model.layers.0.mlp
model.layers.0.mlp.Wi
model.layers.0.mlp.act
model.layers.0.mlp.drop
model.layers.0.mlp.Wo
model.layers.1
model.layers.1.attn_norm
model.layers.1.attn
model.layers.1.attn.Wqkv
model.layers.1.attn.rotary_emb
model.layers.1.attn.Wo
model.layers.1.attn.out_drop
model.layers.1.mlp_norm
model.layers.1.mlp
model.layers.1.mlp.Wi
model.layers.1.mlp.act
model.layers.1.mlp.drop
model.layers.1.mlp.Wo
model.layers.2
model.layers.2.attn_norm
model.layers.2.attn
model.layers.2.attn.Wqkv
model.layers.2.attn.rotary_emb
model.layers.2.attn.Wo
model.layers.2.attn.out_drop
model.layers.2.mlp_norm
model.layers.2.mlp
model.layers.2.mlp.Wi
model.layers.2.mlp.act
model.layers.2.mlp.drop
mod

In [None]:
# Recursively archive the current working directory into modernbert_imdb_sans_lora.zip.
!zip -r modernbert_imdb_sans_lora.zip ./