In [1]:
import os
import sys

from unsloth import FastLanguageModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# --- Base model settings -------------------------------------------------
# max_seq_length is reused later when the trainer is built, so keep the name.
max_seq_length = 2048
dtype = None  # None = auto detection (Float16 for T4/V100, Bfloat16 for Ampere+)
load_in_4bit = True  # Essential memory optimization

base_model_kwargs = dict(
    model_name="unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# --- LoRA (Low-Rank Adaptation) configuration ----------------------------
# Adapters are attached to the attention projections and to the MLP projections.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Rank: 8, 16, 32, 64. 16 is a good balance.
    target_modules=attention_projections + mlp_projections,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # same value as the trainer seed used later in the notebook
)

==((====))==  Unsloth 2025.12.4: Fast Mistral patching. Transformers: 4.57.3.
   \\   /|    NVIDIA GeForce RTX 2080 Ti. Num GPUs = 1. Max memory: 10.568 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu130. CUDA: 7.5. CUDA Toolkit: 13.0. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
from datasets import load_dataset

# Scoring rubric sent to the model as the instruction part of every prompt
# (training and inference). It defines the four justification levels (0-3)
# and asks for the bare integer as the answer.
# NOTE: the dash in level (1) is a real em dash; it was previously stored as
# mis-decoded bytes, which put garbage characters into every prompt.
JUSTIFICATION_CRITERIA = """This refers to the nature of the justification of demands. Here we judge to what
extent a speech gives complete justifications for demands. The completeness of
the justifications is judged in terms of the inferences that are made. There are
four levels of justification:
(0) No justification: A speaker only says that X should or should not be done,
but no reason is given.
(1) Inferior justification: Here a reason Y is given as to why X should or should
not be done, but no linkage is made between X and Y — the inference is
incomplete. This code also applies if a conclusion is merely supported with
illustrations.
(2) Qualified justification: A linkage is made as to why one should expect that
X contributes to or detracts from Y. A single such complete inference
already qualifies for code 2.
(3) Sophisticated justification: Here at least two complete justifications are
given, either two complete justifications for the same demand or complete
justifications for two different demands.

Analyze the text below and assign a score (0, 1, 2, or 3). Return ONLY the integer."""

# Prompt template shared by training and inference. Despite its name it is not
# an Alpaca-style template: it wraps the instruction in [INST] ... [/INST].
#   {system_prompt} - the rubric above
#   {input_text}    - the text to be scored
#   {output_score}  - the gold label for training, or "" so the model completes it
alpaca_prompt = """[INST] {system_prompt}

Text to analyze:
{input_text} [/INST] {output_score}"""

EOS_TOKEN = tokenizer.eos_token # End-of-sequence token; formatting_prompts_func appends it to every training example

def formatting_prompts_func(examples):
    """Turn a batch of (text, label) rows into complete training prompts.

    Args:
        examples: Batch mapping with a "text" column (the text column of the
            CSV) and a "label" column (the label column of the CSV, expected
            to hold the 0-3 score), each a sequence of equal length.

    Returns:
        A dict with the single key "text" holding one formatted prompt per
        row: the rubric, the row's text, the label rendered as a string, and
        the end-of-sequence token appended.
    """
    formatted_prompts = [
        alpaca_prompt.format(
            system_prompt=JUSTIFICATION_CRITERIA,
            input_text=raw_text,
            output_score=str(score),  # the label is cast to a string for training
        )
        + EOS_TOKEN
        for raw_text, score in zip(examples["text"], examples["label"])
    ]
    return {"text": formatted_prompts}

# Read the CSV as a single "train" split, keeping the raw rows under their own name.
raw_dataset = load_dataset("csv", data_files="dataset.csv", split="train")

# Replace the "text" column with the fully formatted prompt for every row.
dataset = raw_dataset.map(formatting_prompts_func, batched=True)

# Sanity check: show the first formatted training example.
print(dataset[0]["text"])

Generating train split: 32 examples [00:00, 1736.57 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 3318.52 examples/s]

[INST] This refers to the nature of the justification of demands. Here we judge to what
extent a speech gives complete justifications for demands. The completeness of
the justifications is judged in terms of the inferences that are made. There are
four levels of justification:
(0) No justification: A speaker only says that X should or should not be done,
but no reason is given.
(1) Inferior justification: Here a reason Y is given as to why X should or should
not be done, but no linkage is made between X and Y ‚Äî the inference is
incomplete. This code also applies if a conclusion is merely supported with
illustrations.
(2) Qualified justification: A linkage is made as to why one should expect that
X contributes to or detracts from Y. A single such complete inference
already qualifies for code 2.
(3) Sophisticated justification: Here at least two complete justifications are
given, either two complete justifications for the same demand or complete
justifications for two different demands




In [4]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="mistral_justification_finetuned",
    seed=3407,
    # Batching: 2 sequences per device, accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule
    max_steps=60,  # TUNE THIS: start small (60) to test, then increase (e.g. train by number of epochs)
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # column produced by formatting_prompts_func
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Setting this to True can speed up training when there are many short sentences
    args=training_args,
)

trainer.train()

num_proc must be <= 32. Reducing num_proc to 32 for dataset of size 32.
Unsloth: Tokenizing ["text"] (num_proc=32): 100%|██████████| 32/32 [00:02<00:00, 12.71 examples/s]
The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 32 | Num Epochs = 15 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 7,289,966,592 (0.58% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.455
2,2.4979
3,2.2214
4,1.8753
5,1.5537
6,1.1781
7,0.7869
8,0.712
9,0.4138
10,0.3085


TrainOutput(global_step=60, training_loss=0.3187627968688806, metrics={'train_runtime': 247.6626, 'train_samples_per_second': 1.938, 'train_steps_per_second': 0.242, 'total_flos': 6396037318606848.0, 'train_loss': 0.3187627968688806, 'epoch': 15.0})

In [9]:
# Save the fine-tuned model and its tokenizer side by side in one local folder.
LORA_OUTPUT_DIR = "mistral_lora_finetuned"

model.save_pretrained(LORA_OUTPUT_DIR)
tokenizer.save_pretrained(LORA_OUTPUT_DIR)

# To reload later:
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "mistral_lora_finetuned", # Loads from the local folder
#     ...
# )

('mistral_lora_finetuned/tokenizer_config.json',
 'mistral_lora_finetuned/special_tokens_map.json',
 'mistral_lora_finetuned/chat_template.jinja',
 'mistral_lora_finetuned/tokenizer.model',
 'mistral_lora_finetuned/added_tokens.json',
 'mistral_lora_finetuned/tokenizer.json')

In [8]:
FastLanguageModel.for_inference(model)  # Enable inference mode (faster)

text_test = "We should lower taxes because it helps the economy."  # Example (probably level 1)

# Build the prompt with an empty score slot so the model fills it in.
prompt = alpaca_prompt.format(
    system_prompt=JUSTIFICATION_CRITERIA,
    input_text=text_test,
    output_score="",
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Two new tokens are enough for a single-digit score plus the end-of-sequence token.
outputs = model.generate(**inputs, max_new_tokens=2, use_cache=True)

# generate() returns the prompt followed by the continuation. Slice off the
# prompt tokens and let the tokenizer drop special tokens, instead of decoding
# everything and parsing it with hard-coded "[/INST]" / "</s>" strings.
prompt_length = inputs["input_ids"].shape[1]
generated_ids = outputs[:, prompt_length:]
result = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

# Show only the predicted score.
print(result[0].strip())

2
