In [None]:
# Étape 1 : Prérequis
# Vérification du GPU et installation des dépendances nécessaires
!nvidia-smi
!pip3 install -q -U bitsandbytes
!pip3 install -q -U peft
!pip3 install -q -U trl
!pip3 install -q -U accelerate
!pip3 install -q -U datasets
!pip3 install -q -U transformers
!pip3 install -q -U evaluate
!pip3 install -q -U sacrebleu

Thu Aug 22 23:23:35 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Afficher les versions des bibliothèques installées
!nvidia-smi
!echo "Version de bitsandbytes :"
!pip show bitsandbytes
!echo "Version de peft :"
!pip show peft
!echo "Version de trl :"
!pip show trl
!echo "Version de accelerate :"
!pip show accelerate
!echo "Version de datasets :"
!pip show datasets
!echo "Version de transformers :"
!pip show transformers

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

Thu Aug 22 23:24:10 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Configuration pour la quantification en 4 bits
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
# Si vous utilisez Google Colab, connectez-vous à Hugging Face
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Étape 2 : Chargement du modèle
# Chargement du modèle avec la configuration de quantification QLoRA pour réduire l'utilisation de la mémoire
model_id = "tayawelba/model-aftaitesting-it-python"
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Fonction pour générer une réponse à partir du modèle
def obtenir_reponse(query: str, model, tokenizer) -> str:
    device = "cuda:0"
    template_prompt = """
    <start_of_turn>utilisateur
    Below is an instruction that describes a task. Write a response that correctly completes the request.
    {query}
    <end_of_turn>\n<start_of_turn>modèle
    """
    prompt = template_prompt.format(query=query)
    encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    model_inputs = encodeds.to(device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded


In [None]:
# Exemple de test du modèle
resultat = obtenir_reponse(query="write unit tests for this function def add(a,b): return a+b using pytest", model=model, tokenizer=tokenizer)
print(resultat)



    utilisateur
    Below is an instruction that describes a task. Write a response that correctly completes the request.
    write unit tests for this function def add(a,b): return a+b using pytest
    
modèle
    import pytest
from source import add

def test_add_positive():
    assert add(2, 3) == 5

def test_add_zero():
    assert add(5, 0) == 5

def test_add_negative():
    assert add(-2, -3) == -5
    
def test_add_float():
    assert add(2.0, 3.0) == 5.0 


In [None]:
# Étape 3 : Chargement du dataset pour le fine-tuning
# Utilisation d'un dataset pour entraîner des modèles de génération de code
from datasets import load_dataset


In [None]:
# Charger les données à partir du fichier data.json
dataset = load_dataset("json", data_files="/content/data_test.json", split="train")
print(dataset)


Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 601
})


In [None]:
# Transformation des données en prompts adaptés à l'entraînement
def generate_prompt(data_point):
    prefix_text = 'Below is an instruction that describes a task. Write a pytest unit test function that ' \
                  'appropriately completes the request.\n\n'
    if data_point['input']:
        text = f"""<start_of_turn>user {prefix_text} {data_point["instruction"]} Here is the function:\n{data_point["input"]} <end_of_turn>\n<start_of_turn>model\n{data_point["output"]} <end_of_turn>"""
    else:
        text = f"""<start_of_turn>user {prefix_text} {data_point["instruction"]} <end_of_turn>\n<start_of_turn>model\n{data_point["output"]} <end_of_turn>"""
    return text


In [None]:
# Ajouter la colonne "prompt" au dataset
text_column = [generate_prompt(data_point) for data_point in dataset]
dataset = dataset.add_column("prompt", text_column)


In [None]:
# Mélanger et diviser le dataset en ensembles d'entraînement, de validation et de test
dataset = dataset.shuffle(seed=1234)
dataset = dataset.map(lambda samples: tokenizer(samples["prompt"]), batched=True)
train_test_valid_data = dataset.train_test_split(test_size=0.2)
train_data = train_test_valid_data["train"]
valid_test_data = train_test_valid_data["test"].train_test_split(test_size=0.5)
valid_data = valid_test_data["train"]
test_data = valid_test_data["test"]

print("Taille des ensembles :")
print(f"Entraînement: {len(train_data)}, Validation: {len(valid_data)}, Test: {len(test_data)}")


Taille des ensembles :
Entraînement: 480, Validation: 60, Test: 61


In [None]:
# Étape 4 : Application de LoRA (Low-Rank Adaptation)
# Application de la technique LoRA pour optimiser l'entraînement du modèle
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model

# Activation du gradient checkpointing pour optimiser la mémoire
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)


In [None]:
import bitsandbytes as bnb

# Identification des modules linéaires du modèle pour LoRA
def trouver_tous_noms_lineaires(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
        if 'lm_head' in lora_module_names:
            lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = trouver_tous_noms_lineaires(model)
print(modules)


['o_proj', 'q_proj', 'v_proj', 'down_proj', 'up_proj', 'gate_proj', 'k_proj']


In [None]:
# Configuration de LoRA
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)


In [None]:
# Application de la configuration LoRA au modèle
model = get_peft_model(model, lora_config)
trainable, total = model.get_nb_trainable_parameters()
print(f"Paramètres entraînables : {trainable} | total : {total} | Pourcentage : {trainable/total*100:.4f}%")


Paramètres entraînables : 83066880 | total : 2697408768 | Pourcentage : 3.0795%


In [None]:
# Étape 5 : Entraînement du modèle
# Fine-tuning du modèle avec qLoRA et Supervised Fine-Tuning (SFT)
import transformers
from trl import SFTTrainer

# Configuration de l'entraîneur
tokenizer.pad_token = tokenizer.eos_token
torch.cuda.empty_cache()

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=valid_data,
    dataset_text_field="prompt",
    peft_config=lora_config,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        max_steps=100,
        learning_rate=2e-4,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
        save_strategy="epoch",
        evaluation_strategy="epoch"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Lancement de l'entraînement
model.config.use_cache = False  # silence les avertissements. Veuillez réactiver pour l'inférence !
trainer.train()

It is strongly recommended to train Gemma2 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


Epoch,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 4.20 GiB. GPU 

In [None]:
# Évaluation du modèle avec différentes métriques
from evaluate import load
import numpy as np
from transformers import GPT2TokenizerFast

# Charger les métriques
exact_match = load("exact_match")
bleu = load("sacrebleu")
perplexity_metric = load("perplexity")

In [None]:

# Fonction d'évaluation
def evaluate_model(model, tokenizer, dataset):
    exact_match_scores = []
    bleu_scores = []
    perplexities = []
    for example in dataset:
        generated_output = obtenir_reponse(example["instruction"], model, tokenizer)
        references = [example["output"]]

        # Exact Match
        exact_match_scores.append(exact_match.compute(predictions=[generated_output], references=references)["exact_match"])

        # BLEU Score
        bleu_scores.append(bleu.compute(predictions=[generated_output], references=references)["score"])

        # Perplexity
        encodings = tokenizer(generated_output, return_tensors="pt")
        max_length = model.config.n_positions
        stride = 512
        lls = []
        for i in range(0, encodings.input_ids.size(1), stride):
            begin_loc = max(i + stride - max_length, 0)
            end_loc = min(i + stride, encodings.input_ids.size(1))
            trg_len = end_loc - i
            input_ids = encodings.input_ids[:, begin_loc:end_loc].to("cuda")
            target_ids = input_ids.clone()
            target_ids[:, :-trg_len] = -100

            with torch.no_grad():
                outputs = model(input_ids, labels=target_ids)
                log_likelihood = outputs.loss * trg_len
            lls.append(log_likelihood)

        perplexities.append(torch.exp(torch.stack(lls).sum() / end_loc).item())

    print(f"Exact Match: {np.mean(exact_match_scores):.4f}")
    print(f"BLEU Score: {np.mean(bleu_scores):.4f}")
    print(f"Perplexity: {np.mean(perplexities):.4f}")


In [None]:
# Évaluation sur le set de test
evaluate_model(merged_model, tokenizer, test_data)

In [None]:
# Partage des adaptateurs sur le Hub Hugging Face
nouveau_modele = "model-aftaitesting-it-python" # Nom du modèle que vous allez pousser sur le Hub Hugging Face
trainer.model.save_pretrained(nouveau_modele)

In [None]:
# Fusion et sauvegarde du modèle
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)

merged_model = PeftModel.from_pretrained(base_model, nouveau_modele)
merged_model = merged_model.merge_and_unload()


In [None]:
# Sauvegarder le modèle fusionné
merged_model.save_pretrained("merged_model", safe_serialization=True)
tokenizer.save_pretrained("merged_model")

# Pousser le modèle et le tokenizer sur le Hub Hugging Face
merged_model.push_to_hub(nouveau_modele, use_temp_dir=False)
tokenizer.push_to_hub(nouveau_modele, use_temp_dir=False)

In [None]:
# Tester le modèle fine-tuné
resultat = obtenir_reponse(query="write unit tests for this function def add(a,b): return a+b using pytest", model=merged_model, tokenizer=tokenizer)
print(resultat)


# Étape 1 : Prérequis

In [None]:
# Vérification du GPU et installation des dépendances nécessaires
!nvidia-smi
!pip3 install -q -U bitsandbytes
!pip3 install -q -U peft
!pip3 install -q -U trl
!pip3 install -q -U accelerate
!pip3 install -q -U datasets
!pip3 install -q -U transformers


/bin/bash: line 1: nvidia-smi: command not found


In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

messages = [
    {"role": "user", "content": |Write a unit test for this function:
              # Instructions to include:
              Use the pytest library to write unit tests. Implement the pairwise testing method to generate all possible combinations of input parameters. Ensure that each test case covers a different combination of parameters to thoroughly test the function. For each combination, generate a distinct test function using clear and descriptive naming, such as test_case_a_very_large_b_near_0, test_case_a_medium_b_high_1, etc. The tests should be well-organized, clear, and check for the expected outputs or behaviors of the function. Include appropriate assertions for each test case to validate the results. give just one block code, no instructions\n
              the function is:  def multiplication(a,b): return a*b using pytest"""},
]
pipe = pipeline("text-generation", model="tayawelba/model-aftaitesting-it-python")
pipe(messages)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/898 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/522 [00:00<?, ?B/s]



ValueError: Input length of input_ids is 28, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [None]:
# Afficher les versions des bibliothèques installées
!nvidia-smi
!echo "Version de bitsandbytes :"
!pip show bitsandbytes
!echo "Version de peft :"
!pip show peft
!echo "Version de trl :"
!pip show trl
!echo "Version de accelerate :"
!pip show accelerate
!echo "Version de datasets :"
!pip show datasets
!echo "Version de transformers :"
!pip show transformers

Thu Aug 22 16:05:31 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P0              30W /  70W |  10237MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Configuration pour la quantification en 4 bits
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)



In [None]:
# Si vous utilisez Google Colab, connectez-vous à Hugging Face
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Étape 2 : Chargement du modèle
Chargement du modèle avec la configuration de quantification QLoRA pour réduire l'utilisation de la mémoire

In [None]:
model_id = "tayawelba/model-aftaitesting-it-python"

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)

config.json:   0%|          | 0.00/898 [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Fonction pour générer une réponse à partir du modèle
def obtenir_reponse(query: str, model, tokenizer) -> str:
    device = "cuda:0"

    template_prompt = """
    <start_of_turn>utilisateur
    Below is an instruction that describes a task. Write a response that correctly completes the request.
    {query}
    <end_of_turn>\n<start_of_turn>modèle
    """
    prompt = template_prompt.format(query=query)

    encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    model_inputs = encodeds.to(device)

    generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded

In [None]:
# Exemple de test du modèle
resultat = obtenir_reponse(query="write unit tests for this function def add(a,b): return a+b using pytest", model=model, tokenizer=tokenizer)
print(resultat)


    utilisateur
    Below is an instruction that describes a task. Write a response that correctly completes the request.
    write unit tests for this function def add(a,b): return a+b using pytest
    
modèle
    import pytest

def test_add():
    assert add(1, 2) == 3 


# Étape 3 : Chargement du dataset pour le fine-tuning



Nous utiliserons ce [dataset](https://huggingface.co/datasets/TokenBender/code_instructions_122k_alpaca_style) qui est un excellent point de départ pour entraîner des modèles de génération de code.

In [None]:
from datasets import load_dataset

# Charger les données à partir du fichier data.json
dataset = load_dataset("json", data_files="/content/data_test.json", split="train")
print(dataset)

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 260
})


In [None]:
# Transformation des données en prompts adaptés à l'entraînement
def generate_prompt(data_point):
    """Gen. input text based on a prompt, task instruction, (context info.), and answer

    :param data_point: dict: Data point
    :return: dict: tokenzed prompt
    """
    prefix_text = 'Below is an instruction that describes a task. Write a pytest unit test function that ' \
                  'appropriately completes the request.\n\n'
    # Samples with additional context info.
    if data_point['input']:
        text = f"""<start_of_turn>user {prefix_text} {data_point["instruction"]} Here is the function:\n{data_point["input"]} <end_of_turn>\n<start_of_turn>model\n{data_point["output"]} <end_of_turn>"""
    # Without additional context
    else:
        text = f"""<start_of_turn>user {prefix_text} {data_point["instruction"]} <end_of_turn>\n<start_of_turn>model\n{data_point["output"]} <end_of_turn>"""
    return text


In [None]:
# Ajouter la colonne "prompt" au dataset
text_column = [generate_prompt(data_point) for data_point in dataset]
dataset = dataset.add_column("prompt", text_column)

In [None]:
# Mélanger et diviser le dataset en ensembles d'entraînement et de test
dataset = dataset.shuffle(seed=1234)
dataset = dataset.map(lambda samples: tokenizer(samples["prompt"]), batched=True)
dataset = dataset.train_test_split(test_size=0.2)
train_data = dataset["train"]
test_data = dataset["test"]
print(train_data)
print(test_data)

Map:   0%|          | 0/260 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'input', 'output', 'prompt', 'input_ids', 'attention_mask'],
    num_rows: 208
})
Dataset({
    features: ['instruction', 'input', 'output', 'prompt', 'input_ids', 'attention_mask'],
    num_rows: 52
})


# Étape 4 : Application de LoRA (Low-Rank Adaptation)
Application de la technique LoRA pour optimiser l'entraînement du modèle


In [None]:
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model

# Activation du gradient checkpointing pour optimiser la mémoire
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
import bitsandbytes as bnb

# Identification des modules linéaires du modèle pour LoRA
def trouver_tous_noms_lineaires(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
        if 'lm_head' in lora_module_names:
            lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [None]:
modules = trouver_tous_noms_lineaires(model)
print(modules)

['gate_proj', 'down_proj', 'q_proj', 'up_proj', 'k_proj', 'v_proj', 'o_proj']


In [None]:
# Configuration de LoRA
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [None]:
# Application de la configuration LoRA au modèle
model = get_peft_model(model, lora_config)
trainable, total = model.get_nb_trainable_parameters()
print(f"Paramètres entraînables : {trainable} | total : {total} | Pourcentage : {trainable/total*100:.4f}%")


Paramètres entraînables : 83066880 | total : 2697408768 | Pourcentage : 3.0795%


# Étape 5 : Entraînement du modèle
Fine-tuning du modèle avec qLora et Supervised Fine-Tuning (SFT)

In [None]:
import transformers
from trl import SFTTrainer

# Configuration de l'entraîneur
tokenizer.pad_token = tokenizer.eos_token
torch.cuda.empty_cache()

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=test_data,
    dataset_text_field="prompt",
    peft_config=lora_config,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        max_steps=100,
        learning_rate=2e-4,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
        save_strategy="epoch",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Lancement de l'entraînement
model.config.use_cache = False  # silence les avertissements. Veuillez réactiver pour l'inférence !
trainer.train()



OutOfMemoryError: CUDA out of memory. Tried to allocate 582.00 MiB. GPU 

In [None]:
# Partage des adaptateurs sur le Hub Hugging Face
nouveau_modele = "model-aftaitesting-it-python" # Nom du modèle que vous allez pousser sur le Hub Hugging Face
trainer.model.save_pretrained(nouveau_modele)


In [None]:
# Fusion et sauvegarde du modèle
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)

merged_model = PeftModel.from_pretrained(base_model, nouveau_modele)
merged_model = merged_model.merge_and_unload()

In [None]:
# Sauvegarder le modèle fusionné
merged_model.save_pretrained("merged_model", safe_serialization=True)
tokenizer.save_pretrained("merged_model")

In [None]:
# Pousser le modèle et le tokenizer sur le Hub Hugging Face
merged_model.push_to_hub(nouveau_modele, use_temp_dir=False)
tokenizer.push_to_hub(nouveau_modele, use_temp_dir=False)

In [None]:
# Tester le modèle fine-tuné
resultat = obtenir_reponse(query="write unit tests for this function def add(a,b): return a+b using pytest", model=merged_model, tokenizer=tokenizer)
print(resultat)

In [None]:
import torch
import transformers
from trl import SFTTrainer
import matplotlib.pyplot as plt
from datasets import load_metric

# Étape 1 : Calcul de l'accuracy
def calculer_accuracy(predictions, labels):
    """Calcule l'accuracy en comparant les prédictions avec les étiquettes.

    :param predictions: torch.Tensor: Prédictions du modèle
    :param labels: torch.Tensor: Étiquettes réelles
    :return: float: Accuracy
    """
    predictions = predictions.argmax(dim=-1)
    accuracy = (predictions == labels).float().mean()
    return accuracy.item()


In [None]:
# Configuration de l'entraîneur
tokenizer.pad_token = tokenizer.eos_token
torch.cuda.empty_cache()

accuracy_metric = load_metric("accuracy")

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=test_data,
    dataset_text_field="prompt",
    peft_config=lora_config,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        max_steps=100,
        learning_rate=2e-4,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
        save_strategy="epoch",
        evaluation_strategy="epoch",  # Effectuer une évaluation après chaque epoch
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)


In [None]:
# Listes pour stocker l'accuracy après chaque epoch
train_accuracies = []
eval_accuracies = []

In [None]:
# Modification de la fonction d'entraînement pour enregistrer l'accuracy
for epoch in range(trainer.args.num_train_epochs):
    # Entraînement
    trainer.train()

    # Évaluation
    eval_results = trainer.evaluate()

    # Calcul de l'accuracy sur l'ensemble d'entraînement et de test
    train_accuracy = calculer_accuracy(eval_results['predictions'], eval_results['label_ids'])
    eval_accuracy = calculer_accuracy(eval_results['eval_predictions'], eval_results['eval_label_ids'])

    train_accuracies.append(train_accuracy)
    eval_accuracies.append(eval_accuracy)

    print(f"Epoch {epoch + 1}: Accuracy Entraînement = {train_accuracy:.4f}, Accuracy Évaluation = {eval_accuracy:.4f}")


In [None]:
# Étape 2 : Tracer les courbes d'accuracy
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_accuracies) + 1), train_accuracies, label='Accuracy Entraînement')
plt.plot(range(1, len(eval_accuracies) + 1), eval_accuracies, label='Accuracy Évaluation')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Courbes d\'Accuracy au cours de l\'Entraînement')
plt.legend()
plt.grid(True)
plt.show()