Importación de librerías

In [None]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    %pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install transformers==4.51.3
    !pip install --no-deps unsloth

from unsloth import FastLanguageModel
import torch
from datasets import Dataset
from trl import SFTTrainer, SFTConfig
from google.colab import drive

Modelo a cargar para el fine-tuning

In [None]:
# Load the Qwen2.5-Coder 7B checkpoint and its tokenizer through Unsloth,
# with 4-bit weights and full fine-tuning switched off.
load_options = dict(
    model_name="unsloth/Qwen2.5-Coder-7B",
    max_seq_length=40000,   # context length - can be longer, but uses more memory
    load_in_4bit=True,      # 4-bit uses much less memory
    full_finetuning=False,  # full fine-tuning is available but not used here
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

==((====))==  Unsloth 2025.5.9: Fast Qwen2 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/166 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/4.87k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Parámetros a modificar con el fine-tuning

In [None]:
# Attach LoRA adapters to the listed projection modules of the loaded model.
# Note: `model` is rebound to the wrapped model, so this cell is meant to run
# once per freshly loaded base model.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=32,                                  # LoRA rank; any number > 0 (suggested 8, 16, 32, 64, 128)
    target_modules=lora_target_modules,
    lora_alpha=32,                         # best to choose alpha = rank or rank * 2
    lora_dropout=0,                        # any value is supported, but 0 is optimized
    bias="none",                           # any value is supported, but "none" is optimized
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,                      # rank-stabilized LoRA is supported but disabled
    loftq_config=None,                     # LoftQ is supported but disabled
)

Cargar dataset

In [None]:
from datasets import load_dataset

# NOTE(review): the "test" split loaded here is what the notebook later trains
# on. If the resulting model is ever evaluated on SWE-bench test, that
# evaluation would be contaminated - confirm this choice is intended (the
# download log also lists "dev" and "train" splits).
swe_bench_repo = "SWE-bench/SWE-bench"
ds = load_dataset(swe_bench_repo, split="test")
ds  # last expression: show the dataset summary

README.md:   0%|          | 0.00/3.89k [00:00<?, ?B/s]

dev-00000-of-00001.parquet:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/12.1M [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/106M [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/225 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2294 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/19008 [00:00<?, ? examples/s]

Dataset({
    features: ['repo', 'instance_id', 'base_commit', 'patch', 'test_patch', 'problem_statement', 'hints_text', 'created_at', 'version', 'FAIL_TO_PASS', 'PASS_TO_PASS', 'environment_setup_commit'],
    num_rows: 2294
})

Generar la conversación para el fine-tuning

In [None]:
# Jinja chat template passed to `tokenizer.apply_chat_template`.
# Each message whose role is 'system', 'user' or 'assistant' is emitted between
# an "<|im_start|>{role}" header and an "<|im_end|>" terminator. A message with
# any other role matches no branch and therefore contributes no text, and the
# template appends nothing after the last message.
qwen_chat_template = """{% for message in messages %}
{% if message.role == 'system' %}<|im_start|>system
{{ message.content }}<|im_end|>
{% elif message.role == 'user' %}<|im_start|>user
{{ message.content }}<|im_end|>
{% elif message.role == 'assistant' %}<|im_start|>assistant
{{ message.content }}<|im_end|>
{% endif %}{% endfor %}"""


In [None]:
def generate_conversation(examples):
    """Build one system/user/assistant conversation per example in a batch.

    Args:
        examples: Batch mapping with the keys "problem_statement" and "patch",
            each holding a sequence of strings. The two sequences are paired
            positionally (this is the shape `Dataset.map(..., batched=True)`
            passes in).

    Returns:
        A dict with the single key "conversations": a list with one entry per
        example, where each entry is a list of three messages of the form
        {"role": ..., "content": ...} in the order system, user, assistant.
        The system message embeds the issue text plus the fixed guidelines,
        the user message is the raw issue text, and the assistant message is
        the reference patch.
    """
    conversations = []

    for problem, solution in zip(examples["problem_statement"], examples["patch"]):
        instructions = (
            "We're currently solving the following issue within our repository:\n\n"
            "ISSUE:\n"
            f"{problem}\n\n"
            "You will now begin your investigation and implementation.  \n"
            "Guidelines:\n"
            "- Do not use interactive shell commands.\n"
            "- Do not simulate bash prompts like `bash-$`.\n"
            "- Always start with a DISCUSSION.\n"
            "- Follow it with exactly one properly formatted tool command.\n"
            "- Do not include extra outputs or unrelated text."
        )

        conversations.append([
            {"role": "system", "content": instructions},
            {"role": "user", "content": problem},
            # The role must be spelled exactly "assistant": the chat template
            # only renders system/user/assistant messages, so a misspelled
            # role silently drops the patch from every training text.
            {"role": "assistant", "content": solution},
        ])

    return {"conversations": conversations}


Generar el mapa de la conversación

In [None]:
# Step 1: add a "conversations" column to the dataset, one chat per row.
conversation_column = ds.map(generate_conversation, batched=True)["conversations"]

# Step 2: render every chat to plain text (no tokenization) with the custom
# template defined earlier in the notebook.
reasoning_conversations = tokenizer.apply_chat_template(
    conversation_column,
    chat_template=qwen_chat_template,
    tokenize=False,
)


Map:   0%|          | 0/2294 [00:00<?, ? examples/s]

Se hace un solo dataset

In [None]:
# Wrap the rendered texts in a single-column ("text") dataset and shuffle it
# with a fixed seed so the row order is reproducible.
combined_dataset = Dataset.from_dict({"text": reasoning_conversations}).shuffle(seed=3407)

In [None]:
# Peek at the first rendered training text (row first, then column).
combined_dataset[0]["text"]

"<|im_start|>system\nWe're currently solving the following issue within our repository:\n\nISSUE:\nMigration import ordering violates coding style and isort defaults\nDescription\n\t\nNew migration files are generated with imports sorted by module, independent of import style. For example:\nimport datetime\nfrom django.db import migrations, models\nimport time\nThe \u200bDjango coding style specifies:\nPlace all import module statements before from module import objects in each section.\nThis guidance is the same as what isort does by default, \u200bas documented here. Newly generated migrations can fail isort for this reason.\nThis would mean migration files should instead be generated like this:\nimport datetime\nimport time\nfrom django.db import migrations, models\nFor reference, previous issues related to migration import sorting: #24155, #25384.\n\n\nYou will now begin your investigation and implementation.  \nGuidelines:\n- Do not use interactive shell commands.\n- Do not simula

In [None]:
# Sizes for the split: 80% train and 10% validation (both truncated to int);
# the test slice receives every remaining row.
total = len(combined_dataset)
train_size, val_size = int(0.8 * total), int(0.1 * total)
val_end = train_size + val_size

# Carve the already-shuffled dataset into contiguous train / validation / test ranges.
train_dataset = combined_dataset.select(range(train_size))
val_dataset = combined_dataset.select(range(train_size, val_end))
test_dataset = combined_dataset.select(range(val_end, total))


In [None]:
# Peek at the first training text (row first, then column).
train_dataset[0]["text"]

"<|im_start|>system\nWe're currently solving the following issue within our repository:\n\nISSUE:\nMigration import ordering violates coding style and isort defaults\nDescription\n\t\nNew migration files are generated with imports sorted by module, independent of import style. For example:\nimport datetime\nfrom django.db import migrations, models\nimport time\nThe \u200bDjango coding style specifies:\nPlace all import module statements before from module import objects in each section.\nThis guidance is the same as what isort does by default, \u200bas documented here. Newly generated migrations can fail isort for this reason.\nThis would mean migration files should instead be generated like this:\nimport datetime\nimport time\nfrom django.db import migrations, models\nFor reference, previous issues related to migration import sorting: #24155, #25384.\n\n\nYou will now begin your investigation and implementation.  \nGuidelines:\n- Do not use interactive shell commands.\n- Do not simula

Configuración del entrenamiento

In [None]:
from trl import SFTTrainer, SFTConfig

# Hyper-parameters for the supervised fine-tuning run.
training_args = SFTConfig(
    dataset_text_field="text",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # use gradient accumulation to mimic a larger batch
    warmup_steps=5,
    num_train_epochs=1,             # one full pass over the training data
    # max_steps=300,                # alternative: cap the run by step count instead
    learning_rate=2e-4,             # reduce to 2e-5 for long training runs
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    report_to="none",               # set this to use WandB etc.
)

# No evaluation set is passed, so `val_dataset` is not used by this trainer.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=None,
    args=training_args,
)

Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/1835 [00:00<?, ? examples/s]

Empezar entrenamiento

In [None]:
# Run the fine-tuning loop; whatever `train()` returns is kept in
# `trainer_stats` for later inspection.
trainer_stats = trainer.train()

Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,0.8617
2,1.1196
3,0.7048
4,1.0462
5,0.5745
6,1.011
7,1.1595
8,0.6369
9,0.8288
10,0.9356


Guardar modelo fine tuneado

In [None]:
# Write the trained model and the tokenizer side by side into one local
# directory. The tokenizer call stays last so the cell still displays the
# tuple of files it wrote.
finetuned_dir = "qwen2.5-coder-finetuned"
model.save_pretrained(finetuned_dir)
tokenizer.save_pretrained(finetuned_dir)

('qwen2.5-coder-finetuned/tokenizer_config.json',
 'qwen2.5-coder-finetuned/special_tokens_map.json',
 'qwen2.5-coder-finetuned/vocab.json',
 'qwen2.5-coder-finetuned/merges.txt',
 'qwen2.5-coder-finetuned/added_tokens.json',
 'qwen2.5-coder-finetuned/tokenizer.json')

Merge de modelos, y save de archivo gguf

In [None]:
# Reload model and tokenizer from the local "qwen2.5-coder-finetuned"
# directory before exporting. The import is repeated so this cell can be run
# without the setup cell's imports already in scope.
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="qwen2.5-coder-finetuned",  # the model you used for training
    max_seq_length=40000,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.5.9: Fast Qwen2 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
%%capture
model.save_pretrained_merged("qwen2.5-coder", tokenizer, save_method="merged_16bit")
model.save_pretrained_gguf("qwen2.5-coder-gguf", tokenizer, quantization_method="q4_k_m")

In [None]:
# Upload the q4_k_m GGUF export to the Hugging Face Hub.
import os

# Credentials must never be written into the notebook: the access token is
# read from the HF_TOKEN environment variable. When the variable is unset the
# call receives the same empty string the cell used before, so behaviour is
# unchanged for setups that authenticate some other way.
model.push_to_hub_gguf(
    repo_id="JavierPS/final-model",
    tokenizer=tokenizer,
    quantization_method="q4_k_m",
    token=os.environ.get("HF_TOKEN", ""),
)

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 13.53 out of 83.48 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 28/28 [00:00<00:00, 58.42it/s]


Unsloth: Saving tokenizer... Done.
Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits might take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q4_k_m'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: [1] Converting model at JavierPS/final-model into bf16 GGUF format.
The output location will be /content/JavierPS/final-model/unsloth.BF16.gguf
This might take 3 minutes...
INFO:hf-to-gguf:Loading model: final-model
INFO:hf-to-gguf:Model architecture: Qwen2ForCausalLM
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model weight map from 'model.safetensors.index.json'
INFO:hf-to-gguf:gguf: loading model part 'model-00001-of-00004.safetensors'
INFO:hf-to-g

  0%|          | 0/1 [00:00<?, ?it/s]

unsloth.Q4_K_M.gguf:   0%|          | 0.00/4.68G [00:00<?, ?B/s]

Saved GGUF to https://huggingface.co/JavierPS/final-model
